#!/usr/local/bin/gawk -f #!/usr/bin/awk -f # Change the gawk path if you do not have it in /usr/local/bin. # You should use gawk 2.15.5 or later. # Put the awk line on top if you do not have gawk. # You will then have to precede options with "+" instead of "-". # This program writes to /dev/stderr, so you need to either use gawk or # have the dup driver installed. # @(#) divisions.gawk 2.4 97/06/19 # 92/08/16 john h. dubois iii (spcecdt@armory.com) # 93/03/16 Work with either /dev/hd* or /dev/dsk/* division names # 93/07/11 Added warning about undivvied partition. # 93/10/20 Added printing of partition params in blocks as well as tracks # 93/10/29 Added help & physical layout sorting # 94/03/08 Use gawk so - options can be given. # 94/08/25 added u option. # 94/11/09 Exit 0 for gawk after dparam & fdisk; check their output. # 95/08/21 Use -b to check for existance of /dev/hdxx, not -f! # Print partition device names; deal with nonexistant partition names. # Added c option. # 95/08/26 Added C option; turn c option on by default; added processing of # rcfile; print driver, unit, & SCSI params # 95/09/14 v5 port: added HTFS and DTFS to list of division types that are # considered filesystems; redirect df input from /dev/null to work # around protlib/gawk bug. # 96/09/09 Print driver major number in addition to driver name. # Get driver name from the name of the open routine given in bdevsw[]. # Print SCSI bus number. # 96/10/01 Added n option. # Read /etc/ps/booted.system to get namelist file if -n is not given. # 97/06/19 Ignore drive if dparam on it fails. BEGIN { ProgName = "divisions" Usage = "Usage: " ProgName " [-cChsux] [-n]" rcFile = "/etc/default/divisions" ARGC = Opts(ProgName,Usage,"cCsuxn:h",0,rcFile, "FSINFO,NOFSINFO,PSORT,BOUNDS_CHECK,DEBUG,NAMELIST") if ("h" in Options) { printf \ "%s: display information about disks, partitions and divisions.\n"\ "%s\n"\ "Options:\n"\ "Some of the following options can also be set by assigning values to\n"\ "variables in a configuration file named %d. Variables are assigned to\n"\ "with the syntax: varname=value or in the case of flags, by simply\n"\ "putting the indicated variable name in the file without a value. Flag\n"\ "options can be turned off on the command line by following them\n"\ "immediately with \"-\", e.g. -n- to turn off the n option in such a way\n"\ "that it cannot be turned on in the config file. Variable names appear in\n"\ "parentheses in the option descriptions.\n"\ "Options:\n"\ "-c: For divisions that contain filesystems, show filesystem size and space\n"\ " free (default). (FSINFO)\n"\ "-C: Turn off the -c option. (NOFSINFO)\n"\ "-h: Print this help.\n"\ "-n: Use as the symbol file used to access the system\n"\ " image. This is used only to get the driver and unit identification.\n"\ " (NAMELIST)\n"\ "-s: Sort output by physical disk layout instead of partition and division\n"\ " number. (PSORT)\n"\ "-u: Report space in UNIX & XENIX partitions not used by any division, and\n"\ " overlapping divisions. This turns on the s option. (BOUNDS_CHECK)\n"\ "-x: Turn on debugging (show command output, etc.). (DEBUG)\n", ProgName,Usage,rcFile exit 0 } Debug = "x" in Options ShowContents = !("C" in Options) FindUnusedPart = "u" in Options Sort = FindUnusedPart || "s" in Options if ("n" in Options) Namelist = Options["n"] if (!FindDrives(Drives,DriveDevs)) { print "Error getting drive list. Exiting." > "/dev/stderr" exit 1 } GetParams(DriveDevs,Cylinders,Heads,SPT) PrintParams(Drives,DriveDevs,Cylinders,Heads,SPT,Namelist) GetPartitions(DriveDevs,Start,End,Size,Type,Status,PDev) PrintPartitions(SPT,Drives,PDev,Status,Type,Start,End,Sort,PartMap) GetDivisions(Type,PDev,Name,DivType,FirstBlock,LastBlock) if (ShowContents) GetContents(Name,DivType,fsSize,fsUsed) PrintDivisions(Drives,Start,Name,DivType,FirstBlock,LastBlock,Sort,PartMap, FindUnusedPart,ShowContents,fsSize,fsUsed) } # Drives[1..n] gives drive number. # All other arrays are indexed by drive number. function PrintParams(Drives,DriveDevs,Cylinders,Heads,SPT, Format,i,DriveNum,Dev,Cyl,Hds,Sec,Majors,Minors,Drivers,Unit,HAtype,HAnumber, ID,LUN,Ind,Bus,devTypes) { Format = "%2s %4s %5s %3s %6s %8s %-7s %-9s %5s" SCSIformat = " %-4s%6s %2s %3s %3s\n" printf Format,"HD","Cyl","Heads","SPT","Tracks","Size(MB)","Device", "Driver","Unit" printf SCSIformat,"HA","HA-num","ID","LUN","Bus" for (i = 1; i in Drives; i++) Majors[DriveDevs[Drives[i]]] GetDevInfo(Drivers,Majors,Minors,devTypes,Namelist) Read_mscsi(HAtype,HAnumber,ID,LUN,Bus) for (i = 1; i in Drives; i++) { DriveNum = Drives[i] if (!(DriveNum in Cylinders)) continue Dev = DriveDevs[DriveNum] Cyl = Cylinders[DriveNum] Hds = Heads[DriveNum] Sec = SPT[DriveNum] Unit = int(Minors[Dev]/64) printf Format,DriveNum,Cyl,Hds,Sec,Cyl*Hds,int(Cyl*Hds*Sec/2048), substr(Dev,7),sprintf("%3s",(Dev in Majors) ? Majors[Dev] : "?") \ "/" ((Dev in Drivers) ? Drivers[Dev] : "?"),Unit if (Drivers[Dev] == "Sdsk") { Ind = "Sdsk" SUBSEP Unit printf SCSIformat,HAtype[Ind],HAnumber[Ind],ID[Ind],LUN[Ind], Bus[Ind] } else printf SCSIformat,"-","-","-","-","-" } print "" } # Input variables: # Names[DriveNum,Part,Div]: Division names (from device nodes). # DivType[DriveNum,Part,Div]: Filesystem types for each division. # Output variables: # fsSize[DriveNum,Part,Div] = filesystem size, in 1K blocks. # fsUsed[DriveNum,Part,Div] = filesystem space used, in 1K blocks. function GetContents(Names,DivType,fsSize,fsUsed, fsTypes,Cmd,Lines,Dev,Name2Ind,Elem,Ind,i) { MakeSet(fsTypes,"AFS,EAFS,S51K,HTFS,DTFS",",") for (i in DivType) { if (DivType[i] in fsTypes) # Must put leading /dev/ back on due to df bug Cmd = Cmd " /dev/" Names[i] # Make index to retrieve info from output with Name2Ind["/dev/" Names[i]] = i } if (Cmd == "") return # Redirect input from /dev/null to work around gawk+protlib bugs Cmd = "df -v " Cmd " < /dev/null" if (Debug) ErrPrint("Getting filesystem info with command: " Cmd) ReadLines(Cmd,"",2," +",Lines) # df output looks like this (first field is a df bug): # 1 2 3 4 # Mount Dir Filesystem blocks used free %used # /dev/u /dev/u 2400000 2252262 147738 93% # or, if fs is not mounted: # /dev/u 2400000 2252262 147738 93% for (Dev in Lines) if (Dev in Name2Ind) { Ind = Name2Ind[Dev] # Must split on " +" so that if 1st field is empty it will still # be the 1st field. split(Lines[Dev],Elem," +") fsSize[Ind] = int(Elem[3]/2) fsUsed[Ind] = int(Elem[4]/2) if (Debug) ErrPrint(\ Dev ": size = " fsSize[Ind] "; block used = " fsUsed[Ind]) } } # Input variables: # SPT[]: Sectors per track # Drives[1..n]: Drive numbers that exist. # PDev[DriveNum,Partition]: Partition device names. # Status[DriveNum,Partition]: Partition status. # Type[DriveNum,Partition]: Partition type. # Start[DriveNum,Partition]: Partition start track. # End[DriveNum,Partition]: Partition end track. # PSort: Whether to sort partitions by starting track. # Output variables: # PartMap[DriveNum,1..n]: Set to the partition numbers in the order they were # printed (affected by PSort). function PrintPartitions(SPT,Drives,PDev,Status,Type,Start,End,PSort,PartMap, Sec,Format,Drive,DriveNum,Partition,i,Map,Num,k,TW) { # HD Par Stat Type sTr eTr zTr sBl eBl zBl TW = 6 # Width of track fields Format = "%2s %4s %-8s %-5s %" TW "s %" TW "s %" TW "s %8s %8s %8s %s\n" printf Format,"HD","Part","Status","Type","StartT","EndT","SizeT", "StartB","EndB","SizeB","Device" for (Drive = 1; Drive in Drives; Drive++) { DriveNum = Drives[Drive] split("",Map,"") Num = 0 for (Partition = 1; Partition <= 4; Partition++) if (DriveNum SUBSEP Partition in Start) if (PSort) Map[Partition] = Start[DriveNum,Partition] else k[++Num] = Partition if (PSort) Num = qsortArbIndByValue(Map,k) for (i = 1; i <= Num; i++) { PartMap[DriveNum,i] = Partition = k[i] Sec = SPT[DriveNum] printf Format,DriveNum,Partition,Status[DriveNum,Partition], Type[DriveNum,Partition], Start[DriveNum,Partition], End[DriveNum,Partition],Size[DriveNum,Partition], Start[DriveNum,Partition] * Sec, End[DriveNum,Partition] * Sec,Size[DriveNum,Partition] * Sec, ((DriveNum,Partition) in PDev) ? PDev[DriveNum,Partition] : "-" } } print "" } function PrintDivisions(Drives,Start,Name,DivType,FirstBlock,LastBlock,PSort, PartMap,FindUnusedPart,ShowContents,fsSize,fsUsed, Division,Partition,DriveNum,Format,Drive,Map,k,i,Num,PartNum,StartB,EndB, PrevEnd,PrevName,Size,Used,Pct) { Format = "%2s %4s %3s %-13s %-8s %7s %7s %7s" if (ShowContents) Format = Format " %7s %7s %4s" Format = Format "\n" printf \ Format,"HD","Part","Div","Name","Type","Start","End","Size","FS-size", "FS-used","%" for (Drive = 1; Drive in Drives; Drive++) { DriveNum = Drives[Drive] for (PartNum = 1; (DriveNum SUBSEP PartNum) in PartMap; PartNum++) { Partition = PartMap[DriveNum,PartNum] if ((DriveNum SUBSEP Partition) in Start) { split("",Map,"") Num = 0 for (Division = 0; Division <= 6; Division++) if (DriveNum SUBSEP Partition SUBSEP Division in Name) if (PSort) Map[Division] = \ FirstBlock[DriveNum,Partition,Division] else k[++Num] = Division if (FindUnusedPart) { PrevEnd = -1 PrevName = "start of partition" } if (PSort) Num = qsortArbIndByValue(Map,k) for (i = 1; i <= Num; i++) { Division = k[i] StartB = FirstBlock[DriveNum,Partition,Division] EndB = LastBlock[DriveNum,Partition,Division] DivName = Name[DriveNum,Partition,Division] if (FindUnusedPart) { if (StartB > (PrevEnd + 1)) printf \ "* %d blocks unused (%d - %d) between %s and %s.\n", StartB-PrevEnd-1,PrevEnd+1,StartB-1, PrevName,DivName if (StartB < (PrevEnd + 1)) printf \ "!!! Divisions %s and %s overlap by %d blocks!\n", PrevName,DivName,PrevEnd-StartB+1 PrevEnd = EndB PrevName = DivName } if ((DriveNum,Partition,Division) in fsSize) { Size = fsSize[DriveNum,Partition,Division] Used = fsUsed[DriveNum,Partition,Division] Pct = Percent4(Used/Size) } else Pct = Used = Size = "-" printf Format,DriveNum,Partition,Division,DivName, DivType[DriveNum,Partition,Division], StartB, EndB, EndB - StartB + 1, Size,Used,Pct } # Report space at end of partition as unallocated even though # some is used by system because there is no obvious way of # determining how much is used by system & how much is really # unused. if (FindUnusedPart && \ (StartB = (LastBlock[DriveNum,Partition,7]+1)) > PrevEnd+1) printf \ "* %d blocks unused (%d - %d) between %s and %s.\n", StartB-PrevEnd-1,PrevEnd+1,StartB-1, PrevName,"end of partition" } } } } # Find all /dev/rhd?0 nodes # Set Drives[1..n] to the number of each drive # For each node, set DriveDevs[drivenum] to the device name function FindDrives(Drives,DriveDevs, DriveInd,Drive) { DriveInd = 1 if (CmdReadLine("echo /dev/rhd?0",1) != 1) return 0 for (Drive = 1; Drive <= NF; Drive++) { DriveNum = substr($Drive,9,1) Drives[DriveInd++] = DriveNum DriveDevs[DriveNum] = $Drive } return 1 } # Do an fdisk on each drive in DriveDevs # Set the following arrays to drive data, indexed by drive number & partition: # Start First track # End Last track # Size Total tracks # Type Partition type # Status Partition status # PDev Partition device name function GetPartitions(DriveDevs,Start,End,Size,Type,Status,PDev, cmd,DriveInd,Partition,DriveNum,DriveDev,PInfo,line,NameCmd) { for (DriveNum in DriveDevs) { # exit 0 for gawk #cmd = "/etc/fdisk -p -f " DriveDevs[DriveNum] " 2>/dev/null; exit 0" cmd = "exec /etc/fdisk -p -f " DriveDevs[DriveNum] " 2>/dev/null" if (Debug) ErrPrint("Getting partition info with: " cmd) while ((cmd | getline) == 1) { if (Debug) ErrPrint("fdisk output: " $0) # Some error messages, e.g. "cannot open /dev/rhd10", are currently # printed to stdout. Only pay attention to lines with 6 fields. if (NF >= 6) { Partition = $1 Start[DriveNum,Partition] = $2 End[DriveNum,Partition] = $3 Size[DriveNum,Partition] = $4 Type[DriveNum,Partition] = $5 Status[DriveNum,Partition] = $6 DevC = $6 == "Active" ? "a" : Partition NameCmd = NameCmd "\n"\ "n=" DriveNum " p=" Partition " c=" DevC "\n"\ "for dev in hd$n$c dsk/${n}s$c; do [ -b $dev ] &&\n"\ "{ echo $n $p $dev; break; }; done" } } close(cmd) } NameCmd = "cd /dev" NameCmd if (Debug) ErrPrint("Getting partition names with:\n" NameCmd) while ((NameCmd | getline) == 1) if (NF == 3) PDev[$1,$2] = $3 else print "Bad partition name output: " $0 > "/dev/stderr" close(NameCmd) } # Reads mscsi file, and stores info in the arrays, indexed by # [device-type,unit-number], where device-type is the SCSI device type as # given in the 2nd field of mscsi, and unit-number is the instance of the # device, starting with 0. mscsi has this format: # 1 2 3 4 5 6 # HA-type device-type HA-number SCSI-ID LUN Bus # Bus is not given in versions that do not support multiple adapter buses. function Read_mscsi(HAtype,HAnumber,ID,LUN,Bus, mscsi,ret,instance,i) { mscsi = "/etc/conf/cf.d/mscsi" FS = "[ \t]+" while ((ret = (getline < mscsi)) == 1) if ($0 !~ /^\*/ && NF >= 5) { i = $2 SUBSEP instance[$2]++ + 0 #printf "%s:%s:%s:%s\n",$1,$3,$4,$5 HAtype[i] = $1 HAnumber[i] = $3 ID[i] = $4 LUN[i] = $5 Bus[i] = (NF >= 6) ? $6 : 0 } return ret } # GetDevInfo: get information about device nodes. # Device nodes to get information about are passed as indexes of Majors[]. # For each index in Majors[], sets these: # Majors[dev]: major number # Minors[dev]: minor number # devTypes[dev]: c or b (character or block) # Drivers[dev]: driver name, based on the name of the device open routine for # the major number of the device (as reported by crash). function GetDevInfo(Drivers,Majors,Minors,devTypes,Namelist, Cmd,devName,Lines,Line,Major,Elem,Maj2Driver,type,dev,dev2maj) { for (devName in Majors) Cmd = Cmd " " devName ReadLines("l -gon" Cmd,"",0,"[ ,]+",Lines) for (devName in Lines) if (devName in Majors && (Line = Lines[devName]) ~ /^[cb]/) { split(Line,Elem,"[ ,]+") Majors[devName] = Major = Elem[3] Minors[devName] = Elem[4] devTypes[devName] = type = substr(Line,1,1) Maj2Driver[Major,type] dev2maj[devName] = Major SUBSEP type } driverNames(Maj2Driver,Namelist) for (dev in Majors) Drivers[dev] = Maj2Driver[dev2maj[dev]] } # This relies on hardcoded sizes for the bdevsw and cdevsw structs. The # commands passed to crash only work with the crash shipped with 5.0 & later. function driverNames(maj2Driver,Namelist, major,crashStr,Cmd,Order,i,Driver,type) { # Get list of all major numbers that driver names are needed for i = 0 for (t in maj2Driver) { split(t,Elem,SUBSEP) major = Elem[1] type = Elem[2] Order[i++] = t crashStr = crashStr sprintf("ts *(" type "devsw+%x)\n", major*(type == "b" ? 24 : 32)) } if (Namelist == "") { ReadConfigFile(Values,Lines,"/etc/ps/booted.system","#","=",1) Namelist = Values["KERNEL"] } if (Namelist != "") Namelist = " -n " Namelist Cmd = "echo '" crashStr "' 2>&1 | crash" Namelist i = 0 while ((Cmd | getline) == 1 && (i in Order)) { # Get rid of header. Must do it here rather than immediately after # starting coprocess because new crash does not print the header until # the first line of other output. if ($0 ~ /^dumpfile/) continue if ($0 ~ /^Warning/) { print $0 > "/dev/stderr" continue } # Old crash will have a prompt as field 1, but since it will not work # here anyway, do not worry about it. if ($0 ~ /^[a-zA-Z0-9_]*open +\+ 0$/) { Driver = $1 sub("open$","",Driver) } else Driver = "?" maj2Driver[Order[i]] = Driver i++ } close(Cmd) } # Drivers is set to the driver name given in mdevice for the major # number that the device has. Note that this may be incorrect if mdevice is # out of sync with the booted kernel. # mdevice has this format: # 1 2 3 4 5 6 7 8 9 # driver-name funcs characteristics prefix bmajor cmajor minu maxu dma function getmdevice(char,block, mdevice,driver,Elem) { # Index by driver name because that is the field that is required to be # unique. Cannot index by bmajor or cmajor because some devices will have # one but not both. ReadLines("","/etc/conf/cf.d/mdevice",1,"[ \t]+",mdevice) for (driver in mdevice) { split(mdevice[driver],Elem,"[ \t]+") block[Elem[5]] = driver char[Elem[6]] = driver } } # Input vars: # Type[DriveNum,Partition]: Partition type (e.g. UNIX). # PDev[DriveNum,Partition]: Partition device names # Output vars: # For each XENIX or UNIX partition in Type, # do a divvy to get division info & put it in the following arrays: # Name[DriveNum,Part,Div]: division names (from device nodes). # DivType[DriveNum,Part,Div]: division types (e.g. AFS, EAFS, NON FS) # FirstBlock[DriveNum,Part,Div]: Partition block that division starts on. # LastBlock[DriveNum,Part,Div]: Partition block that division ends on. function GetDivisions(Type,PDev,Name,DivType,FirstBlock,LastBlock, DrivePart,DriveNum,Partition,DInfo,Division,line) { for (DrivePart in Type) if (Type[DrivePart] ~ "XENIX|UNIX") { split(DrivePart,Elem,SUBSEP) DriveNum = Elem[1] Partition = Elem[2] if (!((DriveNum,Partition) in PDev)) { printf "No device found for drive %d partition %d\n", DriveNum,Partition > "/dev/stderr" continue } # We have to echo commands into divvy because the -P option does # not display the node names. -N option is not available pre-5.0 cmd = "echo \"q\ne\"|divvy /dev/" PDev[DriveNum,Partition] if (Debug) ErrPrint("Getting division information with command:\n" cmd) cmd | getline line if (Debug) { ErrPrint("Output:") ErrPrint(line) } if (line ~ "No valid division table") { close(cmd) printf "NOTE: drive %d, partition %d is not divvied.\n", DriveNum,Partition continue } cmd | getline line if (Debug) ErrPrint(line) cmd | getline line if (Debug) ErrPrint(line) # 7th division info is used by -u option for (Division = 0; Division <= 7; Division++) { cmd | getline line if (Debug) ErrPrint(line) split(line,DInfo," *\\| *") if (DInfo[5] != Division) printf \ "Bad divvy output for drive %d, partition %d, division %d:\n%s\n", DriveNum,Partition,Division,line else if (DInfo[3] != "NOT USED") { Name[DriveNum,Partition,Division] = DInfo[2] DivType[DriveNum,Partition,Division] = DInfo[3] FirstBlock[DriveNum,Partition,Division] = DInfo[6] LastBlock[DriveNum,Partition,Division] = DInfo[7] } } close(cmd) } } function GetParams(DriveDevs,Cylinders,Heads,SPT, Drive) { for (DriveNum in DriveDevs) { # exit 0 to avoid gawk warning. Whether device could be opened can # be determined by whether data was read from the command. #CmdReadLine("dparam " DriveDevs[DriveNum] " 2>/dev/null; exit 0") if (CmdReadLine("exec dparam " DriveDevs[DriveNum] " 2>/dev/null",1) \ != 1) continue if (NF > 7) { Cylinders[DriveNum] = $1 Heads[DriveNum] = $2 SPT[DriveNum] = $8 } } } function ErrPrint(s) { printf "* %s\n",s > "/dev/stderr" } function Intersection(A,B,Inter, Elem,Count) { for (Elem in A) if (Elem in B) { Inter[Elem] Count++ } return Count } function Union(A,B,Both, Elem) { for (Elem in A) Both[Elem] for (Elem in B) Both[Elem] } # Deletes any elements that are in both Minuend and Subtrahend from Minuend. function SubtractSet(Minuend,Subtrahend, Elem) { for (Elem in Subtrahend) delete Minuend[Elem] } function CopySet(From,To, Elem) { for (Elem in From) To[Elem] } # Returns 1 if Set is empty, 0 if not. function IsEmpty(Set, i) { for (i in Set) return 0 return 1 } # MakeSet: make a set from a list. # An index with the name of each element of the list # is created in the given array. # Input variables: # Elements is a string containing the list of elements. # Sep is the character that separates the elements of the list. # Output variables: # Set is the array. # Return value: the number of elements added to the set. function MakeSet(Set,Elements,Sep, i,Num,Names) { Num = split(Elements,Names,Sep) for (i = 1; i <= Num; i++) Set[Names[i]] return Num } # Returns the number of elements in set Set function NumElem(Set, elem,Num) { for (elem in Set) Num++ return Num } # Occupy exactly 4 characters. function Percent4(Val) { if (Val+0 > 0.999) return " 100" else return sprintf("%4.1f",Val * 100) } # @(#) CmdReadLine 95/09/04 # Run Command, read a single line of output from it, then close it. # If Verbose is true, a complaint is issued if the read fails. # Output is returned in $*. $* will not be changed if Command fails, so it is # important to check the return value of this function. # The return value from getline is returned. It will be 1 on a successful # read; 0 if no lines were read due because the command produced no output # or could not be run. ERRNO is never set since pipes are run by a shell. function CmdReadLine(Command,Verbose, ret) { if (Debug) { print "* Issuing command: " Command "\n"\ "* Waiting for single line of output..." > "/dev/stderr" } ret = Command | getline if (Verbose && ret != 1) printf "Read from pipe \"%s\" failed\n",Command # close does not return a value under awk, only gawk close(Command) if (Debug && !ret) print "* Output: " $0 > "/dev/stderr" return ret } ### Start of ProcArgs library # @(#) ProcArgs 1.11 96/12/08 # 92/02/29 john h. dubois iii (john@armory.com) # 93/07/18 Added "#" arg type # 93/09/26 Do not count -h against MinArgs # 94/01/01 Stop scanning at first non-option arg. Added ">" option type. # Removed meaning of "+" or "-" by itself. # 94/03/08 Added & option and *()< option types. # 94/04/02 Added NoRCopt to Opts() # 94/06/11 Mark numeric variables as such. # 94/07/08 Opts(): Do not require any args if h option is given. # 95/01/22 Record options given more than once. Record option num in argv. # 95/06/08 Added ExclusiveOptions(). # 96/01/20 Let rcfiles be a colon-separated list of filenames. # Expand $VARNAME at the start of its filenames. # Let varname=0 and -option- turn off an option. # 96/05/05 Changed meaning of 7th arg to Opts; now can specify exactly how many # of the vars should be searched for in the environment. # Check for duplicate rcfiles. # 96/05/13 Return more specific error values. Note: ProcArgs() and InitOpts() # now return various negatives values on error, not just -1, and # Opts() may set Err to various positive values, not just 1. # Added AllowUnrecOpt. # 96/05/23 Check type given for & option # 96/06/15 Re-port to awk # 96/10/01 Moved file-reading code into ReadConfFile(), so that it can be # used by other functions. # 96/10/15 Added OptChars # 96/11/01 Added exOpts arg to Opts() # 96/11/16 Added ; type # 96/12/08 Added Opt2Set() & Opt2Sets() # 96/12/27 Added CmdLineOpt() # optlist is a string which contains all of the possible command line options. # A character followed by certain characters indicates that the option takes # an argument, with type as follows: # : String argument # ; Non-empty string argument # * Floating point argument # ( Non-negative floating point argument # ) Positive floating point argument # # Integer argument # < Non-negative integer argument # > Positive integer argument # The only difference the type of argument makes is in the runtime argument # error checking that is done. # The & option is a special case used to get numeric options without the # user having to give an option character. It is shorthand for [-+.0-9]. # If & is included in optlist and an option string that begins with one of # these characters is seen, the value given to "&" will include the first # char of the option. & must be followed by a type character other than ":" # or ";". # Note that if e.g. &> is given, an option of -.5 will produce an error. # Strings in argv[] which begin with "-" or "+" are taken to be # strings of options, except that a string which consists solely of "-" # or "+" is taken to be a non-option string; like other non-option strings, # it stops the scanning of argv and is left in argv[]. # An argument of "--" or "++" also stops the scanning of argv[] but is removed. # If an option takes an argument, the argument may either immediately # follow it or be given separately. # "-" and "+" options are treated the same. "+" is allowed because most awks # take any -options to be arguments to themselves. gawk 2.15 was enhanced to # stop scanning when it encounters an unrecognized option, though until 2.15.5 # this feature had a flaw that caused problems in some cases. See the OptChars # parameter to explicitly set the option-specifier characters. # If an option that does not take an argument is given, # an index with its name is created in Options and its value is set to the # number of times it occurs in argv[]. # If an option that does take an argument is given, an index with its name is # created in Options and its value is set to the value of the argument given # for it, and Options[option-name,"count"] is (initially) set to the 1. # If an option that takes an argument is given more than once, # Options[option-name,"count"] is incremented, and the value is assigned to # the index (option-name,instance) where instance is 2 for the second occurance # of the option, etc. # In other words, the first time an option with a value is encountered, the # value is assigned to an index consisting only of its name; for any further # occurances of the option, the value index has an extra (count) dimension. # The sequence number for each option found in argv[] is stored in # Options[option-name,"num",instance], where instance is 1 for the first # occurance of the option, etc. The sequence number starts at 1 and is # incremented for each option, both those that have a value and those that # do not. Options set from a config file have a value of 0 assigned to this. # Options and their arguments are deleted from argv. # Note that this means that there may be gaps left in the indices of argv[]. # If compress is nonzero, argv[] is packed by moving its elements so that # they have contiguous integer indices starting with 0. # Option processing will stop with the first unrecognized option, just as # though -- was given except that unlike -- the unrecognized option will not be # removed from ARGV[]. Normally, an error value is returned in this case. # If AllowUnrecOpt is true, it is not an error for an unrecognized option to # be found, so the number of remaining arguments is returned instead. # If OptChars is not a null string, it is the set of characters that indicate # that an argument is an option string if the string begins with one of the # characters. A string consisting solely of two of the same option-indicator # characters stops the scanning of argv[]. The default is "-+". # argv[0] is not examined. # The number of arguments left in argc is returned. # If an error occurs, the global string OptErr is set to an error message # and a negative value is returned. # Current error values: # -1: option that required an argument did not get it. # -2: argument of incorrect type supplied for an option. # -3: unrecognized (invalid) option. function ProcArgs(argc,argv,OptList,Options,compress,AllowUnrecOpt,OptChars, ArgNum,ArgsLeft,Arg,ArgLen,ArgInd,Option,Pos,NumOpt,Value,HadValue,specGiven, NeedNextOpt,GotValue,OptionNum,Escape,dest,src,count,c,OptTerm,OptCharSet) { # ArgNum is the index of the argument being processed. # ArgsLeft is the number of arguments left in argv. # Arg is the argument being processed. # ArgLen is the length of the argument being processed. # ArgInd is the position of the character in Arg being processed. # Option is the character in Arg being processed. # Pos is the position in OptList of the option being processed. # NumOpt is true if a numeric option may be given. ArgsLeft = argc NumOpt = index(OptList,"&") OptionNum = 0 if (OptChars == "") OptChars = "-+" while (OptChars != "") { c = substr(OptChars,1,1) OptChars = substr(OptChars,2) OptCharSet[c] OptTerm[c c] } for (ArgNum = 1; ArgNum < argc; ArgNum++) { Arg = argv[ArgNum] if (length(Arg) < 2 || !((specGiven = substr(Arg,1,1)) in OptCharSet)) break # Not an option; quit if (Arg in OptTerm) { delete argv[ArgNum] ArgsLeft-- break } ArgLen = length(Arg) for (ArgInd = 2; ArgInd <= ArgLen; ArgInd++) { Option = substr(Arg,ArgInd,1) if (NumOpt && Option ~ /[-+.0-9]/) { # If this option is a numeric option, make its flag be & and # its option string flag position be the position of & in # the option string. Option = "&" Pos = NumOpt # Prefix Arg with a char so that ArgInd will point to the # first char of the numeric option. Arg = "&" Arg ArgLen++ } # Find position of flag in option string, to get its type (if any). # Disallow & as literal flag. else if (!(Pos = index(OptList,Option)) || Option == "&") { if (AllowUnrecOpt) { Escape = 1 break } else { OptErr = "Invalid option: " specGiven Option return -3 } } # Find what the value of the option will be if it takes one. # NeedNextOpt is true if the option specifier is the last char of # this arg, which means that if the option requires a value it is # the next arg. if (NeedNextOpt = (ArgInd >= ArgLen)) { # Value is the next arg if (GotValue = ArgNum + 1 < argc) Value = argv[ArgNum+1] } else { # Value is included with option Value = substr(Arg,ArgInd + 1) GotValue = 1 } if (HadValue = AssignVal(Option,Value,Options, substr(OptList,Pos + 1,1),GotValue,"",++OptionNum,!NeedNextOpt, specGiven)) { if (HadValue < 0) # error occured return HadValue if (HadValue == 2) ArgInd++ # Account for the single-char value we used. else { if (NeedNextOpt) { # option took next arg as value delete argv[++ArgNum] ArgsLeft-- } break # This option has been used up } } } if (Escape) break # Do not delete arg until after processing of it, so that if it is not # recognized it can be left in ARGV[]. delete argv[ArgNum] ArgsLeft-- } if (compress != 0) { dest = 1 src = argc - ArgsLeft + 1 for (count = ArgsLeft - 1; count; count--) { ARGV[dest] = ARGV[src] dest++ src++ } } return ArgsLeft } # Assignment to values in Options[] occurs only in this function. # Option: Option specifier character. # Value: Value to be assigned to option, if it takes a value. # Options[]: Options array to return values in. # ArgType: Argument type specifier character. # GotValue: Whether any value is available to be assigned to this option. # Name: Name of option being processed. # OptionNum: Number of this option (starting with 1) if set in argv[], # or 0 if it was given in a config file or in the environment. # SingleOpt: true if the value (if any) that is available for this option was # given as part of the same command line arg as the option. Used only for # options from the command line. # specGiven is the option specifier character use, if any (e.g. - or +), # for use in error messages. # Global variables: OptErr # Return value: negative value on error, 0 if option did not require an # argument, 1 if it did & used the whole arg, 2 if it required just one char of # the arg. # Current error values: # -1: Option that required an argument did not get it. # -2: Value of incorrect type supplied for option. # -3: Bad type given for option & function AssignVal(Option,Value,Options,ArgType,GotValue,Name,OptionNum, SingleOpt,specGiven, UsedValue,Err,NumTypes) { # If option takes a value... [ NumTypes = "*()#<>]" if (Option == "&" && ArgType !~ "[" NumTypes) { # ] OptErr = "Bad type given for & option" return -3 } if (UsedValue = (ArgType ~ "[:;" NumTypes)) { # ] if (!GotValue) { if (Name != "") OptErr = "Variable requires a value -- " Name else OptErr = "option requires an argument -- " Option return -1 } if ((Err = CheckType(ArgType,Value,Option,Name,specGiven)) != "") { OptErr = Err return -2 } # Mark this as a numeric variable; will be propogated to Options[] val. if (ArgType != ":" && ArgType != ";") Value += 0 if ((Instance = ++Options[Option,"count"]) > 1) Options[Option,Instance] = Value else Options[Option] = Value } # If this is an environ or rcfile assignment & it was given a value... else if (!OptionNum && Value != "") { UsedValue = 1 # If the value is "0" or "-" and this is the first instance of it, # do not set Options[Option]; this allows an assignment in an rcfile to # turn off an option (for the simple "Option in Options" test) in such # a way that it cannot be turned on in a later file. if (!(Option in Options) && (Value == "0" || Value == "-")) Instance = 1 else Instance = ++Options[Option] # Save the value even though this is a flag Options[Option,Instance] = Value } # If this is a command line flag and has a - following it in the same arg, # it is being turned off. else if (OptionNum && SingleOpt && substr(Value,1,1) == "-") { UsedValue = 2 if (Option in Options) Instance = ++Options[Option] else Instance = 1 Options[Option,Instance] } # If this is a flag assignment without a value, increment the count for the # flag unless it was turned off. The indicator for a flag being turned off # is that the flag index has not been set in Options[] but it has an # instance count. else if (Option in Options || !((Option,1) in Options)) # Increment number of times this flag seen; will inc null value to 1 Instance = ++Options[Option] Options[Option,"num",Instance] = OptionNum return UsedValue } # Option is the option letter # Value is the value being assigned # Name is the var name of the option, if any # ArgType is one of: # : String argument # ; Non-null string argument # * Floating point argument # ( Non-negative floating point argument # ) Positive floating point argument # # Integer argument # < Non-negative integer argument # > Positive integer argument # specGiven is the option specifier character use, if any (e.g. - or +), # for use in error messages. # Returns null on success, err string on error function CheckType(ArgType,Value,Option,Name,specGiven, Err,ErrStr) { if (ArgType == ":") return "" if (ArgType == ";") { if (Value == "") Err = "must be a non-empty string" } # A number begins with optional + or -, and is followed by a string of # digits or a decimal with digits before it, after it, or both else if (Value !~ /^[-+]?([0-9]+|[0-9]*\.[0-9]+|[0-9]+\.)$/) Err = "must be a number" else if (ArgType ~ "[#<>]" && Value ~ /\./) Err = "may not include a fraction" else if (ArgType ~ "[()<>]" && Value < 0) Err = "may not be negative" # ( else if (ArgType ~ "[)>]" && Value == 0) Err = "must be a positive number" if (Err != "") { ErrStr = "Bad value \"" Value "\". Value assigned to " if (Name != "") return ErrStr "variable " substr(Name,1,1) " " Err else { if (Option == "&") Option = Value return ErrStr "option " specGiven substr(Option,1,1) " " Err } } else return "" } # Note: only the above functions are needed by ProcArgs. # The rest of these functions call ProcArgs() and also do other # option-processing stuff. # Opts: Process command line arguments. # Opts processes command line arguments using ProcArgs() # and checks for errors. If an error occurs, a message is printed # and the program is exited. # # Input variables: # Name is the name of the program, for error messages. # Usage is a usage message, for error messages. # OptList the option description string, as used by ProcArgs(). # MinArgs is the minimum number of non-option arguments that this # program should have, non including ARGV[0] and +h. # If the program does not require any non-option arguments, # MinArgs should be omitted or given as 0. # rcFiles, if given, is a colon-seprated list of filenames to read for # variable initialization. If a filename begins with ~/, the ~ is replaced # by the value of the environment variable HOME. If a filename begins with # $, the part from the character after the $ up until (but not including) # the first character not in [a-zA-Z0-9_] will be searched for in the # environment; if found its value will be substituted, if not the filename will # be discarded. # rcfiles are read in the order given. # Values given in them will not override values given on the command line, # and values given in later files will not override those set in earlier # files, because AssignVal() will store each with a different instance index. # The first instance of each variable, either on the command line or in an # rcfile, will be stored with no instance index, and this is the value # normally used by programs that call this function. # VarNames is a comma-separated list of variable names to map to options, # in the same order as the options are given in OptList. # If EnvSearch is given and nonzero, the first EnvSearch variables will also be # searched for in the environment. If set to -1, all values will be searched # for in the environment. Values given in the environment will override # those given in the rcfiles but not those given on the command line. # NoRCopt, if given, is an additional letter option that if given on the # command line prevents the rcfiles from being read. # See ProcArgs() for a description of AllowUnRecOpt and optChars, and # ExclusiveOptions() for a description of exOpts. # Special options: # If x is made an option and is given, some debugging info is output. # h is assumed to be the help option. # Global variables: # The command line arguments are taken from ARGV[]. # The arguments that are option specifiers and values are removed from # ARGV[], leaving only ARGV[0] and the non-option arguments. # The number of elements in ARGV[] should be in ARGC. # After processing, ARGC is set to the number of elements left in ARGV[]. # The option values are put in Options[]. # On error, Err is set to a positive integer value so it can be checked for in # an END block. # Return value: The number of elements left in ARGV is returned. # Must keep OptErr global since it may be set by InitOpts(). function Opts(Name,Usage,OptList,MinArgs,rcFiles,VarNames,EnvSearch,NoRCopt, AllowUnrecOpt,optChars,exOpts, ArgsLeft,e) { if (MinArgs == "") MinArgs = 0 ArgsLeft = ProcArgs(ARGC,ARGV,OptList NoRCopt,Options,1,AllowUnrecOpt, optChars) if (ArgsLeft < (MinArgs+1) && !("h" in Options)) { if (ArgsLeft >= 0) { OptErr = "Not enough arguments" Err = 4 } else Err = -ArgsLeft printf "%s: %s.\nUse -h for help.\n%s\n", Name,OptErr,Usage > "/dev/stderr" exit 1 } if (rcFiles != "" && (NoRCopt == "" || !(NoRCopt in Options)) && (e = InitOpts(rcFiles,Options,OptList,VarNames,EnvSearch)) < 0) { print Name ": " OptErr ".\nUse -h for help." > "/dev/stderr" Err = -e exit 1 } if ((exOpts != "") && ((OptErr = ExclusiveOptions(exOpts,Options)) != "")) { printf "%s: Error: %s\n",Name,OptErr > "/dev/stderr" Err = 1 exit 1 } return ArgsLeft } # ReadConfFile(): Read a file containing var/value assignments, in the form #

. # Whitespace (spaces and tabs) around a variable (leading whitespace on the # line and whitespace between the variable name and the assignment character) # is stripped. Lines that do not contain an assignment operator or which # contain a null variable name are ignored, other than possibly being noted in # the return value. If more than one assignment is made to a variable, the # first assignment is used. # Input variables: # File is the file to read. # Comment is the line-comment character. If it is found as the first non- # whitespace character on a line, the line is ignored. # Assign is the assignment string. The first instance of Assign on a line # separates the variable name from its value. # If StripWhite is true, whitespace around the value (whitespace between the # assignment char and trailing whitespace on the line) is stripped. # VarPat is a pattern that variable names must match. # Example: "^[a-zA-Z][a-zA-Z0-9]+$" # If FlagsOK is true, variables are allowed to be "set" by being put alone on # a line; no assignment operator is needed. These variables are set in # the output array with a null value. Lines containing nothing but # whitespace are still ignored. # Output variables: # Values[] contains the assignments, with the indexes being the variable names # and the values being the assigned values. # Lines[] contains the line number that each variable occured on. A flag set # is record by giving it an index in Lines[] but not in Values[]. # Return value: # If any errors occur, a string consisting of descriptions of the errors # separated by newlines is returned. In no case will the string start with a # numeric value. If no errors occur, the number of lines read is returned. function ReadConfigFile(Values,Lines,File,Comment,Assign,StripWhite,VarPat, FlagsOK, Line,Status,Errs,AssignLen,LineNum,Var,Val) { if (Comment != "") Comment = "^" Comment AssignLen = length(Assign) if (VarPat == "") VarPat = "." # null varname not allowed while ((Status = (getline Line < File)) == 1) { LineNum++ sub("^[ \t]+","",Line) if (Line == "") # blank line continue if (Comment != "" && Line ~ Comment) continue if (Pos = index(Line,Assign)) { Var = substr(Line,1,Pos-1) Val = substr(Line,Pos+AssignLen) if (StripWhite) { sub("^[ \t]+","",Val) sub("[ \t]+$","",Val) } } else { Var = Line # If no value, var is entire line Val = "" } if (!FlagsOK && Val == "") { Errs = Errs \ sprintf("\nBad assignment on line %d of file %s: %s", LineNum,File,Line) continue } sub("[ \t]+$","",Var) if (Var !~ VarPat) { Errs = Errs sprintf("\nBad variable name on line %d of file %s: %s", LineNum,File,Var) continue } if (!(Var in Lines)) { Lines[Var] = LineNum if (Pos) Values[Var] = Val } } if (Status) Errs = Errs "\nCould not read file " File close(File) return Errs == "" ? LineNum : substr(Errs,2) # Skip first newline } # Variables: # Data is stored in Options[]. # rcFiles, OptList, VarNames, and EnvSearch are as as described for Opts(). # Global vars: # Sets OptErr. Uses ENVIRON[]. # If anything is read from any of the rcfiles, sets READ_RCFILE to 1. function InitOpts(rcFiles,Options,OptList,VarNames,EnvSearch, Line,Var,Pos,Vars,Map,CharOpt,NumVars,TypesInd,Types,Type,Ret,i,rcFile, fNames,numrcFiles,filesRead,Err,Values,retStr) { split("",filesRead,"") # make awk know this is an array NumVars = split(VarNames,Vars,",") TypesInd = Ret = 0 if (EnvSearch == -1) EnvSearch = NumVars for (i = 1; i <= NumVars; i++) { Var = Vars[i] CharOpt = substr(OptList,++TypesInd,1) if (CharOpt ~ "^[:;*()#<>&]$") CharOpt = substr(OptList,++TypesInd,1) Map[Var] = CharOpt Types[Var] = Type = substr(OptList,TypesInd+1,1) # Do not overwrite entries from environment if (i <= EnvSearch && Var in ENVIRON && (Err = AssignVal(CharOpt,ENVIRON[Var],Options,Type,1,Var,0)) < 0) return Err } numrcFiles = split(rcFiles,fNames,":") for (i = 1; i <= numrcFiles; i++) { rcFile = fNames[i] if (rcFile ~ "^~/") rcFile = ENVIRON["HOME"] substr(rcFile,2) else if (rcFile ~ /^\$/) { rcFile = substr(rcFile,2) match(rcFile,"^[a-zA-Z0-9_]*") envvar = substr(rcFile,1,RLENGTH) if (envvar in ENVIRON) rcFile = ENVIRON[envvar] substr(rcFile,RLENGTH+1) else continue } if (rcFile in filesRead) continue # rcfiles are liable to be given more than once, e.g. UHOME and HOME # may be the same filesRead[rcFile] if ("x" in Options) printf "Reading configuration file %s\n",rcFile > "/dev/stderr" retStr = ReadConfigFile(Values,Lines,rcFile,"#","=",0,"",1) if (retStr > 0) READ_RCFILE = 1 else if (ret != "") { OptErr = retStr Ret = -1 } for (Var in Lines) if (Var in Map) { if ((Err = AssignVal(Map[Var], Var in Values ? Values[Var] : "",Options,Types[Var], Var in Values,Var,0)) < 0) return Err } else { OptErr = sprintf(\ "Unknown var \"%s\" assigned to on line %d\nof file %s",Var, Lines[Var],rcFile) Ret = -1 } } if ("x" in Options) for (Var in Map) if (Map[Var] in Options) printf "(%s) %s=%s\n",Map[Var],Var,Options[Map[Var]] > \ "/dev/stderr" else printf "(%s) %s not set\n",Map[Var],Var > "/dev/stderr" return Ret } # OptSets is a semicolon-separated list of sets of option sets. # Within a list of option sets, the option sets are separated by commas. For # each set of sets, if any option in one of the sets is in Options[] AND any # option in one of the other sets is in Options[], an error string is returned. # If no conflicts are found, nothing is returned. # Example: if OptSets = "ab,def,g;i,j", an error will be returned due to # the exclusions presented by the first set of sets (ab,def,g) if: # (a or b is in Options[]) AND (d, e, or f is in Options[]) OR # (a or b is in Options[]) AND (g is in Options) OR # (d, e, or f is in Options[]) AND (g is in Options) # An error will be returned due to the exclusions presented by the second set # of sets (i,j) if: (i is in Options[]) AND (j is in Options[]). # todo: make options given on command line unset options given in config file # todo: that they conflict with. function ExclusiveOptions(OptSets,Options, Sets,SetSet,NumSets,Pos1,Pos2,Len,s1,s2,c1,c2,ErrStr,L1,L2,SetSets,NumSetSets, SetNum,OSetNum) { NumSetSets = split(OptSets,SetSets,";") # For each set of sets... for (SetSet = 1; SetSet <= NumSetSets; SetSet++) { # NumSets is the number of sets in this set of sets. NumSets = split(SetSets[SetSet],Sets,",") # For each set in a set of sets except the last... for (SetNum = 1; SetNum < NumSets; SetNum++) { s1 = Sets[SetNum] L1 = length(s1) for (Pos1 = 1; Pos1 <= L1; Pos1++) # If any of the options in this set was given, check whether # any of the options in the other sets was given. Only check # later sets since earlier sets will have already been checked # against this set. if ((c1 = substr(s1,Pos1,1)) in Options) for (OSetNum = SetNum+1; OSetNum <= NumSets; OSetNum++) { s2 = Sets[OSetNum] L2 = length(s2) for (Pos2 = 1; Pos2 <= L2; Pos2++) if ((c2 = substr(s2,Pos2,1)) in Options) ErrStr = ErrStr "\n"\ sprintf("Cannot give both %s and %s options.", c1,c2) } } } if (ErrStr != "") return substr(ErrStr,2) return "" } # The value of each instance of option Opt that occurs in Options[] is made an # index of Set[]. # The return value is the number of instances of Opt in Options. function Opt2Set(Options,Opt,Set, count) { if (!(Opt in Options)) return 0 Set[Options[Opt]] count = Options[Opt,"count"] for (; count > 1; count--) Set[Options[Opt,count]] return count } # The value of each instance of option Opt that occurs in Options[] that # begins with "!" is made an index of nSet[] (with the ! stripped from it). # Other values are made indexes of Set[]. # The return value is the number of instances of Opt in Options. function Opt2Sets(Options,Opt,Set,nSet, count,aSet,ret) { ret = Opt2Set(Options,Opt,aSet) for (value in aSet) if (substr(value,1,1) == "!") nSet[substr(value,2)] else Set[value] return ret } # Returns true if option Opt was given on the command line. function CmdLineOpt(Options,Opt, i) { for (i = 1; (Opt,"num",i) in Options; i++) if (Options[Opt,"num",i] != 0) return 1 return 0 } ### End of ProcArgs library ### Begin qsort routines # Arr[] is an array of values with arbitrary indices. # k[] is returned with numeric indices 1..n. # The values in k[] are the indices of Arr[], # ordered so that if Arr[] is stepped through # in the order Arr[k[1]] .. Arr[k[n]], it will be stepped # through in order of the values of its elements. # The return value is the number of elements in the arrays (n). function qsortArbIndByValue(Arr,k, ArrInd,ElNum) { ElNum = 0 for (ArrInd in Arr) k[++ElNum] = ArrInd qsortSegment(Arr,k,1,ElNum) return ElNum } # Sort a segment of an array. # Arr[] contains data with arbitrary indices. # k[] has indices 1..nelem, with the indices of arr[] as values. # This function sorts the elements of arr that are pointed to by # k[start..end], swapping the values of elements of k[] so that # when this function returns arr[k[start..end]] will be in order. function qsortSegment(Arr,k,start,end, left,right,sepval,tmp,tmpe,tmps) { # handle two-element case explicitly for a tiny speedup if ((end - start) == 1) { if (Arr[tmps = k[start]] > Arr[tmpe = k[end]]) { k[start] = tmpe k[end] = tmps } return } # Make sure comparisons act on these as numbers left = start+0 right = end+0 sepval = Arr[k[int((left + right) / 2)]] # Make every element <= sepval be to the left of every element > sepval while (left < right) { while (Arr[k[left]] < sepval) left++ while (Arr[k[right]] > sepval) right-- if (left < right) { tmp = k[left] k[left++] = k[right] k[right--] = tmp } } if (left == right) if (Arr[k[left]] < sepval) left++ else right-- if (start < right) qsortSegment(Arr,k,start,right) if (left < end) qsortSegment(Arr,k,left,end) } # Arr[] is an array of values with arbitrary indices. # k[] is returned with numeric indices 1..n. # The values in k are the indices of Arr[], # ordered so that if Arr[] is stepped through # in the order Arr[k[1]] .. Arr[k[n]], it will be stepped # through in order of the values of its indices. # The return value is the number of elements in the arrays (n). # If the indexes are numeric, Numeric should be true, so that they can be # compared as such rather than as strings. Numeric indexes do not have to be # contiguous. function qsortByArbIndex(Arr,k,Numeric, ArrInd,ElNum) { ElNum = 0 if (Numeric) # Indexes do not preserve numeric type, so must be forced for (ArrInd in Arr) k[++ElNum] = ArrInd+0 else for (ArrInd in Arr) k[++ElNum] = ArrInd qsortNumIndByValue(k,1,ElNum) return ElNum } # Arr is an array of elements with contiguous numeric indexes to be sorted # by value. # start and end are the starting and ending indexes of the range to be sorted. function qsortNumIndByValue(Arr,start,end, left,right,sepval,tmp,tmpe,tmps) { # handle two-element case explicitly for a tiny speedup if ((start - end) == 1) { if ((tmps = Arr[start]) > (tmpe = Arr[end])) { Arr[start] = tmpe Arr[end] = tmps } return } left = start+0 right = end+0 sepval = Arr[int((left + right) / 2)] while (left < right) { while (Arr[left] < sepval) left++ while (Arr[right] > sepval) right-- if (left <= right) { tmp = Arr[left] Arr[left++] = Arr[right] Arr[right--] = tmp } } if (start < right) qsortNumIndByValue(Arr,start,right) if (left < end) qsortNumIndByValue(Arr,left,end) } ### End qsort routines # If Command is non-null, run Command and read lines from it until it closes # its output. Otherwise, read File to the end. # Put the lines in Lines indexed by the FieldNum'th field; if a line does not # have at least FieldNum fields, the line is stored under the null index. # If FieldNum is <=0, the indexing field is the FieldNum'th from the last; # if a lines does not have at least -FieldNum+1 fields, the line is stored # under the null index. # If more than one line is read has a given index, the 2nd and later lines are # appended to the indexed value separated from it by a preceding newline. function ReadLines(Command,File,FieldNum,Sep,Lines,Debug, Line,Elem,ret,i) { if (Debug) if (Command != "") print "Issuing command: " Command > "/dev/stderr" else print "Reading file: " File > "/dev/stderr" i = FieldNum while ((ret = \ (Command != "" ? (Command | getline Line) : (getline Line < File))) == 1) { split(Line,Elem,Sep) if (FieldNum <= 0) i = NF - FieldNum if (i < 1 || NF < FieldNum) if ("" in Lines) Lines[""] = Lines[""] "\n" Line else Lines[""] = Line if (Debug) print "Output line, indexed by field " i " (" Elem[i] "):\n" \ Line > "/dev/stderr" if (Elem[i] in Lines) Lines[Elem[i]] = Lines[Elem[i]] "\n" Line else Lines[Elem[i]] = Line } close(Command != "" ? Command : File) return ret }