Update awk.html.markdown

This commit is contained in:
dtkerns 2018-09-11 13:52:30 -07:00 committed by GitHub
parent 4c36ee6943
commit c8284c9c95
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -6,14 +6,15 @@ contributors:
--- ---
AWK is a standard tool on every POSIX-compliant UNIX system. It's like a AWK is a standard tool on every POSIX-compliant UNIX system. It's like
stripped-down Perl, perfect for text-processing tasks and other scripting flex/lex, from the command-line, perfect for text-processing tasks and
needs. It has a C-like syntax, but without semicolons, manual memory other scripting needs. It has a C-like syntax, but without mandatory
management, or static typing. It excels at text processing. You can call to it semicolons (although, you should use them anyway, because they are required
from a shell script, or you can use it as a stand-alone scripting language. when you're writing one-liners, something AWK excels at), manual memory
management, or static typing. It excels at text processing. You can call to
it from a shell script, or you can use it as a stand-alone scripting language.
Why use AWK instead of Perl? Mostly because AWK is part of UNIX. You can always Why use AWK instead of Perl? Readability. AWK is easier to read
count on it, whereas Perl's future is in question. AWK is also easier to read
than Perl. For simple text-processing scripts, particularly ones that read than Perl. For simple text-processing scripts, particularly ones that read
files line by line and split on delimiters, AWK is probably the right tool for files line by line and split on delimiters, AWK is probably the right tool for
the job. the job.
@ -23,8 +24,23 @@ the job.
# Comments are like this # Comments are like this
# AWK programs consist of a collection of patterns and actions. The most
# important pattern is called BEGIN. Actions go into brace blocks. # AWK programs consist of a collection of patterns and actions.
pattern1 { action; } # just like lex
pattern2 { action; }
# There is an implied loop and AWK automatically reads and parses each
# record of each file supplied. Each record is split by the FS delimiter,
# which defaults to white-space (multiple spaces or tabs count as one).
# You can assign FS either on the command line (-F C) or in your BEGIN
# pattern
# One of the special patterns is BEGIN. The BEGIN pattern is true
# BEFORE any of the files are read. The END pattern is true after
# an End-of-file from the last file (or standard-in if no files specified)
# There is also an output field separator (OFS) that you can assign, which
# defaults to a single space
BEGIN { BEGIN {
# BEGIN will run at the beginning of the program. It's where you put all # BEGIN will run at the beginning of the program. It's where you put all
@ -32,114 +48,116 @@ BEGIN {
# have no text files, then think of BEGIN as the main entry point. # have no text files, then think of BEGIN as the main entry point.
# Variables are global. Just set them or use them, no need to declare.. # Variables are global. Just set them or use them, no need to declare..
count = 0 count = 0;
# Operators just like in C and friends # Operators just like in C and friends
a = count + 1 a = count + 1;
b = count - 1 b = count - 1;
c = count * 1 c = count * 1;
d = count / 1 # division (floating point, not integer) d = count / 1; # division (floating point, not integer)
e = count % 1 # modulus e = count % 1; # modulus
f = count ^ 1 # exponentiation f = count ^ 1; # exponentiation
a += 1 a += 1;
b -= 1 b -= 1;
c *= 1 c *= 1;
d /= 1 d /= 1;
e %= 1 e %= 1;
f ^= 1 f ^= 1;
# Incrementing and decrementing by one # Incrementing and decrementing by one
a++ a++;
b-- b--;
# As a prefix operator, it returns the incremented value # As a prefix operator, it returns the incremented value
++a ++a;
--b --b;
# Notice, also, no punctuation such as semicolons to terminate statements # Notice, also, no punctuation such as semicolons to terminate statements
# Control statements # Control statements
if (count == 0) if (count == 0)
print "Starting with count of 0" print "Starting with count of 0";
else else
print "Huh?" print "Huh?";
# Or you could use the ternary operator # Or you could use the ternary operator
print (count == 0) ? "Starting with count of 0" : "Huh?" print (count == 0) ? "Starting with count of 0" : "Huh?";
# Blocks consisting of multiple lines use braces # Blocks consisting of multiple lines use braces
while (a < 10) { while (a < 10) {
print "String concatenation is done" " with a series" " of" print "String concatenation is done" " with a series" " of"
" space-separated strings" " space-separated strings";
print a print a;
a++ a++;
} }
for (i = 0; i < 10; i++) for (i = 0; i < 10; i++)
print "Good ol' for loop" print "Good ol' for loop";
# As for comparisons, they're the standards: # As for comparisons, they're the standards:
a < b # Less than # a < b # Less than
a <= b # Less than or equal # a <= b # Less than or equal
a != b # Not equal # a != b # Not equal
a == b # Equal # a == b # Equal
a > b # Greater than # a > b # Greater than
a >= b # Greater than or equal # a >= b # Greater than or equal
# Logical operators as well # Logical operators as well
a && b # AND # a && b # AND
a || b # OR # a || b # OR
# In addition, there's the super useful regular expression match # In addition, there's the super useful regular expression match
if ("foo" ~ "^fo+$") if ("foo" ~ "^fo+$")
print "Fooey!" print "Fooey!";
if ("boo" !~ "^fo+$") if ("boo" !~ "^fo+$")
print "Boo!" print "Boo!";
# Arrays # Arrays
arr[0] = "foo" arr[0] = "foo";
arr[1] = "bar" arr[1] = "bar";
# Unfortunately, there is no other way to initialize an array. Ya just
# gotta chug through every value line by line like that. # You can also initialize an array with the built-in function split()
# You also have associative arrays n = split("foo:bar:baz", arr, ":");
assoc["foo"] = "bar"
assoc["bar"] = "baz" # You also have associative arrays (actually, they're all associative arrays)
assoc["foo"] = "bar";
assoc["bar"] = "baz";
# And multi-dimensional arrays, with some limitations I won't mention here # And multi-dimensional arrays, with some limitations I won't mention here
multidim[0,0] = "foo" multidim[0,0] = "foo";
multidim[0,1] = "bar" multidim[0,1] = "bar";
multidim[1,0] = "baz" multidim[1,0] = "baz";
multidim[1,1] = "boo" multidim[1,1] = "boo";
# You can test for array membership # You can test for array membership
if ("foo" in assoc) if ("foo" in assoc)
print "Fooey!" print "Fooey!";
# You can also use the 'in' operator to traverse the keys of an array # You can also use the 'in' operator to traverse the keys of an array
for (key in assoc) for (key in assoc)
print assoc[key] print assoc[key];
# The command line is in a special array called ARGV # The command line is in a special array called ARGV
for (argnum in ARGV) for (argnum in ARGV)
print ARGV[argnum] print ARGV[argnum];
# You can remove elements of an array # You can remove elements of an array
# This is particularly useful to prevent AWK from assuming the arguments # This is particularly useful to prevent AWK from assuming the arguments
# are files for it to process # are files for it to process
delete ARGV[1] delete ARGV[1];
# The number of command line arguments is in a variable called ARGC # The number of command line arguments is in a variable called ARGC
print ARGC print ARGC;
# AWK has several built-in functions. They fall into three categories. I'll # AWK has several built-in functions. They fall into three categories. I'll
# demonstrate each of them in their own functions, defined later. # demonstrate each of them in their own functions, defined later.
return_value = arithmetic_functions(a, b, c) return_value = arithmetic_functions(a, b, c);
string_functions() string_functions();
io_functions() io_functions();
} }
# Here's how you define a function # Here's how you define a function
@ -159,26 +177,26 @@ function arithmetic_functions(a, b, c, d) {
# Now, to demonstrate the arithmetic functions # Now, to demonstrate the arithmetic functions
# Most AWK implementations have some standard trig functions # Most AWK implementations have some standard trig functions
localvar = sin(a) localvar = sin(a);
localvar = cos(a) localvar = cos(a);
localvar = atan2(b, a) # arc tangent of b / a localvar = atan2(b, a); # arc tangent of b / a
# And logarithmic stuff # And logarithmic stuff
localvar = exp(a) localvar = exp(a);
localvar = log(a) localvar = log(a);
# Square root # Square root
localvar = sqrt(a) localvar = sqrt(a);
# Truncate floating point to integer # Truncate floating point to integer
localvar = int(5.34) # localvar => 5 localvar = int(5.34); # localvar => 5
# Random numbers # Random numbers
srand() # Supply a seed as an argument. By default, it uses the time of day srand(); # Supply a seed as an argument. By default, it uses the time of day
localvar = rand() # Random number between 0 and 1. localvar = rand(); # Random number between 0 and 1.
# Here's how to return a value # Here's how to return a value
return localvar return localvar;
} }
function string_functions( localvar, arr) { function string_functions( localvar, arr) {
@ -188,61 +206,66 @@ function string_functions( localvar, arr) {
# Search and replace, first instance (sub) or all instances (gsub) # Search and replace, first instance (sub) or all instances (gsub)
# Both return number of matches replaced # Both return number of matches replaced
localvar = "fooooobar" localvar = "fooooobar";
sub("fo+", "Meet me at the ", localvar) # localvar => "Meet me at the bar" sub("fo+", "Meet me at the ", localvar); # localvar => "Meet me at the bar"
gsub("e+", ".", localvar) # localvar => "M.t m. at th. bar" gsub("e+", ".", localvar); # localvar => "M.t m. at th. bar"
# Search for a string that matches a regular expression # Search for a string that matches a regular expression
# index() does the same thing, but doesn't allow a regular expression # index() does the same thing, but doesn't allow a regular expression
match(localvar, "t") # => 4, since the 't' is the fourth character match(localvar, "t"); # => 4, since the 't' is the fourth character
# Split on a delimiter # Split on a delimiter
split("foo-bar-baz", arr, "-") # a => ["foo", "bar", "baz"] n = split("foo-bar-baz", arr, "-"); # a[1] = "foo"; a[2] = "bar"; a[3] = "baz"; n = 3
# Other useful stuff # Other useful stuff
sprintf("%s %d %d %d", "Testing", 1, 2, 3) # => "Testing 1 2 3" sprintf("%s %d %d %d", "Testing", 1, 2, 3); # => "Testing 1 2 3"
substr("foobar", 2, 3) # => "oob" substr("foobar", 2, 3); # => "oob"
substr("foobar", 4) # => "bar" substr("foobar", 4); # => "bar"
length("foo") # => 3 length("foo"); # => 3
tolower("FOO") # => "foo" tolower("FOO"); # => "foo"
toupper("foo") # => "FOO" toupper("foo"); # => "FOO"
} }
function io_functions( localvar) { function io_functions( localvar) {
# You've already seen print # You've already seen print
print "Hello world" print "Hello world";
# There's also printf # There's also printf
printf("%s %d %d %d\n", "Testing", 1, 2, 3) printf("%s %d %d %d\n", "Testing", 1, 2, 3);
# AWK doesn't have file handles, per se. It will automatically open a file # AWK doesn't have file handles, per se. It will automatically open a file
# handle for you when you use something that needs one. The string you used # handle for you when you use something that needs one. The string you used
# for this can be treated as a file handle, for purposes of I/O. This makes # for this can be treated as a file handle, for purposes of I/O. This makes
# it feel sort of like shell scripting: # it feel sort of like shell scripting, but to get the same output, the string
# must match exactly, so use a variable:
outfile = "/tmp/foobar.txt";
print "foobar" >"/tmp/foobar.txt" print "foobar" > outfile;
# Now the string "/tmp/foobar.txt" is a file handle. You can close it: # Now the string outfile is a file handle. You can close it:
close("/tmp/foobar.txt") close(outfile);
# Here's how you run something in the shell # Here's how you run something in the shell
system("echo foobar") # => prints foobar system("echo foobar"); # => prints foobar
# Reads a line from standard input and stores in localvar # Reads a line from standard input and stores in localvar
getline localvar getline localvar;
# Reads a line from a pipe # Reads a line from a pipe (again, use a string so you close it properly)
"echo foobar" | getline localvar # localvar => "foobar" cmd = "echo foobar";
close("echo foobar") cmd | getline localvar; # localvar => "foobar"
close(cmd);
# Reads a line from a file and stores in localvar # Reads a line from a file and stores in localvar
getline localvar <"/tmp/foobar.txt" infile = "/tmp/foobar.txt";
close("/tmp/foobar.txt") getline localvar < infile;
close(infile);
} }
# As I said at the beginning, AWK programs consist of a collection of patterns # As I said at the beginning, AWK programs consist of a collection of patterns
# and actions. You've already seen the all-important BEGIN pattern. Other # and actions. You've already seen the BEGIN pattern. Other
# patterns are used only if you're processing lines from files or standard # patterns are used only if you're processing lines from files or standard
# input. # input.
# #
@ -257,7 +280,7 @@ function io_functions( localvar) {
# expression, /^fo+bar$/, and will be skipped for any line that fails to # expression, /^fo+bar$/, and will be skipped for any line that fails to
# match it. Let's just print the line: # match it. Let's just print the line:
print print;
# Whoa, no argument! That's because print has a default argument: $0. # Whoa, no argument! That's because print has a default argument: $0.
# $0 is the name of the current line being processed. It is created # $0 is the name of the current line being processed. It is created
@ -268,16 +291,16 @@ function io_functions( localvar) {
# does. And, like the shell, each field can be accessed with a dollar sign # does. And, like the shell, each field can be accessed with a dollar sign
# This will print the second and fourth fields in the line # This will print the second and fourth fields in the line
print $2, $4 print $2, $4;
# AWK automatically defines many other variables to help you inspect and # AWK automatically defines many other variables to help you inspect and
# process each line. The most important one is NF # process each line. The most important one is NF
# Prints the number of fields on this line # Prints the number of fields on this line
print NF print NF;
# Print the last field on this line # Print the last field on this line
print $NF print $NF;
} }
# Every pattern is actually a true/false test. The regular expression in the # Every pattern is actually a true/false test. The regular expression in the
@ -286,7 +309,7 @@ function io_functions( localvar) {
# currently processing. Thus, the complete version of it is this: # currently processing. Thus, the complete version of it is this:
$0 ~ /^fo+bar$/ { $0 ~ /^fo+bar$/ {
print "Equivalent to the last pattern" print "Equivalent to the last pattern";
} }
a > 0 { a > 0 {
@ -315,10 +338,10 @@ a > 0 {
BEGIN { BEGIN {
# First, ask the user for the name # First, ask the user for the name
print "What name would you like the average age for?" print "What name would you like the average age for?";
# Get a line from standard input, not from files on the command line # Get a line from standard input, not from files on the command line
getline name <"/dev/stdin" getline name < "/dev/stdin";
} }
# Now, match every line whose first field is the given name # Now, match every line whose first field is the given name
@ -335,8 +358,8 @@ $1 == name {
# ...etc. There are plenty more, documented in the man page. # ...etc. There are plenty more, documented in the man page.
# Keep track of a running total and how many lines matched # Keep track of a running total and how many lines matched
sum += $3 sum += $3;
nlines++ nlines++;
} }
# Another special pattern is called END. It will run after processing all the # Another special pattern is called END. It will run after processing all the
@ -348,7 +371,7 @@ $1 == name {
END { END {
if (nlines) if (nlines)
print "The average age for " name " is " sum / nlines print "The average age for " name " is " sum / nlines;
} }
``` ```
@ -357,3 +380,4 @@ Further Reading:
* [Awk tutorial](http://www.grymoire.com/Unix/Awk.html) * [Awk tutorial](http://www.grymoire.com/Unix/Awk.html)
* [Awk man page](https://linux.die.net/man/1/awk) * [Awk man page](https://linux.die.net/man/1/awk)
* [The GNU Awk User's Guide](https://www.gnu.org/software/gawk/manual/gawk.html) GNU Awk is found on most Linux systems. * [The GNU Awk User's Guide](https://www.gnu.org/software/gawk/manual/gawk.html) GNU Awk is found on most Linux systems.
* [AWK one-liner collection](http://tuxgraphics.org/~guido/scripts/awk-one-liner.html)