Merge pull request #3215 from dtkerns/master

[English/en] Update awk.html.markdown
2024-12-27 01:03:19 +03:00 · 2018-09-12 13:09:29 +05:30 · 2018-09-12 13:09:29 +05:30 · 75ec4fe24a
commit 75ec4fe24a
parent 73a5e522f4 c8284c9c95
1 changed files with 132 additions and 108 deletions
--- a/awk.html.markdown
+++ b/awk.html.markdown
@ -6,14 +6,15 @@ contributors:
 ---
-AWK is a standard tool on every POSIX-compliant UNIX system. It's like a
+AWK is a standard tool on every POSIX-compliant UNIX system. It's like
-stripped-down Perl, perfect for text-processing tasks and other scripting
+flex/lex, from the command-line, perfect for text-processing tasks and
-needs. It has a C-like syntax, but without semicolons, manual memory
+other scripting needs. It has a C-like syntax, but without mandatory
-management, or static typing. It excels at text processing. You can call to it
+semicolons (although, you should use them anyway, because they are required
-from a shell script, or you can use it as a stand-alone scripting language.
+when you're writing one-liners, something AWK excels at), manual memory
 management, or static typing. It excels at text processing. You can call to
 it from a shell script, or you can use it as a stand-alone scripting language.
-Why use AWK instead of Perl? Mostly because AWK is part of UNIX. You can always
+Why use AWK instead of Perl? Readability. AWK is easier to read
 count on it, whereas Perl's future is in question. AWK is also easier to read
 than Perl. For simple text-processing scripts, particularly ones that read
 files line by line and split on delimiters, AWK is probably the right tool for
 the job.
@ -23,8 +24,23 @@ the job.
 # Comments are like this
-# AWK programs consist of a collection of patterns and actions. The most
+
-# important pattern is called BEGIN. Actions go into brace blocks.
+# AWK programs consist of a collection of patterns and actions.
 pattern1 { action; } # just like lex
 pattern2 { action; }
 # There is an implied loop and AWK automatically reads and parses each
 # record of each file supplied. Each record is split by the FS delimiter,
 # which defaults to white-space (multiple spaces or tabs count as one).
 # You can assign FS either on the command line (-F C) or in your BEGIN
 # pattern.
 # One of the special patterns is BEGIN. The BEGIN pattern is true
 # BEFORE any of the files are read. The END pattern is true after
 # an End-of-file from the last file (or standard-in if no files specified)
 # There is also an output field separator (OFS) that you can assign, which
 # defaults to a single space
 BEGIN {
    # BEGIN will run at the beginning of the program. It's where you put all
@ -32,114 +48,116 @@ BEGIN {
    # have no text files, then think of BEGIN as the main entry point.
    # Variables are global. Just set them or use them, no need to declare.
-    count = 0
+    count = 0;
    # Operators just like in C and friends
-    a = count + 1
+    a = count + 1;
-    b = count - 1
+    b = count - 1;
-    c = count * 1
+    c = count * 1;
-    d = count / 1 # integer division
+    d = count / 1; # division (floating point, not integer)
-    e = count % 1 # modulus
+    e = count % 1; # modulus
-    f = count ^ 1 # exponentiation
+    f = count ^ 1; # exponentiation
-    a += 1
+    a += 1;
-    b -= 1
+    b -= 1;
-    c *= 1
+    c *= 1;
-    d /= 1
+    d /= 1;
-    e %= 1
+    e %= 1;
-    f ^= 1
+    f ^= 1;
    # Incrementing and decrementing by one
-    a++
+    a++;
-    b--
+    b--;
    # As a prefix operator, it returns the incremented value
-    ++a
+    ++a;
-    --b
+    --b;
    # Notice, also, that the semicolons terminating statements are optional at line ends
    # Control statements
    if (count == 0)
-        print "Starting with count of 0"
+        print "Starting with count of 0";
    else
-        print "Huh?"
+        print "Huh?";
    # Or you could use the ternary operator
-    print (count == 0) ? "Starting with count of 0" : "Huh?"
+    print (count == 0) ? "Starting with count of 0" : "Huh?";
    # Blocks consisting of multiple lines use braces
    while (a < 10) {
        print "String concatenation is done" " with a series" " of"
-            " space-separated strings"
+            " space-separated strings";
-        print a
+        print a;
-        a++
+        a++;
    }
    for (i = 0; i < 10; i++)
-        print "Good ol' for loop"
+        print "Good ol' for loop";
    # As for comparisons, they're the standards:
-    a < b   # Less than
+    # a < b   # Less than
-    a <= b  # Less than or equal
+    # a <= b  # Less than or equal
-    a != b  # Not equal
+    # a != b  # Not equal
-    a == b  # Equal
+    # a == b  # Equal
-    a > b   # Greater than
+    # a > b   # Greater than
-    a >= b  # Greater than or equal
+    # a >= b  # Greater than or equal
    # Logical operators as well
-    a && b  # AND
+    # a && b  # AND
-    a || b  # OR
+    # a || b  # OR
    # In addition, there's the super useful regular expression match
    if ("foo" ~ "^fo+$")
-        print "Fooey!"
+        print "Fooey!";
    if ("boo" !~ "^fo+$")
-        print "Boo!"
+        print "Boo!";
    # Arrays
-    arr[0] = "foo"
+    arr[0] = "foo";
-    arr[1] = "bar"
+    arr[1] = "bar";
-    # Unfortunately, there is no other way to initialize an array. Ya just
+    
-    # gotta chug through every value line by line like that.
+    # You can also initialize an array with the built-in function split()
-
+    
-    # You also have associative arrays
+    n = split("foo:bar:baz", arr, ":");
-    assoc["foo"] = "bar"
+   
-    assoc["bar"] = "baz"
+    # You also have associative arrays (actually, they're all associative arrays)
    assoc["foo"] = "bar";
    assoc["bar"] = "baz";
    # And multi-dimensional arrays, with some limitations I won't mention here
-    multidim[0,0] = "foo"
+    multidim[0,0] = "foo";
-    multidim[0,1] = "bar"
+    multidim[0,1] = "bar";
-    multidim[1,0] = "baz"
+    multidim[1,0] = "baz";
-    multidim[1,1] = "boo"
+    multidim[1,1] = "boo";
    # You can test for array membership
    if ("foo" in assoc)
-        print "Fooey!"
+        print "Fooey!";
    # You can also use the 'in' operator to traverse the keys of an array
    for (key in assoc)
-        print assoc[key]
+        print assoc[key];
    # The command line is in a special array called ARGV
    for (argnum in ARGV)
-        print ARGV[argnum]
+        print ARGV[argnum];
    # You can remove elements of an array
    # This is particularly useful to prevent AWK from assuming the arguments
    # are files for it to process
-    delete ARGV[1]
+    delete ARGV[1];
    # The number of command line arguments is in a variable called ARGC
-    print ARGC
+    print ARGC;
    # AWK has several built-in functions. They fall into three categories. I'll
    # demonstrate each of them in their own functions, defined later.
-    return_value = arithmetic_functions(a, b, c)
+    return_value = arithmetic_functions(a, b, c);
-    string_functions()
+    string_functions();
-    io_functions()
+    io_functions();
 }
 # Here's how you define a function
@ -159,26 +177,26 @@ function arithmetic_functions(a, b, c,     d) {
    # Now, to demonstrate the arithmetic functions
    # Most AWK implementations have some standard trig functions
-    localvar = sin(a)
+    localvar = sin(a);
-    localvar = cos(a)
+    localvar = cos(a);
-    localvar = atan2(b, a) # arc tangent of b / a
+    localvar = atan2(b, a); # arc tangent of b / a
    # And logarithmic stuff
-    localvar = exp(a)
+    localvar = exp(a);
-    localvar = log(a)
+    localvar = log(a);
    # Square root
-    localvar = sqrt(a)
+    localvar = sqrt(a);
    # Truncate floating point to integer
-    localvar = int(5.34) # localvar => 5
+    localvar = int(5.34); # localvar => 5
    # Random numbers
-    srand() # Supply a seed as an argument. By default, it uses the time of day
+    srand(); # Supply a seed as an argument. By default, it uses the time of day
-    localvar = rand() # Random number between 0 and 1.
+    localvar = rand(); # Random number between 0 and 1.
    # Here's how to return a value
-    return localvar
+    return localvar;
 }
 function string_functions(    localvar, arr) {
@ -188,61 +206,66 @@ function string_functions(    localvar, arr) {
    # Search and replace, first instance (sub) or all instances (gsub)
    # Both return number of matches replaced
-    localvar = "fooooobar"
+    localvar = "fooooobar";
-    sub("fo+", "Meet me at the ", localvar) # localvar => "Meet me at the bar"
+    sub("fo+", "Meet me at the ", localvar); # localvar => "Meet me at the bar"
-    gsub("e+", ".", localvar) # localvar => "m..t m. at th. bar"
+    gsub("e+", ".", localvar); # localvar => "M.t m. at th. bar"
    # Search for a string that matches a regular expression
    # index() does the same thing, but doesn't allow a regular expression
-    match(localvar, "t") # => 4, since the 't' is the fourth character
+    match(localvar, "t"); # => 3, since the 't' is the third character
    # Split on a delimiter
-    split("foo-bar-baz", arr, "-") # a => ["foo", "bar", "baz"]
+    n = split("foo-bar-baz", arr, "-"); # arr[1] = "foo"; arr[2] = "bar"; arr[3] = "baz"; n = 3
    # Other useful stuff
-    sprintf("%s %d %d %d", "Testing", 1, 2, 3) # => "Testing 1 2 3"
+    sprintf("%s %d %d %d", "Testing", 1, 2, 3); # => "Testing 1 2 3"
-    substr("foobar", 2, 3) # => "oob"
+    substr("foobar", 2, 3); # => "oob"
-    substr("foobar", 4) # => "bar"
+    substr("foobar", 4); # => "bar"
-    length("foo") # => 3
+    length("foo"); # => 3
-    tolower("FOO") # => "foo"
+    tolower("FOO"); # => "foo"
-    toupper("foo") # => "FOO"
+    toupper("foo"); # => "FOO"
 }
 function io_functions(    localvar) {
    # You've already seen print
-    print "Hello world"
+    print "Hello world";
    # There's also printf
-    printf("%s %d %d %d\n", "Testing", 1, 2, 3)
+    printf("%s %d %d %d\n", "Testing", 1, 2, 3);
    # AWK doesn't have file handles, per se. It will automatically open a file
    # handle for you when you use something that needs one. The string you used
    # for this can be treated as a file handle, for purposes of I/O. This makes
-    # it feel sort of like shell scripting:
+    # it feel sort of like shell scripting, but to get the same output, the string
    # must match exactly, so use a variable:
    outfile = "/tmp/foobar.txt";
-    print "foobar" >"/tmp/foobar.txt"
+    print "foobar" > outfile;
-    # Now the string "/tmp/foobar.txt" is a file handle. You can close it:
+    # Now the string outfile is a file handle. You can close it:
-    close("/tmp/foobar.txt")
+    close(outfile);
    # Here's how you run something in the shell
-    system("echo foobar") # => prints foobar
+    system("echo foobar"); # => prints foobar
    # Reads a line from standard input and stores in localvar
-    getline localvar
+    getline localvar;
-    # Reads a line from a pipe
+    # Reads a line from a pipe (again, use a string so you close it properly)
-    "echo foobar" | getline localvar # localvar => "foobar"
+    cmd = "echo foobar";
-    close("echo foobar")
+    cmd | getline localvar; # localvar => "foobar"
    close(cmd);
    # Reads a line from a file and stores in localvar
-    getline localvar <"/tmp/foobar.txt"
+    infile = "/tmp/foobar.txt";
-    close("/tmp/foobar.txt")
+    getline localvar < infile; 
    close(infile);
 }
 # As I said at the beginning, AWK programs consist of a collection of patterns
-# and actions. You've already seen the all-important BEGIN pattern. Other
+# and actions. You've already seen the BEGIN pattern. Other
 # patterns are used only if you're processing lines from files or standard
 # input.
 #
@ -257,7 +280,7 @@ function io_functions(    localvar) {
    # expression, /^fo+bar$/, and will be skipped for any line that fails to
    # match it. Let's just print the line:
-    print
+    print;
    # Whoa, no argument! That's because print has a default argument: $0.
    # $0 is the name of the current line being processed. It is created
@ -268,16 +291,16 @@ function io_functions(    localvar) {
    # does. And, like the shell, each field can be accessed with a dollar sign
    # This will print the second and fourth fields in the line
-    print $2, $4
+    print $2, $4;
    # AWK automatically defines many other variables to help you inspect and
    # process each line. The most important one is NF
    # Prints the number of fields on this line
-    print NF
+    print NF;
    # Print the last field on this line
-    print $NF
+    print $NF;
 }
 # Every pattern is actually a true/false test. The regular expression in the
@ -286,7 +309,7 @@ function io_functions(    localvar) {
 # currently processing. Thus, the complete version of it is this:
 $0 ~ /^fo+bar$/ {
-    print "Equivalent to the last pattern"
+    print "Equivalent to the last pattern";
 }
 a > 0 {
@ -315,10 +338,10 @@ a > 0 {
 BEGIN {
    # First, ask the user for the name
-    print "What name would you like the average age for?"
+    print "What name would you like the average age for?";
    # Get a line from standard input, not from files on the command line
-    getline name <"/dev/stdin"
+    getline name < "/dev/stdin";
 }
 # Now, match every line whose first field is the given name
@ -335,8 +358,8 @@ $1 == name {
    # ...etc. There are plenty more, documented in the man page.
    # Keep track of a running total and how many lines matched
-    sum += $3
+    sum += $3;
-    nlines++
+    nlines++;
 }
 # Another special pattern is called END. It will run after processing all the
@ -348,7 +371,7 @@ $1 == name {
 END {
    if (nlines)
-        print "The average age for " name " is " sum / nlines
+        print "The average age for " name " is " sum / nlines;
 }
 ```
@ -357,3 +380,4 @@ Further Reading:
 * [Awk tutorial](http://www.grymoire.com/Unix/Awk.html)
 * [Awk man page](https://linux.die.net/man/1/awk)
 * [The GNU Awk User's Guide](https://www.gnu.org/software/gawk/manual/gawk.html) GNU Awk is found on most Linux systems.
 * [AWK one-liner collection](http://tuxgraphics.org/~guido/scripts/awk-one-liner.html)