From b2fb2b441ba323a6a5e369ca9fae72d952855d05 Mon Sep 17 00:00:00 2001 From: Ed Howland Date: Wed, 27 Dec 2023 14:04:52 -0500 Subject: [PATCH] Adds Modules/recursion : Examples of Fun with Recursive functions in Nu (#717) This directory contains some examples of running recursive algorithms in Nu. It also has the module : tramp which implements the Trampoline pattern to overcome the lack of Tail Call Optimization (TCO) avoiding unlimited stack growth. --------- Co-authored-by: Darren Schroeder <343840+fdncred@users.noreply.github.com> --- modules/recursion/README.md | 214 +++++++++++++++++++++++++++ modules/recursion/countdown.nu | 11 ++ modules/recursion/eggcaker-typeof.nu | 6 + modules/recursion/even-odd.nu | 30 ++++ modules/recursion/fact.nu | 9 ++ modules/recursion/fib.nu | 48 ++++++ modules/recursion/gcd.nu | 15 ++ modules/recursion/merge.nu | 36 +++++ modules/recursion/tramp.nu | 53 +++++++ modules/recursion/tree.nu | 54 +++++++ modules/recursion/typeof.nu | 21 +++ 11 files changed, 497 insertions(+) create mode 100644 modules/recursion/README.md create mode 100644 modules/recursion/countdown.nu create mode 100644 modules/recursion/eggcaker-typeof.nu create mode 100644 modules/recursion/even-odd.nu create mode 100644 modules/recursion/fact.nu create mode 100644 modules/recursion/fib.nu create mode 100644 modules/recursion/gcd.nu create mode 100644 modules/recursion/merge.nu create mode 100644 modules/recursion/tramp.nu create mode 100644 modules/recursion/tree.nu create mode 100644 modules/recursion/typeof.nu diff --git a/modules/recursion/README.md b/modules/recursion/README.md new file mode 100644 index 00000000..4c6b0de6 --- /dev/null +++ b/modules/recursion/README.md @@ -0,0 +1,214 @@ +# Recursion : Scripts to help with recursive calls in custom commands + +## Manifest + +- tramp.nu : Module for using the trampoline pattern +- countdown.nu : Simple countdowner that uses the tramp module +- even-odd.nu : Example of a mutually recursive pair of functions 
+that use tramp
+- gcd.nu: Recursive example of Euclid's greatest common divisor algorithm
+- fact.nu : Factorial calculation that uses tramp module
+- fib.nu: Fibonacci's recursive algorithm that uses the tramp module
+- merge.nu: Recursive merge sort
+- tree.nu: Recursively applies closure to every node in any input, structured or scalar
+
+## The Trampoline pattern
+
+Currently, Nu does not support Tail Call Optimization (TCO), so using
+recursion in your custom commands might cause a stack overflow panic. In versions
+previous to 1.0 (and maybe after that) this error is non-recoverable.
+With TCO, if implemented in Nu, this problem would be avoidable if recursive
+custom commands were written using either Accumulator Passing Style (APS) or
+Continuation Passing Style (CPS). This is because in both of these styles
+the recursive case is now physically in the tail position of the function body.
+In other words, it is the last thing that happens before the function exits, and the
+compiler can rewrite it to just be a jump, making it essentially a normal loop.
+
+However, there is a technique called the Trampoline pattern which can be used to
+overcome the limitation in languages like Nu that lack TCO.
+
+If you already have your recursive case in the tail position you can wrap
+this call in a thunk.
+(A thunk is merely a closure of 0 arguments.)
+
+The trampoline is a function that takes a recursive function that has been
+"thunkified" in the above manner and iterates over it until it reaches the base
+case, at which point it just returns the last result.
+
+
+## Example usage
+
+```nu
+use tramp.nu
+# Compute the factorial of a number
+# This version just returns either the value or a thunk.
+# Meant to be used in a trampoline
+# But still uses APS
+def fact [n: int, acc=1] -> int {
+  if $n <= 1 { return $acc } else {
+    {|| fact ($n - 1) ($n * $acc) } # The thunk being returned to the trampoline
+  }
+}
+```
+
+
+Now use it:
+
+```nu
+source fact.nu
+tramp recurse (fact 19)
+121645100408832000
+```
+
+
+Note: factorial here will blow up with values of n larger than 20 due to operator
+overflow errors, not a stack overflow.
+
+## Provided examples
+
+### countdown.nu
+
+This function takes a value and then simply counts down to 0 recursively.
+
+```nu
+use tramp.nu
+source countdown.nu
+tramp recurse (countdown 100000000)
+0
+```
+
+Note, this might take some time to compute with large values of n.
+
+### even-odd.nu
+
+This pair of functions, even and odd, will return true if the passed in number
+is either even or odd respectively. Each does this by mutually recursively calling
+its partner until a base case is reached.
+
+The logic is that a number is even if it is 0; otherwise it is even if not (odd ($n - 2)).
+A number is odd if it is 1 and not odd if it is 0; otherwise it is odd if not (even ($n - 2)).
+
+E.g. Say we pass 4 to even.
+It is not 0, therefore call odd (4 - 2), i.e. odd 2, and then invert that result.
+Odd asks if the number (2) is 1; it is not, so call even (2 - 2), i.e. even 0, and invert that result.
+Even knows that 0 is even so it hits the base case and returns true.
+Odd returns false, the inverse of true.
+The previous call to even then inverts this false result and returns true.
+Thus, even 4 is true.
+
+## Example usage
+
+```nu
+use tramp.nu
+source even-odd.nu
+tramp recurse (odd 1234567)
+true
+```
+
+
+
+Be aware that this method of computing either an even or odd truth actually
+will take about 1/2 the number of steps as the passed in initial value. even
+will be called 1/4th of the number of the initial value and odd will
+be called the other 1/4th of the times.
+Thus, large values of n might take some seconds.
+
+
+## Tips and Caveats
+
+Currently, in versions of Nushell less than 1.0 or about, writing normal
+recursive functions that use stack depths of less than 700 will be OK.
+For larger values that might cause the stack to overflow, modifying the structure
+of the function could result in lower space complexity, especially with regard
+to the stack.
+
+### Using Accumulator Passing Style
+
+The goal of restructuring functions to pass a growing accumulator is to move
+the recursive call to the tail call position. If the language supports
+tail call optimization, then that is all that is required. For other languages,
+you can use the trampolining method described here. Essentially, you do:
+
+1. Restructure to APS
+2. Wrap the recursive call, now in the tail position, in a thunk, a no arg closure.
+3. (Possibly) wrap the call to the trampoline function in another function.
+
+### Accumulator Passing Style (APS)
+
+1. Add a default final parameter to the function signature.
+2. Give the accumulator the base value as the default value
+3. Return the accumulator in the base case instead of the normal base value.
+4. Invert the actual step in the tail position to compute the accumulator
+5. Move the recursive call to the tail position.
+
+Using the example of factorial above, we can see the sub tree of the AST as
+consisting of:
+
+
+
+In steps 1 and 2, we do
+
+```nu
+def fact-aps [n: int, acc: int=1] {
+```
+
+Note that, in multiplication recurrences, 1 is the identity value.
+This comes up again and again in recursive functions.
+
+
+
+In steps 3 - 5, we invert the AST subtree to compute the accumulator
+as we make deeper and deeper recursive calls. In many cases, we use the passed
+in value of the previous accumulator in the further computation.
+
+E.g. for factorial:
+
+```nu
+# the recursive call
+  } else {
+    fact ($n - 1) ($n * $acc)
+  }
+```
+
+
+For factorial, as the stack grows taller, the values of $n reduce more and more
+to the base case.
The values of $acc grow larger until the base case is
+reached, at which point the $acc value is returned
+and the stack unwinds, returning the accumulator's computed value.
+
+
+
+Again, this does nothing to reduce stack growth, unless TCO is involved.
+In that case, the stack only grows by 2 stack frames max.
+
+### Adding the Thunk
+
+
+
+The final step to use the Trampoline pattern is to wrap the final call in
+the recursive case in a thunk. So, your new function will return either
+its computed accumulator
+ or a closure that stores the next step to be performed. In some languages
+this last action can be performed by the language itself or by some AST or macro
+steps.
+
+## Double recursion
+
+For some algorithms, the number of recursive calls grows geometrically, e.g. by a factor
+of 2 each time. Two such functions are Fibonacci and merge sort. Every
+recursive call results in 2 additional recursive calls.
+
+Fibonacci can be turned into APS via a sliding double accumulator.
+And once converted, it can be thunkified for trampoline purposes. See the file fib.nu
+for an example.
+
+However, merge sort cannot so easily be converted into APS.
+This is because it builds an ever deeper binary tree of calls until it reaches
+its many base cases, upon which it does all of its work (merging) as it collapses this
+tree.
+ It does not, therefore, have anywhere to gather intermediate results in an
+accumulator.
+
+It should be possible, however, to use CPS or continuation passing style
+to move calls into the tail position. This is left as an exercise for the reader.
+It seems pointless, to the author at least, to even attempt in Nu because
+Nushell already has perfectly acceptable sort commands.
diff --git a/modules/recursion/countdown.nu b/modules/recursion/countdown.nu
new file mode 100644
index 00000000..8f42c454
--- /dev/null
+++ b/modules/recursion/countdown.nu
@@ -0,0 +1,11 @@
+
+# Simple countdown counter from some number n to 0.
Returns 0 at end
+# Designed to be used with the tramp module to avoid stack overflows via the
+# use of the Trampoline method. Any n at or below 0 ends the countdown at once.
+def countdown [n: int] -> int {
+  if $n <= 0 {
+    0
+  } else {
+    {|| countdown ($n - 1) }
+  }
+}
diff --git a/modules/recursion/eggcaker-typeof.nu b/modules/recursion/eggcaker-typeof.nu
new file mode 100644
index 00000000..b058ff15
--- /dev/null
+++ b/modules/recursion/eggcaker-typeof.nu
@@ -0,0 +1,6 @@
+# From @eggcaker .. over on Discord
+
+# Returns the type of its input. Use -f for full description.
+def typeof [ --full(-f) ] {
+  describe | if not $full { split row '<' | get 0 } else { $in }
+}
\ No newline at end of file
diff --git a/modules/recursion/even-odd.nu b/modules/recursion/even-odd.nu
new file mode 100644
index 00000000..d6a7a704
--- /dev/null
+++ b/modules/recursion/even-odd.nu
@@ -0,0 +1,30 @@
+# Mutually recursive versions of even and odd commands
+
+
+# even returns true if passed in 0. odd returns true if passed in 1
+# Else, they subtract 2 and call the other fn: even calls odd ($n - 2)
+#
+
+
+# These functions are meant to be used with the tramp module which implements
+# a trampoline wrapper closure. Thus, for each even, odd command, the
+# normal recursive case will actually return a thunk.
+
+# Return true if number is even. Calls mutually recursive odd function
+# if number is greater than 1. Warning: do not pass any numbers less than 0
+def even [n: int, acc=true] -> any {
+  if $n == 0 { return $acc } else if $n == 1 {
+    return (not $acc) } else {
+      {|| odd ($n - 2) (not $acc) }
+    }
+}
+
+
+# Returns true if number is odd. Will cooperate with even in a mutually recursive fashion.
+# Warning: do not pass any numbers less than 0; they never reach a base case
+def odd [n: int, acc=true] -> bool {
+  if $n == 0 { return (not $acc) } else if $n == 1 {
+    return $acc } else {
+      {|| even ($n - 2) (not $acc) }
+    }
+}
diff --git a/modules/recursion/fact.nu b/modules/recursion/fact.nu
new file mode 100644
index 00000000..c3fe10a6
--- /dev/null
+++ b/modules/recursion/fact.nu
@@ -0,0 +1,9 @@
+# Compute the factorial of a number
+# This version returns either the final value (when n <= 1) or a thunk for the next step.
+# Meant to be used in a trampoline
+# But still uses APS: the running product is carried in acc
+def fact [n: int, acc=1] -> int {
+  if $n <= 1 { return $acc } else {
+    {|| fact ($n - 1) ($n * $acc) } # The thunk being returned to the trampoline
+  }
+}
diff --git a/modules/recursion/fib.nu b/modules/recursion/fib.nu
new file mode 100644
index 00000000..5f8affbd
--- /dev/null
+++ b/modules/recursion/fib.nu
@@ -0,0 +1,48 @@
+# Recursive Fibonacci programs in Nu
+
+# Returns the Fibonacci number of its input n.
+# This version is non-tail call optimized and might consume large amounts
+# of stack space even for small values of n. It is also not memoized so run time
+# performance for even quite small values of n is very poor.
+def fib-nontail [n: int] -> int {
+  if $n == 0 {
+    0
+  } else if $n == 1 {
+    1
+  } else {
+    (fib-nontail ($n - 2)) + (fib-nontail ($n - 1))
+  }
+}
+
+
+
+
+
+
+# Returns the Fibonacci number for the index n. Uses the double APS method to
+# ensure the recursive call is in the tail position.
+def fib-aps [n: int, acc: int=1, accp: int=1] -> int {
+  if ($n == 0) or ($n == 1) {
+    $n
+  } else if $n == 2 {
+    $acc
+  } else {
+    fib-aps ($n - 1) ($acc + $accp) $acc
+  }
+}
+
+
+
+# Return the Fibonacci number for given index n
+# This version relies on the trampoline helper: the recursive case returns a thunk
+def fib [n: int, acc: int=1, accp: int=1] -> int {
+  if ($n == 0) or ($n == 1) {
+    $n
+  } else if $n == 2 {
+    $acc
+  } else {
+    {|| fib ($n - 1) ($acc + $accp) $acc }
+  }
+}
+
+
diff --git a/modules/recursion/gcd.nu b/modules/recursion/gcd.nu
new file mode 100644
index 00000000..a7f71c1e
--- /dev/null
+++ b/modules/recursion/gcd.nu
@@ -0,0 +1,15 @@
+# Euclid's algorithm for determining greatest common divisor between 2 non-negative integers
+# Based on this clear explanation from Rutgers: https://sites.math.rutgers.edu/~greenfie/gs2004/euclid.html
+
+# Returns the GCD of its 2 arguments, in either order; gcd of n and 0 is n
+def gcd [i1: int, i2: int] -> int {
+  let a = ([$i1 $i2] | math max); let b = ([$i1 $i2] | math min) # a is the larger, b the smaller
+  if $b == 0 { return $a } # gcd(a, 0) is a; also avoids dividing by zero below
+  let r = $a mod $b
+  if $r == 0 {
+    $b
+  } else {
+    gcd $b $r
+  }
+}
+
diff --git a/modules/recursion/merge.nu b/modules/recursion/merge.nu
new file mode 100644
index 00000000..1f9237b8
--- /dev/null
+++ b/modules/recursion/merge.nu
@@ -0,0 +1,36 @@
+# merge 2 sorted lists
+
+# Merge 2 sorted lists into one sorted list; on ties the element from l goes first
+def merge-2 [l: list, r: list] -> list {
+  mut ol = []
+  mut lprime = $l; mut rprime = $r
+  let mx = ($l | length) + ($r | length)
+  #print -e $"l: ($l), r: ($r)"
+  while ($ol | length) < $mx {
+    if ($lprime | is-empty) or ($rprime | is-empty) { break }
+    if $lprime.0 <= $rprime.0 {
+
+      $ol = ($ol | append $lprime.0)
+      $lprime = ($lprime | skip)
+    } else {
+      $ol = ($ol | append $rprime.0)
+      $rprime = ($rprime | skip)
+    }
+  }
+  $ol | append $lprime | append $rprime
+}
+
+
+# Merge sort a list
+# This version is non tail call optimized and might blow the stack for
+# large lists.
+def sort-nontail [x: list] -> list {
+  let $n = ($x | length)
+  let n_2: int = $n // 2 # split point: first n_2 items go left, the rest go right
+
+  if $n <= 1 {
+    $x
+  } else {
+    merge-2 (sort-nontail ($x | first $n_2)) (sort-nontail ($x | skip $n_2))
+  }
+}
diff --git a/modules/recursion/tramp.nu b/modules/recursion/tramp.nu
new file mode 100644
index 00000000..e9292f58
--- /dev/null
+++ b/modules/recursion/tramp.nu
@@ -0,0 +1,53 @@
+# Trampoline module to allow for recursive functions that won't stack overflow.
+
+
+# The tramp create command is to be used to return a closure that will perform
+# the trampoline iteration. This closure can then be passed to some other
+# command that will execute it for its own purposes.
+# The tramp test command is one such command that will create the closure
+# and then directly run it. It can be used to test your recursive functions
+# that return thunks or terminating values.
+
+# Returns a closure that when called will iterate over the returned thunks
+# from the function being trampolined. Must initially call the function
+# which must return either a thunk or a terminating value.
+export def create [thunk: any] {
+  return {||
+    mut $inner_thunk = $thunk
+    while ($inner_thunk | describe) == closure {
+      $inner_thunk = (do $inner_thunk)
+    }
+    $inner_thunk
+  }
+}
+
+
+# Will run the trampoline closure which is created by performing a call to 'tramp create' with the value of val.
+# The parameter val must be either a terminating value or a closure; closures are run
+# repeatedly until one of them returns a terminating (non-closure) value, which
+# is then returned from this function.
+export def test [val: any] -> any {
+  let cl = (create $val)
+  do $cl
+}
+
+
+
+# For those cases where you do not want to first create a trampoline closure
+# but just want to run the recursive command directly.
+# Example usage
+# use tramp.nu
+# source even-odd.nu
+# tramp recurse (odd 9876543)
+# true
+
+# Explicitly bounces the trampoline over a recursive function without first
+# creating a closure.
+export def recurse [val: any] -> any {
+  mut maybe_thunk = $val
+  while ($maybe_thunk | describe) == closure {
+    $maybe_thunk = (do $maybe_thunk)
+  }
+  $maybe_thunk
+}
+
diff --git a/modules/recursion/tree.nu b/modules/recursion/tree.nu
new file mode 100644
index 00000000..ad9e9264
--- /dev/null
+++ b/modules/recursion/tree.nu
@@ -0,0 +1,54 @@
+# tree.nu: module for working with trees
+source typeof.nu # Requires Nushell version 0.88 or later
+
+# A tree is a recursive data structure. In Nu, we take the view that any single atomic value
+# is a leaf. E.g. int, float, string, bool, etc.
+# Any structured data is some kind of a tree. E.g. list, record or table.
+
+
+
+
+
+
+
+# Applies closure to atomic data: the input value is passed as the closure's argument.
+def visit-scalar [act: closure] {
+  let data = $in
+  do $act $data
+}
+
+
+# Visit every element of the input list, recursing into each one with visit
+def visit-list [act: closure] {
+  let l = $in
+  $l |each {|x| $x | visit $act }
+}
+
+
+# Apply closure to every column and value of record in input. Does a visit on
+# each key and then on each value.
+def visit-record [cl: closure] {
+  items {|k, v| $k | visit $cl; $v | visit $cl }
+}
+
+
+
+
+# Applies closure to every row in table passed to input. Defers to visit-record
+# for each row.
+def visit-table [act: closure] {
+  each {|r| $r | visit-record $act }
+}
+
+
+# Applies closure to every node in the input tree recursively; a list, record or table first passes its type name to the closure.
+def visit [act: closure] {
+  let stream = $in
+
+  match ($stream | typeof) {
+    'list' => { do $act 'list'; $stream | visit-list $act },
+    'record' => { do $act 'record'; $stream | visit-record $act },
+    'table' => { do $act 'table'; $stream | visit-table $act },
+    _ => { $stream | visit-scalar $act }
+  }
+}
diff --git a/modules/recursion/typeof.nu b/modules/recursion/typeof.nu
new file mode 100644
index 00000000..a8aea83b
--- /dev/null
+++ b/modules/recursion/typeof.nu
@@ -0,0 +1,21 @@
+# typeof command.
Requires Nushell version 0.88 or later
+
+# Returns the `type` field of `describe -d` for the input; with --full (-f) returns the whole `describe -d` output
+def typeof [--full (-f)] {
+  describe -d | if not $full { get type } else { $in }
+}
+
+
+# Performs typeof on input but humanizes list and record types into a simple type record.
+# Value lengths are given by ints so downstream consumers do not have to
+# parse string contents like in the raw output of describe -d. Other types fall back to plain typeof.
+# E.g. { list: 2 } # list with 2 elements
+#      { record: 3 } # record with 3 fields
+def structured-type [] {
+  let data = $in
+  match ($data | typeof -f) {
+    {type: list } => { {list: ($data | length) } },
+    { type: record } => { {record: ($data | columns | length) } },
+    _ => { $data | typeof }
+  }
+}