mirror of https://github.com/kanaka/mal.git synced 2024-08-17 17:50:24 +03:00
mal/impls/bash/reader.sh
Andy Chu 02028e90a4 impls/bash: Minor changes to make it run under OSH
OSH is a bash-compatible shell: https://www.oilshell.org/

reader.sh:

- Put the constant regex pattern in a string literal.  This simplifies
  it by removing the mix of regex and shell quoting, and the implicit
  concatenation with $'\n'.  (A sketch of both changes follows this list.)

  This is suggested by the bash manual:
  https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs

  "Storing the regular expression in a shell variable is often a useful
  way to avoid problems with quoting characters that are special to the
  shell."

- Initialize __reader_tokens as an array, not a string.

  https://www.oilshell.org/release/0.8.pre6/doc/known-differences.html#values-are-tagged-with-types-not-cells
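
  A minimal sketch of both points, using a simplified pattern and made-up
  names rather than the actual reader.sh code:

    token_pat='^[[:digit:]]+'        # plain single-quoted string; no regex vs. shell quote mixing
    declare -a tokens=()             # initialized as an array, not as an empty string
    if [[ '123 foo' =~ ${token_pat} ]]; then   # unquoted expansion is matched as a regex
        tokens[0]="${BASH_REMATCH[0]}"         # -> '123'
    fi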

env.sh:

Simplify quoting in 'eval' expressions.  This quotes associative array keys,
which OSH requires because it avoids dynamically parsing array indices.
(Illustrated below.)

  https://www.oilshell.org/release/0.8.pre6/doc/known-differences.html#strings-vs-bare-words-in-array-indices
  http://www.oilshell.org/blog/2016/10/20.html
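
  A simplified illustration of the quoting (made-up names, not the actual
  env.sh code):

    declare -A env_data=()
    key=foo val=42
    eval "env_data[\"${key}\"]=\"${val}\""   # eval'd text: env_data["foo"]="42"
    echo "${env_data[$key]}"                 # -> 42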

core.sh:

Quote associative array keys.  '<' and '>' are shell operators, and OSH does
not special-case them inside the [ ] of an array subscript the way bash does.
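
  For example (illustrative names, not the actual core.sh table):

    declare -A ns=(
        ['<']=num_lt     # quoted keys work the same way in bash and OSH
        ['>']=num_gt
    )
    # ns=( [<]=num_lt )  # the unquoted form relies on a bash special case OSH doesn't have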

----

With this change, OSH can run tests just like bash, e.g.:

$ osh impls/bash/stepA_mal.sh tests/step4_if_fn_do.mal

----

Test results are the same before and after this change:

$ NO_DOCKER=1 ./.travis_test.sh test bash

FAILURES:
SOFT FAILED TEST (line 295): (f (+ 1 1)) -> ['',true]:
    Expected : '.*\ntrue'
    Got      : '(f (+ 1 1))\nfalse'

TEST RESULTS (for ../tests/stepA_mal.mal):
    1: soft failing tests
    0: failing tests
  106: passing tests
  107: total tests
2020-06-13 10:57:13 -07:00

168 lines | 5.1 KiB | Bash

#
# mal (Make Lisp) Parser/Reader
#

if [ -z "${__mal_readerr_included__}" ]; then
__mal_readerr_included__=true

source $(dirname $0)/types.sh

READ_ATOM () {
    local token=${__reader_tokens[${__reader_idx}]}
    __reader_idx=$(( __reader_idx + 1 ))
    case "${token}" in
        [0-9]*)  _number "${token}" ;;
        -[0-9]*) _number "${token}" ;;
        \"*)     if [[ ! "${token}" =~ ^\"(\\.|[^\\\"])*\"$ ]]; then
                     _error "expected '\"', got EOF"
                     return
                 fi
                 token="${token:1:-1}"
                 token="${token//\\\\/${__keyw}}"
                 token="${token//\\\"/\"}"
                 token="${token//\\n/$'\n'}"
                 token="${token//${__keyw}/\\}"
                 _string "${token}" ;;
        :*)      _keyword "${token:1}" ;;
        nil)     r="${__nil}" ;;
        true)    r="${__true}" ;;
        false)   r="${__false}" ;;
        *)       _symbol "${token}" ;;
    esac
}
# Read a sequence of forms, returning them in r (space separated).
# ${1}: starting token (e.g. "(")
# ${2}: ending token (e.g. ")")
READ_SEQ () {
    local start="${1}"
    local end="${2}"
    local items=""
    local token=${__reader_tokens[${__reader_idx}]}
    __reader_idx=$(( __reader_idx + 1 ))
    if [[ "${token}" != "${start}" ]]; then
        r=
        _error "expected '${start}'"
        return
    fi
    token=${__reader_tokens[${__reader_idx}]}
    while [[ "${token}" != "${end}" ]]; do
        if [[ ! "${token}" ]]; then
            r=
            _error "expected '${end}', got EOF"
            return
        fi
        READ_FORM
        items="${items} ${r}"
        token=${__reader_tokens[${__reader_idx}]}
    done
    __reader_idx=$(( __reader_idx + 1 ))
    r="${items:1}"
}
# Return form in r
READ_FORM () {
    local token=${__reader_tokens[${__reader_idx}]}
    case "${token}" in
        \')   __reader_idx=$(( __reader_idx + 1 ))
              _symbol quote; local q="${r}"
              READ_FORM; local f="${r}"
              _list "${q}" "${f}" ;;
        \`)   __reader_idx=$(( __reader_idx + 1 ))
              _symbol quasiquote; local q="${r}"
              READ_FORM; local f="${r}"
              _list "${q}" "${f}" ;;
        \~)   __reader_idx=$(( __reader_idx + 1 ))
              _symbol unquote; local q="${r}"
              READ_FORM; local f="${r}"
              _list "${q}" "${f}" ;;
        \~\@) __reader_idx=$(( __reader_idx + 1 ))
              _symbol splice-unquote; local q="${r}"
              READ_FORM; local f="${r}"
              _list "${q}" "${f}" ;;
        ^)    __reader_idx=$(( __reader_idx + 1 ))
              _symbol with-meta; local wm="${r}"
              READ_FORM; local meta="${r}"
              READ_FORM; local obj="${r}"
              _list "${wm}" "${obj}" "${meta}" ;;
        @)    __reader_idx=$(( __reader_idx + 1 ))
              _symbol deref; local d="${r}"
              READ_FORM; local f="${r}"
              _list "${d}" "${f}" ;;
        \))   _error "unexpected ')'" ;;
        \()   READ_SEQ "(" ")"
              _list ${r} ;;
        \])   _error "unexpected ']'" ;;
        \[)   READ_SEQ "[" "]"
              _vector ${r} ;;
        \})   _error "unexpected '}'" ;;
        \{)   READ_SEQ "{" "}"
              _hash_map ${r} ;;
        *)    READ_ATOM
    esac
}
TOKEN_PAT=$'^^([][{}\\(\\)^@])|^(~@)|^("(\\\\.|[^\\"])*"?)|^(;[^\n]*)|^([~\'`])|^([^][ ~`\'";{}\\(\\)^@,\n]+)|^(,)|^([[:space:]]+)'

# Returns __reader_tokens as an indexed array of tokens
TOKENIZE () {
    local data="${*}"
    local datalen=${#data}
    local idx=0
    local chunk=0
    local chunksz=500
    local token=
    local str=

    __reader_idx=0
    declare -a -g __reader_tokens=() # global array

    while true; do
        # Refill the working string in chunks so the regex never has to
        # match against the whole remaining input at once.
        if (( ${#str} < ( chunksz / 2) )) && (( chunk < datalen )); then
            str="${str}${data:${chunk}:${chunksz}}"
            chunk=$(( chunk + ${chunksz} ))
        fi
        (( ${#str} == 0 )) && break
        [[ "${str}" =~ ${TOKEN_PAT} ]]
        token=${BASH_REMATCH[0]}
        str="${str:${#token}}"
        token="${token}"
        #echo "MATCH: '${token}' / [${str}]"
        # Drop commas, comments and whitespace; keep everything else.
        if ! [[ "${token}" =~ (^[,]$|^[[:space:]]*;.*$|^[[:space:]]*$) ]]; then
            __reader_tokens[${idx}]="${token}"
            idx=$(( idx + 1 ))
        fi
        if [ -z "${token}" ]; then
            _error "Tokenizing error at: ${str:0:50}"
            return 1
        fi
    done
}
# read-str from a raw "string" or from a string object. Returns the
# object read in r.
READ_STR () {
    declare -a __reader_tokens
    TOKENIZE "${*}" || return 1 # sets __reader_tokens
    #set | grep ^__reader_tokens
    if [ -z "${__reader_tokens[0]}" ]; then
        r=
        return 1 # No tokens
    fi
    READ_FORM
    #echo "Token: ${r}: <${ANON["${r}"]}>"
    return
}
# Call readline and save the history. Returns the string read in r.
READLINE_EOF=
READLINE_HISTORY_FILE=${HOME}/.mal-history
READLINE () {
    history -r "${READLINE_HISTORY_FILE}" 2>/dev/null || true
    read -r -e -p "${1}" r || return "$?"
    history -s -- "${r}"
    history -a "${READLINE_HISTORY_FILE}" 2>/dev/null || true
}

fi