From 8c1999d656c425209660d26993ef3aa4d5d7f242 Mon Sep 17 00:00:00 2001 From: Veit Heller Date: Tue, 26 Jan 2021 06:18:16 +0100 Subject: [PATCH] feat: add fstr (#1142) * feat: add fstr * test: add fstr test * fix: memory error in test * fix: fix backslash parser * feat: add octal escape literals --- core/Dynamic.carp | 5 +++- core/Format.carp | 64 ++++++++++++++++++++++++++++++++++++++++++ core/Macros.carp | 3 ++ docs/Format_Strings.md | 59 ++++++++++++++++++++++++++++++++++++++ src/Emit.hs | 15 +++++++--- src/Parsing.hs | 38 +++++++++++++++++++++---- test/format.carp | 13 ++++++++- 7 files changed, 185 insertions(+), 12 deletions(-) create mode 100644 docs/Format_Strings.md diff --git a/core/Dynamic.carp b/core/Dynamic.carp index 495e2d98..f0ebf446 100644 --- a/core/Dynamic.carp +++ b/core/Dynamic.carp @@ -26,7 +26,10 @@ (defndynamic suffix [s from] (String.slice s from (String.length s))) - (defndynamic tail [s ] + (defndynamic head [s] + (String.prefix s 1)) + + (defndynamic tail [s] (String.suffix s 1)) ) ) diff --git a/core/Format.carp b/core/Format.carp index eecb137c..0551974d 100644 --- a/core/Format.carp +++ b/core/Format.carp @@ -41,3 +41,67 @@ (doc fmt "formats a string. It supports all of the string interpolations defined in format of the type that should be interpolated (e.g. 
%d and %x on integers).") (defmacro fmt [s :rest args] (list 'copy (fmt-internal s args))) + +(hidden f-parse-expr-string) +(defndynamic f-parse-expr-string [s r] + (if (= (String.length s) 0) + -1 + (let [h (String.head s)] + (if (= h "\"") + (inc r) + (let [i (if (= h "\\") 2 1)] + (f-parse-expr-string + (String.suffix s i) + (+ r i))))))) + +(hidden f-parse-expr) +(defndynamic f-parse-expr [s idx r] + (cond + (= idx 0) r + (= (String.length s) 0) -1 + + (let [h (String.head s) + t (String.tail s)] + (if (= h "\"") + (let [l (f-parse-expr-string t 0)] + (if (= l -1) + -1 + (f-parse-expr (String.suffix s (inc l)) idx (+ r (inc l))))) + (f-parse-expr + t + (cond + (= h "{") (inc idx) + (= h "}") (dec idx) + idx) + (inc r)))))) + +(hidden f-internal) +(defndynamic f-internal [s] + (let [idx (String.index-of s \{) + len (String.length s)] + (cond + (= idx -1) [(list 'copy s)] + + (= (inc idx) len) (macro-error "error in format string: expected expression after last {") + + (= \{ (String.char-at s (inc idx))) + (append [(list 'copy (String.prefix s (inc idx)))] (f-internal (String.suffix s (+ 2 idx)))) + + (let [ss (String.suffix s (inc idx)) + endx (f-parse-expr ss 1 0)] + (if (= endx -1) + (macro-error "error in format string: unclosed open bracket") + (append [(list 'copy (String.prefix s idx)) (list 'str (parse (String.prefix ss (dec endx))))] + (f-internal (String.suffix ss endx)))))))) + +(doc fstr "formats a string. It allows arbitrary expression to be intercalated. 
+ +Example: +``` +(def x 1) +(def y \"hi\") + +(fstr \"this is x: {x}, and this is the first letter of y: {(head y)}\") +```") +(defmacro fstr [s] + (list 'String.concat (list 'ref (f-internal s)))) diff --git a/core/Macros.carp b/core/Macros.carp index 9740f69c..23d74e7b 100644 --- a/core/Macros.carp +++ b/core/Macros.carp @@ -18,6 +18,8 @@ (list-to-array-internal (cdr xs) (append acc (array (car xs)))))) (defmodule Dynamic + (defndynamic /= [a b] (not (= a b))) + (defndynamic caar [pair] (car (car pair))) (defndynamic cadr [pair] (car (cdr pair))) (defndynamic cdar [pair] (cdr (car pair))) @@ -47,6 +49,7 @@ (defndynamic cdddar [pair] (cdr (cdr (cdr (car pair))))) (defndynamic cddddr [pair] (cdr (cdr (cdr (cdr pair)))))) + (meta-set! doc "doc" "Set documentation for a binding.") (defmacro doc [name :rest strings] (let [newline " diff --git a/docs/Format_Strings.md b/docs/Format_Strings.md new file mode 100644 index 00000000..33175c3f --- /dev/null +++ b/docs/Format_Strings.md @@ -0,0 +1,59 @@ +# Format Strings + +Carp has two ways to format strings, `fmt` and `fstr`. In this document, we +explore both functions in detail. + +## `fmt` + +`fmt` allows for more control than `fstr`, but also requires more knowledge +about the data. + +```clojure +(fmt "this is an integer %d and this is a string %s." 1 "hi") +``` + +It works similarly to [`printf`](https://en.wikipedia.org/wiki/Printf_format_string) +in C. `fmt` will check that the number of arguments and format specifiers in +the format string match. + +All arguments to `fmt` must implement the `format` interface, which is defined +as: + +```clojure +(definterface format (Fn [String a] String)) +``` + +The types are expected to take a format specifier and format according to it. +As such, which format specifiers are supported is dependent on the +implementation of `format` on that type. Standard library types expose regular +format specifiers as in C. 
+ +Please note that, because `format` is often implemented on top of `snprintf` +and similar functions, using faulty format specifiers might lead to problems. + +Also, all `fmt` format strings must be literals. + +## `fstr` + +Similarly to `fmt`, `fstr` takes a literal string. It uses a simpler interface +than `fmt`, however, in which the expressions are embedded directly into the +string and formatted using `str`. As such, the return types of all expressions +in a `fstr` must implement the `str` interface. + +```clojure +(def x 1) +(def y "hi") + +(fstr "this is an integer {x} and this is the first character of a string {(head y)}") +``` + +Any parseable expression may be embedded in a `fstr`. Expressions are +delimited using `{}`. Any lone `}` will be interpreted as a literal, whereas +a literal `{` needs to be escaped as `{{`. + +```clojure +(fstr "{{}") ; => {} +``` + +While possible, it is discouraged to use complicated or even multiline +expressions inside `fstr`. diff --git a/src/Emit.hs b/src/Emit.hs index 1be6b6a2..defc7c46 100644 --- a/src/Emit.hs +++ b/src/Emit.hs @@ -172,7 +172,7 @@ toC toCMode (Binder meta root) = emitterSrc (execState (visit startingIndent roo (Match _) -> dontVisit With -> dontVisit MetaStub -> dontVisit - visitStr' indent str i = + visitStr' indent str i shouldEscape = -- This will allocate a new string every time the code runs: -- do let var = freshVar i -- appendToSrc (addIndent indent ++ "String " ++ var ++ " = strdup(\"" ++ str ++ "\");\n") @@ -181,14 +181,21 @@ toC toCMode (Binder meta root) = emitterSrc (execState (visit startingIndent roo do let var = freshVar i varRef = freshVar i ++ "_ref" - appendToSrc (addIndent indent ++ "static String " ++ var ++ " = \"" ++ escapeString str ++ "\";\n") + appendToSrc (addIndent indent ++ "static String " ++ var ++ " = \"" ++ (if shouldEscape then escapeString str else str) ++ "\";\n") appendToSrc (addIndent indent ++ "String *" ++ varRef ++ " = &" ++ var ++ ";\n") pure varRef - visitString 
indent (XObj (Str str) (Just i) _) = visitStr' indent str i - visitString indent (XObj (Pattern str) (Just i) _) = visitStr' indent str i + visitString indent (XObj (Str str) (Just i) _) = visitStr' indent str i True + visitString indent (XObj (Pattern str) (Just i) _) = visitStr' indent str i False visitString _ _ = error "Not a string." escaper '\"' acc = "\\\"" ++ acc + escaper '\\' acc = "\\\\" ++ acc escaper '\n' acc = "\\n" ++ acc + escaper '\a' acc = "\\a" ++ acc + escaper '\b' acc = "\\b" ++ acc + escaper '\f' acc = "\\f" ++ acc + escaper '\r' acc = "\\r" ++ acc + escaper '\t' acc = "\\t" ++ acc + escaper '\v' acc = "\\v" ++ acc escaper x acc = x : acc escapeString = foldr escaper "" visitSymbol :: Int -> XObj -> State EmitterState String diff --git a/src/Parsing.hs b/src/Parsing.hs index 0f0092e2..911a7f06 100644 --- a/src/Parsing.hs +++ b/src/Parsing.hs @@ -11,7 +11,7 @@ import Data.Bits (shift) -- import Text.Parsec.Error (newErrorMessage, Message(..)) -- import Text.Parsec.Pos (newPos) -import Data.Char (ord) +import Data.Char (chr, ord) import Data.List (foldl') import Info import Numeric (readHex) @@ -277,11 +277,37 @@ pat = do escaped :: Parsec.Parsec String ParseState String escaped = do _ <- Parsec.char '\\' - c <- Parsec.oneOf ['\\', '\"'] - pure $ case c of - '\\' -> "\\\\" - '\"' -> "\"" - _ -> error "escaped" + c <- Parsec.anyChar + case c of + '\\' -> pure "\\" + '\"' -> pure "\"" + '\'' -> pure "\'" + 'a' -> pure "\a" + 'b' -> pure "\b" + 'f' -> pure "\f" + 'n' -> pure "\n" + 'r' -> pure "\r" + 't' -> pure "\t" + 'v' -> pure "\v" + 'x' -> do + hex <- Parsec.many1 (Parsec.oneOf "0123456789abcdefABCDEF") + let [(p, "")] = readHex hex + return [chr p] + 'u' -> do + hex <- Parsec.count 4 (Parsec.oneOf "0123456789abcdefABCDEF") + let [(p, "")] = readHex hex + return [chr p] + 'U' -> do + hex <- Parsec.count 8 (Parsec.oneOf "0123456789abcdefABCDEF") + let [(p, "")] = readHex hex + return [chr p] + _ -> + if elem c "01234567" + then do + hex <- 
Parsec.many (Parsec.oneOf "01234567") + let p = foldl' (\acc d -> (+) (acc * 8) (subtract (ord '0') (ord d))) 0 (c : hex) + return [chr p] + else pure ('\\' : [c]) escapedQuoteChar :: Parsec.Parsec String ParseState Char escapedQuoteChar = do diff --git a/test/format.carp b/test/format.carp index b86a86d7..e3da7670 100644 --- a/test/format.carp +++ b/test/format.carp @@ -33,4 +33,15 @@ (assert-equal test "10 % 12.0 yay" &(fmt "%d %% %.1f %s" 10 12.0 "yay") - "fmt macro works")) + "fmt macro works") + (assert-equal test + "1 [2 3] h" + &(let [x 1 + y [2 3] + z "hi"] + (fstr "{x} {&y} {(head z)}")) + "fstr works") + (assert-equal test + "{}" + &(fstr "{{}") + "fstr escaping works"))