Support UTF8 strings

When writing to ttc, need to take the length in bytes rather than the
length in characters. Also need to write to scheme in the appropriate
format for each scheme system.

While we're at it, Idris 1 supports unicode identifiers (although we
don't encourage it :)) so this allows any characeter >127 in an
identifier.
This commit is contained in:
Edwin Brady 2019-09-28 14:08:23 +01:00
parent 0de9e91667
commit 1a4f424259
14 changed files with 154 additions and 63 deletions

View File

@ -116,6 +116,7 @@ modules =
TTImp.WithClause,
Utils.Binary,
Utils.Hex,
Utils.Shunting,
Yaffle.Main,

View File

@ -10,6 +10,7 @@ import Core.Directory
import Core.Name
import Core.Options
import Core.TT
import Utils.Hex
import Data.NameMap
import Data.Vect
@ -93,6 +94,21 @@ schHeader chez libs
schFooter : String
schFooter = ")"
showChezChar : Char -> String -> String
showChezChar '\\' = ("\\\\" ++)
showChezChar c
= if c < chr 32 || c > chr 126
then (("\\x" ++ asHex (cast c) ++ ";") ++)
else strCons c
showChezString : List Char -> String -> String
showChezString [] = id
showChezString ('"'::cs) = ("\\\"" ++) . showChezString cs
showChezString (c ::cs) = (showChezChar c) . showChezString cs
chezString : String -> String
chezString cs = strCons '"' (showChezString (unpack cs) "\"")
mutual
tySpec : CExp vars -> Core String
-- Primitive types have been converted to names for the purpose of matching
@ -124,7 +140,7 @@ mutual
= do args <- getFArgs fargs
argTypes <- traverse tySpec (map fst args)
retType <- tySpec ret
argsc <- traverse (schExp chezExtPrim 0 vs) (map snd args)
argsc <- traverse (schExp chezExtPrim chezString 0 vs) (map snd args)
pure $ handleRet retType ("((foreign-procedure #f " ++ show fn ++ " ("
++ showSep " " argTypes ++ ") " ++ retType ++ ") "
++ showSep " " argsc ++ ")")
@ -134,7 +150,7 @@ mutual
chezExtPrim i vs GetStr [world]
= pure $ mkWorld "(get-line (current-input-port))"
chezExtPrim i vs prim args
= schExtCommon chezExtPrim i vs prim args
= schExtCommon chezExtPrim chezString i vs prim args
-- Reference label for keeping track of loaded external libraries
data Loaded : Type where
@ -244,9 +260,9 @@ compileToSS c tm outfile
defs <- get Ctxt
l <- newRef {t = List String} Loaded ["libc", "libc 6"]
fgndefs <- traverse getFgnCall ns
compdefs <- traverse (getScheme chezExtPrim defs) ns
compdefs <- traverse (getScheme chezExtPrim chezString defs) ns
let code = concat (map snd fgndefs) ++ concat compdefs
main <- schExp chezExtPrim 0 [] !(compileExp tags tm)
main <- schExp chezExtPrim chezString 0 [] !(compileExp tags tm)
chez <- coreLift findChez
support <- readDataFile "chez/support.ss"
let scm = schHeader chez (map snd libs) ++

View File

@ -11,6 +11,8 @@ import Core.Name
import Core.Options
import Core.TT
import Utils.Hex
import Data.NameMap
import Data.Vect
import System
@ -32,12 +34,33 @@ schHeader ds
schFooter : String
schFooter = ")"
showChickenChar : Char -> String -> String
showChickenChar '\\' = ("\\\\" ++)
showChickenChar c
= if c < chr 32 || c > chr 126
then (("\\u" ++ pad (asHex (cast c))) ++)
else strCons c
where
pad : String -> String
pad str
= case isLTE (length str) 4 of
Yes _ => cast (List.replicate (4 - length str) '0') ++ str
No _ => str
showChickenString : List Char -> String -> String
showChickenString [] = id
showChickenString ('"'::cs) = ("\\\"" ++) . showChickenString cs
showChickenString (c ::cs) = (showChickenChar c) . showChickenString cs
chickenString : String -> String
chickenString cs = strCons '"' (showChickenString (unpack cs) "\"")
mutual
chickenPrim : Int -> SVars vars -> ExtPrim -> List (CExp vars) -> Core String
chickenPrim i vs CCall [ret, fn, args, world]
= throw (InternalError ("Can't compile C FFI calls to Chicken Scheme yet"))
chickenPrim i vs prim args
= schExtCommon chickenPrim i vs prim args
= schExtCommon chickenPrim chickenString i vs prim args
compileToSCM : Ref Ctxt Defs ->
ClosedTerm -> (outfile : String) -> Core ()
@ -45,9 +68,9 @@ compileToSCM c tm outfile
= do ds <- getDirectives Chicken
(ns, tags) <- findUsedNames tm
defs <- get Ctxt
compdefs <- traverse (getScheme chickenPrim defs) ns
compdefs <- traverse (getScheme chickenPrim chickenString defs) ns
let code = concat compdefs
main <- schExp chickenPrim 0 [] !(compileExp tags tm)
main <- schExp chickenPrim chickenString 0 [] !(compileExp tags tm)
support <- readDataFile "chicken/support.scm"
let scm = schHeader ds ++ support ++ code ++ main ++ schFooter
Right () <- coreLift $ writeFile outfile scm

View File

@ -205,19 +205,19 @@ export
mkWorld : String -> String
mkWorld res = schConstructor 0 ["#f", res, "#f"] -- MkIORes
schConstant : Constant -> String
schConstant (I x) = show x
schConstant (BI x) = show x
schConstant (Str x) = show x
schConstant (Ch x) = "#\\" ++ cast x
schConstant (Db x) = show x
schConstant WorldVal = "#f"
schConstant IntType = "#t"
schConstant IntegerType = "#t"
schConstant StringType = "#t"
schConstant CharType = "#t"
schConstant DoubleType = "#t"
schConstant WorldType = "#t"
schConstant : (String -> String) -> Constant -> String
schConstant _ (I x) = show x
schConstant _ (BI x) = show x
schConstant schString (Str x) = schString x
schConstant _ (Ch x) = "#\\" ++ cast x
schConstant _ (Db x) = show x
schConstant _ WorldVal = "#f"
schConstant _ IntType = "#t"
schConstant _ IntegerType = "#t"
schConstant _ StringType = "#t"
schConstant _ CharType = "#t"
schConstant _ DoubleType = "#t"
schConstant _ WorldType = "#t"
schCaseDef : Maybe String -> String
schCaseDef Nothing = ""
@ -229,7 +229,8 @@ schArglist [] = ""
schArglist [x] = x
schArglist (x :: xs) = x ++ " " ++ schArglist xs
parameters (schExtPrim : {vars : _} -> Int -> SVars vars -> ExtPrim -> List (CExp vars) -> Core String)
parameters (schExtPrim : {vars : _} -> Int -> SVars vars -> ExtPrim -> List (CExp vars) -> Core String,
schString : String -> String)
mutual
schConAlt : Int -> SVars vars -> String -> CConAlt vars -> Core String
schConAlt {vars} i vs target (MkConAlt n tag args sc)
@ -245,7 +246,7 @@ parameters (schExtPrim : {vars : _} -> Int -> SVars vars -> ExtPrim -> List (CEx
schConstAlt : Int -> SVars vars -> String -> CConstAlt vars -> Core String
schConstAlt i vs target (MkConstAlt c exp)
= pure $ "((equal? " ++ target ++ " " ++ schConstant c ++ ") " ++ !(schExp i vs exp) ++ ")"
= pure $ "((equal? " ++ target ++ " " ++ schConstant schString c ++ ") " ++ !(schExp i vs exp) ++ ")"
-- oops, no traverse for Vect in Core
schArgs : Int -> SVars vars -> Vect n (CExp vars) -> Core (Vect n String)
@ -291,7 +292,7 @@ parameters (schExtPrim : {vars : _} -> Int -> SVars vars -> ExtPrim -> List (CEx
pure $ "(let ((" ++ n ++ " " ++ tcode ++ ")) (cond "
++ showSep " " !(traverse (schConstAlt (i+1) vs n) alts)
++ schCaseDef defc ++ "))"
schExp i vs (CPrimVal fc c) = pure $ schConstant c
schExp i vs (CPrimVal fc c) = pure $ schConstant schString c
schExp i vs (CErased fc) = pure "'()"
schExp i vs (CCrash fc msg) = pure $ "(blodwen-error-quit " ++ show msg ++ ")"
@ -367,11 +368,12 @@ parameters (schExtPrim : {vars : _} -> Int -> SVars vars -> ExtPrim -> List (CEx
export
getScheme : {auto c : Ref Ctxt Defs} ->
(schExtPrim : {vars : _} -> Int -> SVars vars -> ExtPrim -> List (CExp vars) -> Core String) ->
(schString : String -> String) ->
Defs -> Name -> Core String
getScheme schExtPrim defs n
getScheme schExtPrim schString defs n
= case !(lookupCtxtExact n (gamma defs)) of
Nothing => throw (InternalError ("Compiling undefined name " ++ show n))
Just d => case compexpr d of
Nothing =>
throw (InternalError ("No compiled definition for " ++ show n))
Just d => schDef schExtPrim n d
Just d => schDef schExtPrim schString n d

View File

@ -10,6 +10,8 @@ import Core.Directory
import Core.Name
import Core.TT
import Utils.Hex
import Data.NameMap
import Data.Vect
import System
@ -34,12 +36,33 @@ schHeader libs
schFooter : String
schFooter = ")"
showRacketChar : Char -> String -> String
showRacketChar '\\' = ("\\\\" ++)
showRacketChar c
= if c < chr 32 || c > chr 126
then (("\\u" ++ pad (asHex (cast c))) ++)
else strCons c
where
pad : String -> String
pad str
= case isLTE (length str) 4 of
Yes _ => cast (List.replicate (4 - length str) '0') ++ str
No _ => str
showRacketString : List Char -> String -> String
showRacketString [] = id
showRacketString ('"'::cs) = ("\\\"" ++) . showRacketString cs
showRacketString (c ::cs) = (showRacketChar c) . showRacketString cs
racketString : String -> String
racketString cs = strCons '"' (showRacketString (unpack cs) "\"")
mutual
racketPrim : Int -> SVars vars -> ExtPrim -> List (CExp vars) -> Core String
racketPrim i vs CCall [ret, fn, args, world]
= throw (InternalError ("Can't compile C FFI calls to Racket yet"))
racketPrim i vs prim args
= schExtCommon racketPrim i vs prim args
= schExtCommon racketPrim racketString i vs prim args
-- Reference label for keeping track of loaded external libraries
data Loaded : Type where
@ -178,9 +201,9 @@ compileToRKT c tm outfile
defs <- get Ctxt
l <- newRef {t = List String} Loaded []
fgndefs <- traverse getFgnCall ns
compdefs <- traverse (getScheme racketPrim defs) ns
compdefs <- traverse (getScheme racketPrim racketString defs) ns
let code = concat (map snd fgndefs) ++ concat compdefs
main <- schExp racketPrim 0 [] !(compileExp tags tm)
main <- schExp racketPrim racketString 0 [] !(compileExp tags tm)
support <- readDataFile "racket/support.rkt"
let scm = schHeader (concat (map fst fgndefs)) ++
support ++ code ++

View File

@ -1,6 +1,5 @@
module Idris.IDEMode.Client
import Data.Primitives.Views
import System
import Idris.IDEMode.Commands
import Idris.IDEMode.Parser
@ -10,33 +9,7 @@ import Idris.REPL
import Idris.Socket.Data
import Idris.IDEMode.REPL
import Parser.Support
hexDigit : Int -> Char
hexDigit 0 = '0'
hexDigit 1 = '1'
hexDigit 2 = '2'
hexDigit 3 = '3'
hexDigit 4 = '4'
hexDigit 5 = '5'
hexDigit 6 = '6'
hexDigit 7 = '7'
hexDigit 8 = '8'
hexDigit 9 = '9'
hexDigit 10 = 'a'
hexDigit 11 = 'b'
hexDigit 12 = 'c'
hexDigit 13 = 'd'
hexDigit 14 = 'e'
hexDigit 15 = 'f'
||| Convert a positive integer into a list of (lower case) hexadecimal characters
asHex : Int -> String
asHex n = pack $ asHex' n []
where
asHex' : Int -> List Char -> List Char
asHex' 0 hex = hex
asHex' n hex with (n `divides` 16)
asHex' (16 * div + rem) hex | DivBy {div} {rem} _ = asHex' div (hexDigit rem :: hex)
import Utils.Hex
connectTo : String -> Int -> IO (Either String Socket)
connectTo host port = do

View File

@ -63,12 +63,12 @@ ident = pred startIdent <+> many (pred validIdent)
where
startIdent : Char -> Bool
startIdent '_' = True
startIdent x = isAlpha x
startIdent x = isAlpha x || x > chr 127
validIdent : Char -> Bool
validIdent '_' = True
validIdent '\'' = True
validIdent x = isAlphaNum x
validIdent x = isAlphaNum x || x > chr 127
holeIdent : Lexer
holeIdent = is '?' <+> ident

View File

@ -180,13 +180,15 @@ TTC Int where
pure val
else throw (TTCError (EndOfBuffer ("Int " ++ show (loc chunk, size chunk))))
strBytelen : String -> IO Int
strBytelen = foreign FFI_C "strlen" (String -> IO Int)
export
TTC String where
toBuf b val
-- TODO: If we're going to allow UTF-8 identifiers (and we are...) and
-- UTF-8 strings in general, this has to get the length of the C string in
-- bytes, not the length in characters.
= do let req : Int = cast (length val)
-- To support UTF-8 strings, this has to get the length of the C string
-- in bytes, not the length in characters.
= do req <- coreLift $ strBytelen val
toBuf b req
chunk <- get Bin
if avail chunk >= req

33
src/Utils/Hex.idr Normal file
View File

@ -0,0 +1,33 @@
module Utils.Hex
import Data.Primitives.Views
export
hexDigit : Int -> Char
hexDigit 0 = '0'
hexDigit 1 = '1'
hexDigit 2 = '2'
hexDigit 3 = '3'
hexDigit 4 = '4'
hexDigit 5 = '5'
hexDigit 6 = '6'
hexDigit 7 = '7'
hexDigit 8 = '8'
hexDigit 9 = '9'
hexDigit 10 = 'a'
hexDigit 11 = 'b'
hexDigit 12 = 'c'
hexDigit 13 = 'd'
hexDigit 14 = 'e'
hexDigit 15 = 'f'
||| Convert a positive integer into a list of (lower case) hexadecimal characters
export
asHex : Int -> String
asHex n = pack $ asHex' n []
where
asHex' : Int -> List Char -> List Char
asHex' 0 hex = hex
asHex' n hex with (n `divides` 16)
asHex' (16 * div + rem) hex | DivBy {div} {rem} _ = asHex' div (hexDigit rem :: hex)

View File

@ -62,7 +62,7 @@ typeddTests
chezTests : List String
chezTests
= ["chez001", "chez002", "chez003", "chez004", "chez005", "chez006",
"chez007", "chez008"]
"chez007", "chez008", "chez009"]
ideModeTests : List String
ideModeTests

View File

@ -0,0 +1,4 @@
42
ällo
1/1: Building uni (uni.idr)
Main> Main> Bye for now!

2
tests/chez/chez009/input Normal file
View File

@ -0,0 +1,2 @@
:exec main
:q

3
tests/chez/chez009/run Executable file
View File

@ -0,0 +1,3 @@
$1 --no-banner uni.idr < input
rm -rf build

View File

@ -0,0 +1,9 @@
foo : String
foo = "ällo"
ällo : Int
ällo = 42
main : IO ()
main = do printLn ällo
putStrLn "ällo"