Enforce that includes are UTF8

Previously, "include" used locale-dependent decoding of source files,
while the rest of Cryptol uses UTF8. This change makes "include"
consistent with the rest of Cryptol, and adds a test that checks for
malformed UTF8.
This commit is contained in:
David Thrane Christiansen 2020-06-09 09:02:22 -07:00
parent a28e7b95f7
commit 02dfc4a898
5 changed files with 28 additions and 4 deletions

View File

@ -18,9 +18,12 @@ import qualified Control.Applicative as A
import Control.DeepSeq
import qualified Control.Exception as X
import qualified Control.Monad.Fail as Fail
import qualified Data.ByteString as B
import Data.Either (partitionEithers)
import Data.Text(Text)
import qualified Data.Text.IO as T
import qualified Data.Text.Encoding as T (decodeUtf8')
import Data.Text.Encoding.Error (UnicodeException)
import GHC.Generics (Generic)
import MonadLib
import System.Directory (makeAbsolute)
@ -38,6 +41,7 @@ removeIncludesModule modPath m = runNoIncM modPath (noIncludeModule m)
data IncludeError
= IncludeFailed (Located FilePath)
| IncludeDecodeFailed (Located FilePath) UnicodeException
| IncludeParseError ParseError
| IncludeCycle [Located FilePath]
deriving (Show, Generic, NFData)
@ -50,6 +54,13 @@ ppIncludeError ie = case ie of
<+> pp (srcRange lp)
<+> text "was not found"
IncludeDecodeFailed lp err -> (char '`' <.> text (thing lp) <.> char '`')
<+> text "included at"
<+> pp (srcRange lp)
<+> text "contains invalid UTF-8."
<+> text "Details:"
$$ nest 2 (vcat (map text (lines (X.displayException err))))
IncludeParseError pe -> ppError pe
IncludeCycle is -> text "includes form a cycle:"
@ -181,9 +192,12 @@ resolveInclude lf = pushPath lf $ do
-- | Read a file referenced by an include.
readInclude :: Located FilePath -> NoIncM Text
readInclude path = do
file <- fromIncPath (thing path)
source <- T.readFile file `failsWith` handler
return source
file <- fromIncPath (thing path)
sourceBytes <- B.readFile file `failsWith` handler
sourceText <- X.evaluate (T.decodeUtf8' sourceBytes) `failsWith` handler
case sourceText of
Left encodingErr -> M (raise [IncludeDecodeFailed path encodingErr])
Right txt -> return txt
where
handler :: X.IOException -> NoIncM a
handler _ = includeFailed path

1
tests/modsys/T14.icry Normal file
View File

@ -0,0 +1 @@
:module T14::Main

View File

@ -0,0 +1,4 @@
Loading module Cryptol
Parse error at ./T14/Main.cry:3:9,
unexpected: MalformedUtf8

View File

@ -0,0 +1,3 @@
module T14::Main where
include MalformedUtf8.cry

View File

@ -0,0 +1,2 @@
foo : [8]
foo = 0