fix a couple more bugs in decodeFileName and its tests

side note, #1383 didn't seem to be working for me in this module;
help, @pete-ts :)
This commit is contained in:
Arya Irani 2020-04-09 13:02:46 -04:00
parent d5e2cf656e
commit ad4a743988
2 changed files with 30 additions and 19 deletions

View File

@ -200,7 +200,7 @@ typeMentionsIndexDir root r = typeMentionsIndexDir' root </> referenceToDir r
typeMentionsIndexDir' root = root </> codebasePath </> "type-mentions-index"
decodeFileName :: FilePath -> String
decodeFileName = go where
decodeFileName = let
go ('$':tl) = case span (/= '$') tl of
("forward-slash", _:tl) -> '/' : go tl
("back-slash", _:tl) -> '\\' : go tl
@ -220,10 +220,14 @@ decodeFileName = go where
decodeHex :: String -> String
decodeHex s = maybe s (Text.unpack . decodeUtf8)
. Hex.unhex . encodeUtf8 . Text.pack $ s
in \case
"$dot$" -> "."
"$dotdot$" -> ".."
t -> go t
-- https://superuser.com/questions/358855/what-characters-are-safe-in-cross-platform-file-names-for-linux-windows-and-os
encodeFileName :: String -> FilePath
encodeFileName t = let
encodeFileName = let
go ('/' : rem) = "$forward-slash$" <> go rem
go ('\\' : rem) = "$back-slash$" <> go rem
go (':' : rem) = "$colon$" <> go rem
@ -239,9 +243,10 @@ encodeFileName t = let
| otherwise = c : go rem
go [] = []
encodeHex = Text.unpack . decodeUtf8 . Hex.hex . encodeUtf8 . Text.pack
in if t == "." then "$dot$"
else if t == ".." then "$dotdot$"
else go t
in \case
"." -> "$dot$"
".." -> "$dotdot$"
t -> go t
termPath, typePath, declPath :: CodebasePath -> Reference.Id -> FilePath
termPath path r = termDir path r </> "compiled.ub"

View File

@ -3,7 +3,7 @@ module Unison.Test.Codebase.FileCodebase where
import EasyTest
import Unison.Codebase.FileCodebase.Common (encodeFileName, decodeFileName)
import qualified Data.Set as Set
import qualified Unison.Lexer as L
import Data.Char as Char
import Data.Foldable (toList)
test :: Test ()
@ -12,18 +12,19 @@ test = scope "FileCodebase" . tests $
[ encodeDecode "abc"
, encodeDecode "👍"
, encodeDecode "\xfff"
, pending $ encodeDecode ['!'..'~']
, pending $ specialEncode "."
, pending $ specialEncode ".."
, pending $
tests $ map specialEncodeChar (toList $ Set.delete '.' L.symbolyIdChars)
, pending $ tests $ map specialEncodeChar unsafeChars
, tests $ (encodeDecode . (:[])) <$> ['!'..'~']
, encodeDecode ("Universal." ++ ['!'..'~'])
, specialEncode "."
, specialEncode ".."
, tests $ map specialEncodeChar (toList specificallyBadChars)
, specialEncodeChar '👍'
, specialEncodeChar '\xfff'
]
]
specialEncode :: String -> Test ()
specialEncode s =
scope (s <> " gets special encoding") $ expect (encodeFileName s /= s)
scope (" " <> s <> " gets special encoding") $ expect (encodeFileName s /= s)
-- | Single-character variant of 'specialEncode': the character is wrapped
-- in a one-element string and handed to 'specialEncode'.
specialEncodeChar :: Char -> Test ()
specialEncodeChar c = specialEncode [c]
@ -32,11 +33,16 @@ encodeDecode :: String -> Test ()
encodeDecode s =
let e = encodeFileName s
d = decodeFileName e
in scope s $ expect $ d == s && all (`Set.member` safeChars) e
in scope s $ expect $ d == s && all isSafeChar e
-- | The set of characters treated as safe: the ASCII digits, the lower- and
-- upper-case ASCII letters, and the four punctuation characters @-._$@.
safeChars :: Set.Set Char
safeChars =
  Set.fromList
    (concat
      [ ['0'..'9']
      , ['a'..'z']
      , ['A'..'Z']
      , "-._$"
      ])
-- | Decide whether a character is acceptable in an encoded file name: it
-- must be printable ASCII and must not belong to 'specificallyBadChars'.
--
-- Currently we are going by https://superuser.com/a/358861
-- In the past we had considered a much smaller set of safe chars:
-- [0-9,a-z,A-Z,-._] from https://superuser.com/a/748264
isSafeChar :: Char -> Bool
isSafeChar c
  | Set.member c specificallyBadChars = False
  | otherwise = Char.isAscii c && Char.isPrint c
-- | The nine characters that are explicitly rejected: backslash, forward
-- slash, colon, asterisk, question mark, double quote, angle brackets and
-- the vertical bar.
specificallyBadChars :: Set.Set Char
specificallyBadChars = Set.fromList rejected
  where
    rejected = "\\/:*?\"<>|"
-- | Every character in the range @'!'..'~'@ that is not a member of
-- 'safeChars', listed in ascending order.
unsafeChars :: [Char]
unsafeChars = filter (`Set.notMember` safeChars) ['!'..'~']