mirror of
https://github.com/composewell/streamly.git
synced 2024-09-19 07:29:02 +03:00
Check absolute path in append
Also add a few other utilities.
This commit is contained in:
parent
b1b66d040d
commit
ec8ebdf601
@ -26,6 +26,11 @@
|
||||
-- The 'Path' type can be converted to and from 'OsPath' type at zero cost
|
||||
-- since the underlying representation of both is the same.
|
||||
|
||||
-- We should be able to manipulate windows paths on posix and posix paths on
|
||||
-- windows as well. Therefore, we have WindowsPath and PosixPath types which
|
||||
-- are supported on both platforms. However, the Path module aliases Path to
|
||||
-- WindowsPath on Windows and PosixPath on Posix.
|
||||
--
|
||||
-- Conventions: A trailing separator on a path indicates that it is a directory.
|
||||
-- However, the absence of a trailing separator does not convey any
|
||||
-- information, it could either be a directory or a file.
|
||||
@ -33,10 +38,17 @@
|
||||
-- You may also find the 'str' quasiquoter from "Streamly.Unicode.String" to be
|
||||
-- useful in creating paths.
|
||||
--
|
||||
-- * https://en.wikipedia.org/wiki/Path_(computing)
|
||||
-- * https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
|
||||
-- * https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-dtyp/62e862f4-2a51-452e-8eeb-dc4ff5ee33cc
|
||||
--
|
||||
module Streamly.Internal.FileSystem.Path
|
||||
(
|
||||
-- * OS
|
||||
OS (..)
|
||||
|
||||
-- * Path Types
|
||||
Path (..)
|
||||
, Path (..)
|
||||
, File
|
||||
, Dir
|
||||
, Abs
|
||||
@ -88,21 +100,29 @@ module Streamly.Internal.FileSystem.Path
|
||||
-- , primarySeparator
|
||||
-- , isSeparator
|
||||
, append
|
||||
, unsafeAppend
|
||||
, appendRel
|
||||
, dropTrailingSeparators
|
||||
, isRelativeRaw
|
||||
, isAbsoluteRaw
|
||||
)
|
||||
where
|
||||
|
||||
#include "assert.hs"
|
||||
|
||||
import Control.Exception (Exception)
|
||||
import Control.Monad.Catch (MonadThrow(..))
|
||||
import Data.Char (chr)
|
||||
import Data.Char (ord, isAlpha)
|
||||
import Data.Functor.Identity (Identity(..))
|
||||
import Data.Word (Word8)
|
||||
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
|
||||
import Data.Word (Word16)
|
||||
#endif
|
||||
import GHC.Base (unsafeChr)
|
||||
import Language.Haskell.TH (Q, Exp)
|
||||
import Language.Haskell.TH.Quote (QuasiQuoter)
|
||||
import Streamly.Internal.Data.Array (Array)
|
||||
import Streamly.Internal.Data.Array (Array(..))
|
||||
import Streamly.Internal.Data.MutByteArray (Unbox)
|
||||
import Streamly.Internal.Data.Stream (Stream)
|
||||
import System.IO.Unsafe (unsafePerformIO)
|
||||
|
||||
@ -114,18 +134,19 @@ import qualified Streamly.Internal.Unicode.Stream as Unicode
|
||||
|
||||
import Prelude hiding (abs)
|
||||
|
||||
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
|
||||
#define WORD_TYPE Word16
|
||||
#define SEPARATOR 92
|
||||
#else
|
||||
#define WORD_TYPE Word8
|
||||
#define SEPARATOR 47
|
||||
#endif
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- Exceptions
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
data OS = Windows | Posix
|
||||
|
||||
currentOS :: OS
|
||||
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
|
||||
currentOS = Windows
|
||||
#else
|
||||
currentOS = Posix
|
||||
#endif
|
||||
|
||||
-- | Exceptions thrown by path operations.
|
||||
data PathException =
|
||||
InvalidPath String
|
||||
@ -137,6 +158,10 @@ data PathException =
|
||||
|
||||
instance Exception PathException
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- Types
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
-- XXX Path must not contain null char on Posix. System calls consider the path
|
||||
-- as null terminated.
|
||||
-- XXX Maintain the Array with null termination because Unix system calls
|
||||
@ -144,18 +169,31 @@ instance Exception PathException
|
||||
-- as paths. Implementation of path append will have to handle the null
|
||||
-- termination. Or we can choose to always copy the array when using it in
|
||||
-- system calls.
|
||||
|
||||
-- XXX The eq instance needs to make sure that the paths are equivalent. If we
|
||||
-- normalize the paths we can do a byte comparison. However, on windows paths
|
||||
-- are case insensitive but the case is preserved, therefore, we cannot
|
||||
-- normalize and need to do case insensitive comparison.
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- Types
|
||||
------------------------------------------------------------------------------
|
||||
-- ".." is not processed, is kept as is.
|
||||
-- On Windows several characters other than null are not allowed but we do not
|
||||
-- validate that yet when parsing a path.
|
||||
|
||||
-- | A type representing file system paths for directories or files.
|
||||
newtype Path = Path (Array WORD_TYPE) -- deriving Eq
|
||||
--
|
||||
-- A Path is validated before construction unless unsafe constructors are used
|
||||
-- to create it. Rules and invariants maintained by the safe construction
|
||||
-- methods are as follows:
|
||||
--
|
||||
-- * Does not contain a null character.
|
||||
-- * Does not have a trailing separator except in the root path.
|
||||
-- * Does not have a trailing @.@ component.
|
||||
-- * Does not have consecutive separators except in UNC prefixes on Windows.
|
||||
-- * Does not contain @/./@ path components (can be replaced with @/@) except
|
||||
-- in a UNC prefix on windows.
|
||||
--
|
||||
-- Note that in some cases the file system may perform unicode normalization on
|
||||
-- paths (e.g. Apple HFS), it may cause surprising results as the path used by
|
||||
-- the user may not have the same bytes as later returned by the file system.
|
||||
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
|
||||
newtype Path = Path (Array Word16)
|
||||
#else
|
||||
newtype Path = Path (Array Word8)
|
||||
#endif
|
||||
|
||||
-- Show instance prints raw bytes without any decoding for rountdtripping.
|
||||
-- Should we print this as a string instead, may be useful for ascii chars but
|
||||
@ -166,6 +204,11 @@ instance Show Path where
|
||||
show (Path x) = show x
|
||||
-}
|
||||
|
||||
-- XXX The Eq instance needs to make sure that the paths are equivalent. If we
|
||||
-- normalize the paths we can do a byte comparison. However, on windows paths
|
||||
-- are case insensitive but the case is preserved, therefore, we cannot
|
||||
-- normalize and need to do case insensitive comparison.
|
||||
|
||||
-- XXX Do we need a type for file or dir Name as names cannot have the
|
||||
-- separator char and there may be other restrictions on names? For example,
|
||||
-- length restriction. A file name cannot be "." or "..". We can use the types
|
||||
@ -191,10 +234,13 @@ newtype Rel a = Rel a
|
||||
|
||||
-- | A member of 'IsPath' knows how to convert to and from the 'Path' type.
|
||||
class IsPath a where
|
||||
-- | Like 'fromPath' but does not check the properties of 'Path'. Provides
|
||||
-- performance and simplicity when we know that the properties of the path
|
||||
-- are already verified, for example, when we get the path from the file
|
||||
-- system or the OS APIs.
|
||||
-- | Like 'fromPath' but does not check the properties of 'Path'. The user
|
||||
-- is responsible to maintain the invariants mentioned in the definition of
|
||||
-- 'Path' type otherwise surprising behavior may result.
|
||||
--
|
||||
-- Provides performance and simplicity when we know that the properties of
|
||||
-- the path are already verified, for example, when we get the path from
|
||||
-- the file system or the OS APIs.
|
||||
unsafeFromPath :: Path -> a
|
||||
|
||||
-- | Convert a raw 'Path' to other forms of well-typed paths. It may fail
|
||||
@ -270,11 +316,12 @@ adapt p = fromPath $ toPath p
|
||||
-- been using it consistently in streamly. We use "bytes" for a stream of
|
||||
-- bytes.
|
||||
|
||||
-- | /Unsafe/: On Posix, a path cannot contain null characters. On Windows, the
|
||||
-- array passed must be a multiple of 2 bytes as the underlying representation
|
||||
-- uses 'Word16'.
|
||||
-- | /Unsafe/: The user is responsible to maintain the invariants mentioned in
|
||||
-- the definition of the 'Path' type. On Windows, the array passed must be a
|
||||
-- multiple of 2 bytes as the underlying representation uses 'Word16'.
|
||||
{-# INLINE unsafeFromChunk #-}
|
||||
unsafeFromChunk :: Array Word8 -> Path
|
||||
-- XXX add asserts to check safety
|
||||
unsafeFromChunk arr = Path (Array.castUnsafe arr)
|
||||
|
||||
-- | On Posix it may fail if the byte array contains null characters. On
|
||||
@ -300,6 +347,9 @@ toChunk (Path arr) = Array.asBytes arr
|
||||
-- | Encode a Unicode char stream to 'Path' using strict UTF-8 encoding on
|
||||
-- Posix. On Posix it may fail if the stream contains null characters.
|
||||
-- TBD: Use UTF16LE on Windows.
|
||||
--
|
||||
-- Unicode normalization is not done. If normalization is needed the user can
|
||||
-- normalize it and use the fromChunk API.
|
||||
fromChars :: MonadThrow m => Stream Identity Char -> m Path
|
||||
fromChars s =
|
||||
let n = runIdentity $ Stream.fold Fold.length s
|
||||
@ -454,8 +504,29 @@ mkRelFile = undefined
|
||||
-- Operations
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
separatorWord :: WORD_TYPE
|
||||
separatorWord = SEPARATOR
|
||||
posixSeparator :: Char
|
||||
posixSeparator = '/'
|
||||
|
||||
windowsSeparator :: Char
|
||||
windowsSeparator = '\\'
|
||||
|
||||
-- XXX We can use Enum type class to include the Char type as well so that the
|
||||
-- functions can work on Array Word8/Word16/Char but that may be slow.
|
||||
|
||||
-- | Unsafe, may tructate to shorter word types, can only be used safely for
|
||||
-- characters that fit in the given word size.
|
||||
charToWord :: Integral a => Char -> a
|
||||
charToWord c =
|
||||
let n = ord c
|
||||
in assert (n <= 255) (fromIntegral n)
|
||||
|
||||
-- | Unsafe, should be a valid character.
|
||||
wordToChar :: Integral a => a -> Char
|
||||
wordToChar = unsafeChr . fromIntegral
|
||||
|
||||
-- | Index a word in an array and convert it to Char.
|
||||
unsafeIndexChar :: (Unbox a, Integral a) => Int -> Array a -> Char
|
||||
unsafeIndexChar i a = wordToChar (Array.getIndexUnsafe i a)
|
||||
|
||||
-- Portable definition for exporting.
|
||||
|
||||
@ -463,60 +534,180 @@ separatorWord = SEPARATOR
|
||||
-- Windows supports @/@ too as a separator. Please use 'isSeparator' for
|
||||
-- testing if a char is a separator char.
|
||||
_primarySeparator :: Char
|
||||
_primarySeparator = chr (SEPARATOR)
|
||||
_primarySeparator = posixSeparator
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- Path parsing utilities
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
-- | On Posix only @/@ is a path separator but in windows it could be either
|
||||
-- @/@ or @\\@.
|
||||
_isSeparator :: Char -> Bool
|
||||
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
|
||||
_isSeparator c = (c == '/') || (c == '\\')
|
||||
#else
|
||||
_isSeparator = (== '/')
|
||||
#endif
|
||||
{-# INLINE isSeparator #-}
|
||||
isSeparator :: OS -> Char -> Bool
|
||||
isSeparator Windows c = (c == windowsSeparator) || (c == posixSeparator)
|
||||
isSeparator Posix c = (c == posixSeparator)
|
||||
|
||||
-- If we append an absolute path it may fail with an error if the 'Path'
|
||||
-- implementation stores absolute path information (a leading separator char).
|
||||
-- However, the implementation may choose to store the path as a list of
|
||||
-- components in which case we cannot distinguish an absolute path from
|
||||
-- relative.
|
||||
{-# INLINE isSeparatorWord #-}
|
||||
isSeparatorWord :: Integral a => OS -> a -> Bool
|
||||
isSeparatorWord os = isSeparator os . wordToChar
|
||||
|
||||
countTrailingBy :: Unbox a => (a -> Bool) -> Array a -> Int
|
||||
countTrailingBy p arr =
|
||||
runIdentity
|
||||
$ Stream.fold Fold.length
|
||||
$ Stream.takeWhile p
|
||||
$ Array.readRev arr
|
||||
|
||||
-- | If the path is @//@ the result is @/@. If it is @a//@ then the result is
|
||||
-- @a@.
|
||||
dropTrailingBy :: Unbox a => (a -> Bool) -> Array a -> Array a
|
||||
dropTrailingBy p arr@(Array barr start end) =
|
||||
if end - start > 0
|
||||
then
|
||||
let n = countTrailingBy p arr
|
||||
in Array barr start (max 1 (end - n))
|
||||
else arr
|
||||
|
||||
-- | If the path is @//@ the result is @/@. If it is @a//@ then the result is
|
||||
-- @a@.
|
||||
{-# INLINE dropTrailingSeparators #-}
|
||||
dropTrailingSeparators :: OS -> Path -> Path
|
||||
dropTrailingSeparators os (Path arr) =
|
||||
Path (dropTrailingBy (isSeparator os . wordToChar) arr)
|
||||
|
||||
-- | @C:...@
|
||||
hasDrive :: (Unbox a, Integral a) => Array a -> Bool
|
||||
hasDrive a =
|
||||
if Array.byteLength a < 2
|
||||
then False
|
||||
-- Check colon first for quicker return
|
||||
else if (unsafeIndexChar 1 a /= ':')
|
||||
then False
|
||||
-- XXX If we found a colon anyway this cannot be a valid path unless it has
|
||||
-- a drive prefix. colon is not a valid path character.
|
||||
-- XXX check isAlpha perf
|
||||
else if not (isAlpha (unsafeIndexChar 0 a))
|
||||
then False -- XXX if we are here it is not a valid path
|
||||
else True
|
||||
|
||||
-- | On windows, the path starts with a separator.
|
||||
isAbsoluteInDrive :: (Unbox a, Integral a) => Array a -> Bool
|
||||
isAbsoluteInDrive a =
|
||||
-- Assuming the path is not empty.
|
||||
isSeparator Windows (wordToChar (Array.getIndexUnsafe 0 a))
|
||||
|
||||
-- | @C:\...@
|
||||
isAbsoluteDrive :: (Unbox a, Integral a) => Array a -> Bool
|
||||
isAbsoluteDrive a =
|
||||
if Array.byteLength a < 3
|
||||
then False
|
||||
-- Check colon first for quicker return
|
||||
else if (unsafeIndexChar 1 a /= ':')
|
||||
then False
|
||||
else if not (isSeparator Windows (unsafeIndexChar 2 a))
|
||||
then False
|
||||
-- XXX If we found a colon anyway this cannot be a valid path unless it has
|
||||
-- a drive prefix. colon is not a valid path character.
|
||||
-- XXX check isAlpha perf
|
||||
else if not (isAlpha (unsafeIndexChar 0 a))
|
||||
then False -- XXX if we are here it is not a valid path
|
||||
else True
|
||||
|
||||
-- | @\\\\...@
|
||||
isAbsoluteUNC :: (Unbox a, Integral a) => Array a -> Bool
|
||||
isAbsoluteUNC a =
|
||||
if Array.byteLength a < 2
|
||||
then False
|
||||
else if (unsafeIndexChar 0 a /= '\\')
|
||||
then False
|
||||
else if (unsafeIndexChar 1 a /= '\\')
|
||||
then False
|
||||
else True
|
||||
|
||||
-- | On Posix, a path starting with a separator is an absolute path.
|
||||
--
|
||||
-- On Windows:
|
||||
-- * @C:\\@ local absolute
|
||||
-- * @C:@ local relative
|
||||
-- * @\\@ local relative to current drive root
|
||||
-- * @\\\\@ UNC network path
|
||||
-- * @\\\\?\\C:\\@ Long UNC local path
|
||||
-- * @\\\\?\\UNC\\@ Long UNC server path
|
||||
-- * @\\\\.\\@ DOS local device namespace
|
||||
-- * @\\\\??\\@ DOS global namespace
|
||||
isAbsoluteRaw :: (Unbox a, Integral a) => OS -> Array a -> Bool
|
||||
isAbsoluteRaw Posix a =
|
||||
-- Assuming path is not empty.
|
||||
isSeparator Posix (wordToChar (Array.getIndexUnsafe 0 a))
|
||||
isAbsoluteRaw Windows a = isAbsoluteDrive a || isAbsoluteUNC a
|
||||
|
||||
isRelativeRaw :: (Unbox a, Integral a) => OS -> Array a -> Bool
|
||||
isRelativeRaw os = not . isAbsoluteRaw os
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
-- Operations of Path
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
-- XXX This can be generalized to an Array intersperse operation
|
||||
|
||||
-- | Like 'appendRel' but for the less restrictive 'Path' type, it always
|
||||
-- creates a syntactically valid 'Path' type but it may not be semantically
|
||||
-- valid because we may append an absolute path or we may append a path to a
|
||||
-- file path. The onus lies on the user to ensure that the first path is not a
|
||||
-- file and the second path is not absolute.
|
||||
{-# INLINE doAppend #-}
|
||||
doAppend :: (Unbox a, Integral a) => OS -> Array a -> Array a -> Array a
|
||||
doAppend os a b = unsafePerformIO $ do
|
||||
let lenA = Array.byteLength a
|
||||
lenB = Array.byteLength b
|
||||
assertM (lenA /= 0 && lenB /= 0)
|
||||
assertM (countTrailingBy (isSeparatorWord os) a == 0)
|
||||
let len = lenA + 1 + lenB
|
||||
arr <- MutArray.new len
|
||||
arr1 <- MutArray.spliceUnsafe arr (Array.unsafeThaw a)
|
||||
arr2 <- MutArray.snocUnsafe arr1 (charToWord posixSeparator)
|
||||
arr3 <- MutArray.spliceUnsafe arr2 (Array.unsafeThaw b)
|
||||
return (Array.unsafeFreeze arr3)
|
||||
|
||||
{-# INLINE withAppendCheck #-}
|
||||
withAppendCheck :: OS -> Path -> a -> a
|
||||
withAppendCheck Posix p2@(Path arr) f =
|
||||
if isAbsoluteRaw Posix arr
|
||||
then error $ "append: cannot append absolute path " ++ toString p2
|
||||
else f
|
||||
withAppendCheck Windows p2@(Path arr) f =
|
||||
if isAbsoluteInDrive arr
|
||||
then error $ "append: cannot append drive absolute path " ++ toString p2
|
||||
else if hasDrive arr
|
||||
then error $ "append: cannot append path with drive " ++ toString p2
|
||||
else if isAbsoluteUNC arr
|
||||
then error $ "append: cannot append absolute UNC path " ++ toString p2
|
||||
else f
|
||||
|
||||
-- | Does not check if any of the path is empty or if the second path is
|
||||
-- absolute.
|
||||
{-# INLINE unsafeAppendOS #-}
|
||||
unsafeAppendOS :: OS -> Path -> Path -> Path
|
||||
unsafeAppendOS os (Path a) p2@(Path b) =
|
||||
assert (withAppendCheck os p2 True) (Path $ doAppend os a b)
|
||||
|
||||
{-# INLINE unsafeAppend #-}
|
||||
unsafeAppend :: Path -> Path -> Path
|
||||
unsafeAppend = unsafeAppendOS currentOS
|
||||
|
||||
{-# INLINE appendOS #-}
|
||||
appendOS :: OS -> Path -> Path -> Path
|
||||
appendOS os (Path a) p2@(Path b) =
|
||||
withAppendCheck os p2 (Path $ doAppend os a b)
|
||||
|
||||
-- | Append a 'Path' to another. Fails if the second path is absolute.
|
||||
--
|
||||
-- Also see 'appendRel'.
|
||||
append :: Path -> Path -> Path
|
||||
append (Path a) (Path b) =
|
||||
let len = Array.byteLength a + 1 + Array.byteLength b
|
||||
-- XXX Check the leading separator or drive identifier. However,
|
||||
-- checking the drive letter may add an additional overhead (can it be
|
||||
-- arbitrarily long?), if it is significant we may want to have a
|
||||
-- separate combinePathChecked API for that.
|
||||
--
|
||||
-- Also, do not add a separator char if the first path has a trailing
|
||||
-- separator.
|
||||
newArr = unsafePerformIO $ do
|
||||
arr <- MutArray.new len
|
||||
arr1 <- MutArray.spliceUnsafe arr (Array.unsafeThaw a)
|
||||
arr2 <- MutArray.snocUnsafe arr1 separatorWord
|
||||
arr3 <- MutArray.spliceUnsafe arr2 (Array.unsafeThaw b)
|
||||
return (Array.unsafeFreeze arr3)
|
||||
in Path newArr
|
||||
append = appendOS currentOS
|
||||
|
||||
-- The only safety we need for paths is: (1) The first path can only be a Dir
|
||||
-- type path, and (2) second path can only be a Rel path.
|
||||
-- Can this be coerced to create unsafe versions?
|
||||
|
||||
-- | Extend a directory path by appending a relative path to it. This is the
|
||||
-- equivalent to the @</>@ operator from the @filepath@ package.
|
||||
-- | Append a 'Rel' 'Path' to a 'Dir' 'Path'. Never fails.
|
||||
--
|
||||
-- Also see 'append'.
|
||||
{-# INLINE appendRel #-}
|
||||
appendRel :: (IsPath (a (Dir Path)), IsPath b, IsPath (a b)) =>
|
||||
(a (Dir Path)) -> Rel b -> a b
|
||||
appendRel a (Rel b) =
|
||||
unsafeFromPath $ append (toPath a) (toPath b)
|
||||
appendRel a (Rel b) = unsafeFromPath $ unsafeAppend (toPath a) (toPath b)
|
||||
|
Loading…
Reference in New Issue
Block a user