Introduce database of US States and Territories

Co-authored-by: Eric Demko <edemko@layer3com.com>
This commit is contained in:
Zankoku Okuno 2021-09-03 11:37:45 -04:00 committed by GitHub
parent e03df9d452
commit 8f6df09b69
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 513 additions and 41 deletions

View File

@ -1 +1,6 @@
packages: ./country, ./country-code-generation
packages: ./country, ./code-generation
source-repository-package
type: git
location: https://github.com/andrewthad/contiguous
tag: f7e10b86e22e1ad173ab54cc66bd6c2bf3322c11

View File

@ -0,0 +1,87 @@
{-# LANGUAGE LambdaCase #-}
module Main where
import Control.Monad (forM,forM_)
import Data.List.Split (splitOn)
import System.Environment (getArgs)
import System.Exit (exitFailure)
import System.IO (stderr,Handle,withFile,hPutStrLn,IOMode(..))
-- | Entry point. Expects exactly two command-line arguments: the path of the
-- subdivision CSV to read and the path of the Haskell module to write.
-- Any other argument count prints a usage message and exits with failure.
main :: IO ()
main = do
  args <- getArgs
  (source, target) <- case args of
    [i, o] -> pure (i, o)
    _ -> die "usage: two required filepath arguments (in, out)"
  subdivisions <- parse source
  -- The input is parsed in full before the output file is opened.
  withFile target WriteMode (\h -> render h subdivisions)
-- | Read the subdivision CSV at @file@ and return one @(code, name, category)@
-- triple per data row, in file order.
--
-- The first line must equal 'expectedHeader'. Every later line must split on
-- commas into exactly three fields; there is no quoting support, so a field
-- cannot itself contain a comma. A trailing carriage return is dropped from
-- each line so that files saved with CRLF line endings are accepted.
--
-- Any violation is reported on stderr through 'die', which terminates the
-- program. Row errors name the offending line number and show its contents.
parse :: FilePath -> IO [(String, String, String)]
parse file = do
  content <- readFile file
  rawRows <- case map stripCR (lines content) of
    [] -> die "no file contents"
    (header:body)
      | header == expectedHeader -> pure body
      | otherwise -> die $ "unrecognized header: " ++ show header
  -- The header is line 1, so data rows are numbered from 2.
  forM (zip [2 :: Int ..] rawRows) $ \(lineNo, row) ->
    case splitOn "," row of
      [x,y,z] -> pure (x,y,z)
      _ -> die $ "bad data row at line " ++ show lineNo ++ ": " ++ show row
  where
    -- 'lines' splits on '\n' only; remove the '\r' a CRLF file leaves behind.
    stripCR s = case reverse s of
      ('\r':rest) -> reverse rest
      _ -> s
-- | Write the generated @Country.Unexposed.Subdivision@ module to @out@.
--
-- @xs@ holds one @(code, name, category)@ triple per subdivision. The module
-- gets three parallel arrays (@codeArray@, @nameArray@, @categoryArray@), each
-- listing one field of every triple in input order, followed by
-- @actualNumberOfSubdivisions@, the length of @xs@.
--
-- An empty @xs@ is rendered as three empty arrays and a count of 0 rather than
-- failing on the first element.
render :: Handle -> [(String, String, String)] -> IO ()
render out xs = do
  mapM_ put
    [ "{-# LANGUAGE OverloadedStrings #-}"
    , ""
    , "-- This module is autogenerated. Do not edit it by hand.\n"
    , "module Country.Unexposed.Subdivision"
    , " ( codeArray"
    , " , nameArray"
    , " , categoryArray"
    , " , actualNumberOfSubdivisions"
    , " ) where"
    , ""
    , "import Data.Primitive.Contiguous (SmallArray)"
    , "import Data.Text (Text)"
    , ""
    , "import qualified Data.Primitive.Contiguous as Arr"
    , ""
    ]
  putArray "codeArray" [code | (code,_,_) <- xs]
  put ""
  putArray "nameArray" [name | (_,name,_) <- xs]
  put ""
  putArray "categoryArray" [category | (_,_,category) <- xs]
  put "actualNumberOfSubdivisions :: Int"
  put $ "actualNumberOfSubdivisions = " ++ show len
  where
    put = hPutStrLn out
    len = length xs
    -- Emit one top-level array: signature, definition, elements, pragma.
    putArray ident items = do
      put $ ident ++ " :: SmallArray Text"
      put $ ident ++ " = Arr.fromListN " ++ show len
      case items of
        -- No elements: still open the list so the closing bracket matches.
        [] -> put " ["
        (top:rest) -> do
          putThing '[' top
          forM_ rest $ putThing ','
      put " ]"
      put $ "{-# NOINLINE " ++ ident ++ " #-}"
    -- One element line; 'show' supplies the quotes and escaping.
    putThing c it = put $ " " ++ c:" " ++ show it
-- | The exact first line a subdivision CSV must start with. 'parse' rejects
-- any file whose header line differs from it.
expectedHeader :: String
expectedHeader = "code,name,category"
-- | Print @msg@ on stderr, then end the process with a failing exit status.
-- The result type is free because control never returns to the caller.
die :: String -> IO a
die msg = hPutStrLn stderr msg >> exitFailure

View File

@ -1,9 +1,10 @@
cabal-version: 3.0
name: country-code-generation
version: 0.1.0.0
-- synopsis:
-- description:
homepage: https://github.com/andrewthad/country#readme
license: BSD3
license: BSD-3-Clause
license-file: LICENSE
author: Andrew Martin
maintainer: andrew.thaddeus@gmail.com
@ -11,23 +12,30 @@ copyright: 2017 Andrew Martin
category: Web
build-type: Simple
extra-source-files: README.md, ../aliases.txt, ../countries.csv
cabal-version: >=1.10
executable country-code-generation
hs-source-dirs: app
hs-source-dirs: app-countries
main-is: Main.hs
build-depends:
, base
, bytestring >= 0.10
, colonnade >= 1.2.0.1
, containers
, disjoint-containers >= 0.2.3
, primitive
, siphon >= 0.8.1
, streaming
, streaming-bytestring
, text >= 1.2
, transformers
default-language: Haskell2010
executable subdivision-code-generation
hs-source-dirs: app-subdivisions
main-is: Main.hs
build-depends:
base
, streaming
, streaming-bytestring
, bytestring >= 0.10
, text >= 1.2
, siphon >= 0.8.1
, colonnade >= 1.2.0.1
, disjoint-containers >= 0.2.3
, containers
, transformers
, primitive
, split >=0.2
default-language: Haskell2010
source-repository head

View File

@ -44,17 +44,19 @@ tested-with: GHC==8.10.1, GHC==8.8.3, GHC==8.6.5, GHC==8.4.4
library
hs-source-dirs: src
exposed-modules:
Country
Country.Identifier
Country.Unsafe
Continent
Continent.Unsafe
Country
Country.Identifier
Country.Subdivision
Country.Unsafe
other-modules:
Country.Unexposed.Alias
Country.Unexposed.AlphaTwoPtr
Country.Unexposed.Continents
Country.Unexposed.Encode.English
Country.Unexposed.Names
Country.Unexposed.Subdivision
Country.Unexposed.Trie
Country.Unexposed.TrieByte
Country.Unexposed.Util
@ -66,12 +68,14 @@ library
, bytehash >=0.1 && <0.2
, byteslice >=0.2.3 && <0.3
, bytestring >= 0.10 && <0.12
, contiguous >=0.5.2
, deepseq >= 1.3.0.2 && <1.5
, entropy >=0.4.1.5 && <0.5
, hashable >=1.2 && <1.4
, primitive >= 0.6.4 && <0.8
, scientific >=0.3 && <0.4
, text >= 1.2 && <1.3
, text-short >=0.1.3
, unordered-containers >=0.2 && <0.3
default-language: Haskell2010
ghc-options: -Wall -O2

View File

@ -0,0 +1,88 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UnboxedTuples #-}
module Country.Subdivision
( Subdivision
-- , country -- TODO
-- * Accessors
, encodeAlpha
, encodeEnglish
, category
-- * Decoding
, decodeAlpha
, decodeName
) where
import Data.Hashable (Hashable)
import Data.HashMap.Strict (HashMap)
import Data.Primitive.Contiguous (index)
import Data.Primitive.Types (Prim)
import Data.Text (Text)
import Data.Word (Word16)
import Foreign.Storable (Storable)
import qualified Country.Unexposed.Subdivision as Arrays
import qualified Data.HashMap.Strict as HM
import qualified Data.Primitive.Contiguous as Arr
import qualified Data.Text as T
-- | A country subdivision, stored as its zero-based position in the tables of
-- "Country.Unexposed.Subdivision". The constructor is not exported.
newtype Subdivision = Subdivision Word16
deriving (Eq,Ord,Prim,Hashable,Storable)
-- | Shows a subdivision as the quoted form of its code: 'show' applied to the
-- result of 'encodeAlpha'.
instance Show Subdivision where
  show sub = show (encodeAlpha sub)
-- | Subdivisions are numbered from zero in table order.
--
-- 'toEnum' calls 'error' for any number outside
-- @[0, actualNumberOfSubdivisions)@.
--
-- 'enumFrom' and 'enumFromThen' are defined explicitly so that they stop at
-- the bounds of the type. The class defaults keep counting past the last
-- subdivision, so @[minBound ..]@ would otherwise produce every value and
-- then hit the 'toEnum' error.
instance Enum Subdivision where
  fromEnum (Subdivision w) = fromIntegral w
  toEnum number
    | number >= 0 && number < Arrays.actualNumberOfSubdivisions = Subdivision (fromIntegral number)
    | otherwise = error ("toEnum: cannot convert " ++ show number ++ " to Subdivision")
  enumFrom x = enumFromTo x maxBound
  enumFromThen x y = enumFromThenTo x y limit
    where
      -- Ascending sequences run to the top of the type, descending to the bottom.
      limit
        | fromEnum y >= fromEnum x = maxBound
        | otherwise = minBound
-- | Bounds are the first and last positions of the subdivision tables.
instance Bounded Subdivision where
  minBound = Subdivision 0
  maxBound = Subdivision (fromIntegral lastIndex)
    where
      lastIndex = Arrays.actualNumberOfSubdivisions - 1
-- country :: Subdivision -> Country
-- country (Subdivision i) = index Arrays.countryArray i
-- | The subdivision's code as stored in the code table, e.g. @US-AL@.
encodeAlpha :: Subdivision -> Text
encodeAlpha (Subdivision ix) = index Arrays.codeArray offset
  where
    offset = fromIntegral @Word16 @Int ix
-- | The subdivision's English name as stored in the name table,
-- e.g. @Alabama@.
encodeEnglish :: Subdivision -> Text
encodeEnglish (Subdivision ix) =
  let offset = fromIntegral @Word16 @Int ix
  in index Arrays.nameArray offset
-- | The subdivision's category label as stored in the category table,
-- e.g. @state@, @district@ or @outlying area@.
category :: Subdivision -> Text
category (Subdivision ix) = index Arrays.categoryArray position
  where
    position = fromIntegral @Word16 @Int ix
-- | Decode a 'Subdivision' from its ISO subdivision code, e.g. @US-AL@.
-- The code must match a table entry exactly; anything else gives 'Nothing'.
decodeAlpha :: Text -> Maybe Subdivision
decodeAlpha code = HM.lookup code alphaHashMap
-- | Lookup table behind 'decodeAlpha': every entry of the code table mapped to
-- the subdivision at the same position. Only the codes as written in the table
-- are registered; no lower-cased variants are added.
alphaHashMap :: HashMap Text Subdivision
alphaHashMap = Arr.ifoldl' register HM.empty Arrays.codeArray
  where
    register acc ix code = HM.insert code (Subdivision (fromIntegral ix)) acc
{-# NOINLINE alphaHashMap #-}
-- | Decode a 'Subdivision' from its English name.
--
-- Lookup is not forgiving: a name is recognised only when it is spelled
-- exactly as in the name table or entirely in lower case. Any other spelling
-- or capitalisation gives 'Nothing'.
decodeName :: Text -> Maybe Subdivision
decodeName name = HM.lookup name englishHashMap
-- | Lookup table behind 'decodeName'. Each entry of the name table is
-- registered twice, once verbatim and once lower-cased, and both keys map to
-- the subdivision at that entry's position.
englishHashMap :: HashMap Text Subdivision
englishHashMap = Arr.ifoldl' register HM.empty Arrays.nameArray
  where
    register acc ix name =
      let place = Subdivision (fromIntegral ix)
          withLower = HM.insert (T.toLower name) place acc
      in HM.insert name place withLower
{-# NOINLINE englishHashMap #-}

View File

@ -24,42 +24,43 @@ module Country.Unexposed.Names
, Country(..)
) where
import Control.Monad
import Control.Monad.ST
import Data.Word
import Control.DeepSeq (NFData)
import Data.Word (Word16)
import Data.Hashable (Hashable)
import Data.Primitive.Types (Prim)
import Data.HashMap.Strict (HashMap)
import Control.Exception (bracket)
import Country.Unexposed.Alias (aliases)
import Country.Unexposed.Encode.English (countryNameQuads)
import Data.Bytes.Types (Bytes(Bytes))
import Data.ByteString (ByteString)
import Data.Primitive (indexArray)
import Data.Char (toLower,isAlpha,toUpper)
import Data.Data
import Data.Hashable (Hashable)
import Data.HashMap.Strict (HashMap)
import Data.Primitive (Array,indexArray,newArray,unsafeFreezeArray,writeArray)
import Data.Primitive (sizeOf)
import Data.Primitive (writeByteArray,indexByteArray,unsafeFreezeByteArray,newByteArray)
import Data.Primitive.Array (Array(..))
import Data.Primitive.ByteArray (ByteArray(..))
import Control.Monad
import Data.Primitive.Types (Prim)
import Data.Text (Text)
import Data.Text.Encoding (encodeUtf8)
import Country.Unexposed.Alias (aliases)
import Data.Word (Word16)
import Foreign.Storable (Storable)
import GHC.Generics (Generic)
import System.Entropy (openHandle,closeHandle)
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.Text as T
import qualified Data.Aeson as AE
import qualified Data.Aeson.Types as AET
import qualified Data.HashMap.Strict as HM
import qualified Data.List as L
import Control.Monad.ST
import Foreign.Storable (Storable)
import Data.Text (Text)
import Data.Word
import Data.Char (toLower,isAlpha,toUpper)
import Country.Unexposed.Encode.English (countryNameQuads)
import Data.Primitive (Array,indexArray,newArray,unsafeFreezeArray,writeArray,
writeByteArray,indexByteArray,unsafeFreezeByteArray,newByteArray,sizeOf)
import qualified Data.Text as T
import qualified Data.Scientific as SCI
import qualified GHC.Exts as Exts
import qualified System.IO as IO
import GHC.Generics (Generic)
import Data.Data
import Data.Bytes.Types (Bytes(Bytes))
import Control.Exception (bracket)
import System.IO.Unsafe (unsafePerformIO)
import System.Entropy (openHandle,closeHandle)
import qualified Data.Bytes as Bytes
import qualified Data.Bytes.HashMap.Word as BytesHashMap
import qualified Data.Text.Internal as Text
@ -252,4 +253,3 @@ alphaThreeHashMap = L.foldl'
)
HM.empty countryNameQuads
{-# NOINLINE alphaThreeHashMap #-}

View File

@ -0,0 +1,203 @@
{-# LANGUAGE OverloadedStrings #-}
-- This module is autogenerated. Do not edit it by hand.
module Country.Unexposed.Subdivision
( codeArray
, nameArray
, categoryArray
, actualNumberOfSubdivisions
) where
import Data.Primitive.Contiguous (SmallArray)
import Data.Text (Text)
import qualified Data.Primitive.Contiguous as Arr
codeArray :: SmallArray Text
codeArray = Arr.fromListN 57
[ "US-AL"
, "US-AK"
, "US-AZ"
, "US-AR"
, "US-CA"
, "US-CO"
, "US-CT"
, "US-DE"
, "US-FL"
, "US-GA"
, "US-HI"
, "US-ID"
, "US-IL"
, "US-IN"
, "US-IA"
, "US-KS"
, "US-KY"
, "US-LA"
, "US-ME"
, "US-MD"
, "US-MA"
, "US-MI"
, "US-MN"
, "US-MS"
, "US-MO"
, "US-MT"
, "US-NE"
, "US-NV"
, "US-NH"
, "US-NJ"
, "US-NM"
, "US-NY"
, "US-NC"
, "US-ND"
, "US-OH"
, "US-OK"
, "US-OR"
, "US-PA"
, "US-RI"
, "US-SC"
, "US-SD"
, "US-TN"
, "US-TX"
, "US-UT"
, "US-VT"
, "US-VA"
, "US-WA"
, "US-WV"
, "US-WI"
, "US-WY"
, "US-DC"
, "US-AS"
, "US-GU"
, "US-MP"
, "US-PR"
, "US-UM"
, "US-VI"
]
{-# NOINLINE codeArray #-}
nameArray :: SmallArray Text
nameArray = Arr.fromListN 57
[ "Alabama"
, "Alaska"
, "Arizona"
, "Arkansas"
, "California"
, "Colorado"
, "Connecticut"
, "Delaware"
, "Florida"
, "Georgia"
, "Hawaii"
, "Idaho"
, "Illinois"
, "Indiana"
, "Iowa"
, "Kansas"
, "Kentucky"
, "Louisiana"
, "Maine"
, "Maryland"
, "Massachusetts"
, "Michigan"
, "Minnesota"
, "Mississippi"
, "Missouri"
, "Montana"
, "Nebraska"
, "Nevada"
, "New Hampshire"
, "New Jersey"
, "New Mexico"
, "New York"
, "North Carolina"
, "North Dakota"
, "Ohio"
, "Oklahoma"
, "Oregon"
, "Pennsylvania"
, "Rhode Island"
, "South Carolina"
, "South Dakota"
, "Tennessee"
, "Texas"
, "Utah"
, "Vermont"
, "Virginia"
, "Washington"
, "West Virginia"
, "Wisconsin"
, "Wyoming"
, "District of Columbia"
, "American Samoa"
, "Guam"
, "Northern Mariana Islands"
, "Puerto Rico"
, "United States Minor Outlying Islands"
, "U.S. Virgin Islands"
]
{-# NOINLINE nameArray #-}
categoryArray :: SmallArray Text
categoryArray = Arr.fromListN 57
[ "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "state"
, "district"
, "outlying area"
, "outlying area"
, "outlying area"
, "outlying area"
, "outlying area"
, "outlying area"
]
{-# NOINLINE categoryArray #-}
actualNumberOfSubdivisions :: Int
actualNumberOfSubdivisions = 57

View File

@ -3,6 +3,7 @@
import Continent (Continent)
import Country (Country)
import Country.Subdivision (Subdivision)
import Data.Char (ord)
import Data.Maybe (fromJust)
import Data.Primitive.Ptr (indexOffPtr)
@ -13,6 +14,7 @@ import Test.Tasty.QuickCheck (testProperty,(===))
import qualified Continent
import qualified Country
import qualified Country.Subdivision as Subdivision
import qualified Data.Text as Text
import qualified Test.QuickCheck as QC
import qualified Test.QuickCheck.Classes as QCC
@ -70,6 +72,20 @@ main = defaultMain $ testGroup "Country" $
, testProperty "country-continent-smoke-china" $
Continent.continent (fromJust $ Country.decodeAlphaTwo "CN") === Continent.Asia
]
, testGroup "Subdivision" $
( map lawsToTest
$ map ($ Proxy @Subdivision)
$ [ QCC.boundedEnumLaws
, QCC.eqLaws
, QCC.ordLaws
, QCC.primLaws
, QCC.showLaws
, QCC.storableLaws
]
) ++
[ testProperty "encode-decode-alpha" $ \x ->
Just x === Subdivision.decodeAlpha (Subdivision.encodeAlpha x)
]
]
c2w :: Char -> Word8
@ -81,8 +97,11 @@ proxy = Proxy
-- | Convert a 'QCC.Laws' bundle into a test group that carries the bundle's
-- name and holds one property test per (label, property) pair.
lawsToTest :: QCC.Laws -> TestTree
lawsToTest (QCC.Laws groupName laws) =
  testGroup groupName [TQC.testProperty label prop | (label, prop) <- laws]
instance QC.Arbitrary Continent where
arbitrary = QC.arbitraryBoundedEnum
instance QC.Arbitrary Country where
arbitrary = QC.arbitraryBoundedEnum
instance QC.Arbitrary Continent where
instance QC.Arbitrary Subdivision where
arbitrary = QC.arbitraryBoundedEnum

58
iso-3166-2-us.csv Normal file
View File

@ -0,0 +1,58 @@
code,name,category
US-AL,Alabama,state
US-AK,Alaska,state
US-AZ,Arizona,state
US-AR,Arkansas,state
US-CA,California,state
US-CO,Colorado,state
US-CT,Connecticut,state
US-DE,Delaware,state
US-FL,Florida,state
US-GA,Georgia,state
US-HI,Hawaii,state
US-ID,Idaho,state
US-IL,Illinois,state
US-IN,Indiana,state
US-IA,Iowa,state
US-KS,Kansas,state
US-KY,Kentucky,state
US-LA,Louisiana,state
US-ME,Maine,state
US-MD,Maryland,state
US-MA,Massachusetts,state
US-MI,Michigan,state
US-MN,Minnesota,state
US-MS,Mississippi,state
US-MO,Missouri,state
US-MT,Montana,state
US-NE,Nebraska,state
US-NV,Nevada,state
US-NH,New Hampshire,state
US-NJ,New Jersey,state
US-NM,New Mexico,state
US-NY,New York,state
US-NC,North Carolina,state
US-ND,North Dakota,state
US-OH,Ohio,state
US-OK,Oklahoma,state
US-OR,Oregon,state
US-PA,Pennsylvania,state
US-RI,Rhode Island,state
US-SC,South Carolina,state
US-SD,South Dakota,state
US-TN,Tennessee,state
US-TX,Texas,state
US-UT,Utah,state
US-VT,Vermont,state
US-VA,Virginia,state
US-WA,Washington,state
US-WV,West Virginia,state
US-WI,Wisconsin,state
US-WY,Wyoming,state
US-DC,District of Columbia,district
US-AS,American Samoa,outlying area
US-GU,Guam,outlying area
US-MP,Northern Mariana Islands,outlying area
US-PR,Puerto Rico,outlying area
US-UM,United States Minor Outlying Islands,outlying area
US-VI,U.S. Virgin Islands,outlying area
1 code name category
2 US-AL Alabama state
3 US-AK Alaska state
4 US-AZ Arizona state
5 US-AR Arkansas state
6 US-CA California state
7 US-CO Colorado state
8 US-CT Connecticut state
9 US-DE Delaware state
10 US-FL Florida state
11 US-GA Georgia state
12 US-HI Hawaii state
13 US-ID Idaho state
14 US-IL Illinois state
15 US-IN Indiana state
16 US-IA Iowa state
17 US-KS Kansas state
18 US-KY Kentucky state
19 US-LA Louisiana state
20 US-ME Maine state
21 US-MD Maryland state
22 US-MA Massachusetts state
23 US-MI Michigan state
24 US-MN Minnesota state
25 US-MS Mississippi state
26 US-MO Missouri state
27 US-MT Montana state
28 US-NE Nebraska state
29 US-NV Nevada state
30 US-NH New Hampshire state
31 US-NJ New Jersey state
32 US-NM New Mexico state
33 US-NY New York state
34 US-NC North Carolina state
35 US-ND North Dakota state
36 US-OH Ohio state
37 US-OK Oklahoma state
38 US-OR Oregon state
39 US-PA Pennsylvania state
40 US-RI Rhode Island state
41 US-SC South Carolina state
42 US-SD South Dakota state
43 US-TN Tennessee state
44 US-TX Texas state
45 US-UT Utah state
46 US-VT Vermont state
47 US-VA Virginia state
48 US-WA Washington state
49 US-WV West Virginia state
50 US-WI Wisconsin state
51 US-WY Wyoming state
52 US-DC District of Columbia district
53 US-AS American Samoa outlying area
54 US-GU Guam outlying area
55 US-MP Northern Mariana Islands outlying area
56 US-PR Puerto Rico outlying area
57 US-UM United States Minor Outlying Islands outlying area
58 US-VI U.S. Virgin Islands outlying area