Mirror of https://github.com/osm-search/Nominatim.git (synced 2024-11-26 13:27:52 +03:00)

2018 TIGER data conversion scripts, add documentation to /docs/data-sources

This commit is contained in:
parent 4e2fe6427c
commit aa41b813b8
@@ -105,7 +105,6 @@ set(CUSTOMFILES
     website/status.php
     utils/blocks.php
     utils/country_languages.php
-    utils/imports.php
     utils/importWikipedia.php
     utils/export.php
     utils/query.php
data-sources/us-tiger/README.md (new file, 29 lines)
@@ -0,0 +1,29 @@
# US TIGER address data

Convert the [TIGER](https://www.census.gov/geo/maps-data/data/tiger.html)/Line dataset of the US Census Bureau to SQL files which can be imported by Nominatim. The created tables in the Nominatim database are separate from the OpenStreetMap tables and get queried separately at search time.

The dataset gets updated once per year. Downloading is prone to be slow (it can take a full day) and converting the files can take hours as well.

Replace '2018' with the current year throughout.

1. Install the GDAL library and Python bindings and the unzip tool.

        # Ubuntu:
        sudo apt-get install python-gdal unzip
        # CentOS:
        sudo yum install gdal-python unzip

2. Get the TIGER 2018 data. You will need the EDGES files
   (3,233 zip files, 11GB total).

        wget -r ftp://ftp2.census.gov/geo/tiger/TIGER2018/EDGES/

3. Convert the data into SQL statements. Adjust the file paths in the scripts as needed.

        cd data-sources/us-tiger
        ./convert.sh <input-path> <output-path>

4. Optionally, package the created files:

        tar -czf tiger2018-nominatim-preprocessed.tar.gz tiger
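The recursive download in step 2 of the README fetches every county in the country. As a rough sketch (not part of this commit), the download can be restricted to a single state with wget's accept pattern; the FIPS prefix `01` (Alabama) is used here purely as an example:

```sh
# Only mirror EDGES archives whose county FIPS code starts with 01 (Alabama).
# -np avoids ascending to parent directories, -nd drops the remote directory tree.
wget -r -np -nd -A 'tl_2018_01*_edges.zip' ftp://ftp2.census.gov/geo/tiger/TIGER2018/EDGES/
```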
data-sources/us-tiger/convert.sh (new executable file, 48 lines)
@@ -0,0 +1,48 @@
#!/bin/bash

INPATH=$1
OUTPATH=$2

if [[ ! -d "$INPATH" ]]; then
    echo "input path does not exist"
    exit 1
fi

if [[ ! -d "$OUTPATH" ]]; then
    echo "output path does not exist"
    exit 1
fi

INREGEX='_([0-9]{5})_edges.zip'
WORKPATH="$OUTPATH/tmp-workdir/"
mkdir -p "$WORKPATH"

INFILES=($INPATH/*.zip)
echo "Found ${#INFILES[*]} files."

for F in ${INFILES[*]}; do
    # echo $F

    if [[ "$F" =~ $INREGEX ]]; then
        COUNTYID=${BASH_REMATCH[1]}
        SHAPEFILE="$WORKPATH/$(basename $F '.zip').shp"
        SQLFILE="$OUTPATH/$COUNTYID.sql"

        unzip -o -q -d "$WORKPATH" "$F"
        if [[ ! -e "$SHAPEFILE" ]]; then
            echo "Unzip failed. $SHAPEFILE not found."
            exit 1
        fi

        ./tiger_address_convert.py "$SHAPEFILE" "$SQLFILE"

        rm $WORKPATH/*
    fi
done

OUTFILES=($OUTPATH/*.sql)
echo "Wrote ${#OUTFILES[*]} files."

rmdir $WORKPATH
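For debugging, the per-file work that convert.sh performs can be replayed by hand for a single county. A minimal sketch, assuming the example archive name tl_2018_01001_edges.zip and that it is run from data-sources/us-tiger:

```sh
# Unpack one EDGES archive and run the converter on the extracted shapefile,
# mirroring one iteration of the loop in convert.sh.
mkdir -p /tmp/tiger-work
unzip -o -q -d /tmp/tiger-work tl_2018_01001_edges.zip
./tiger_address_convert.py /tmp/tiger-work/tl_2018_01001_edges.shp 01001.sql
```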
@@ -10,8 +10,10 @@ ADD_CUSTOM_TARGET(doc
     COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/admin ${CMAKE_CURRENT_BINARY_DIR}/admin
     COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/develop ${CMAKE_CURRENT_BINARY_DIR}/develop
     COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/api ${CMAKE_CURRENT_BINARY_DIR}/api
+    COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/data-sources ${CMAKE_CURRENT_BINARY_DIR}/data-sources
     COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/index.md ${CMAKE_CURRENT_BINARY_DIR}/index.md
     COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/extra.css ${CMAKE_CURRENT_BINARY_DIR}/extra.css
+    COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/us-tiger/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/US-Tiger.md
     COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-7.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-7.md
     COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-16.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-16.md
     COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-18.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-18.md
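These commands run as part of the `doc` custom target shown in the hunk header. A minimal sketch of building the documentation (not part of this diff), assuming an out-of-source CMake build directory inside the checkout and that mkdocs, the documentation tooling behind docs/mkdocs.yml, is installed:

```sh
# Configure the build and assemble the docs tree via the symlinks above.
mkdir -p build && cd build
cmake ..
make doc
```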
@@ -101,52 +101,34 @@ Note that this command downloads the phrases from the wiki link above.
 
 ## Installing Tiger housenumber data for the US
 
-Nominatim is able to use the official TIGER address set to complement the
-OSM house number data in the US. You can add TIGER data to your own Nominatim
-instance by following these steps:
+Nominatim is able to use the official [TIGER](https://www.census.gov/geo/maps-data/data/tiger.html)
+address set to complement the OSM house number data in the US. You can add
+TIGER data to your own Nominatim instance by following these steps. The
+entire US adds about 10GB to your database.
 
-  1. Install the GDAL library and python bindings and the unzip tool
-
-     * Ubuntu: `sudo apt-get install python-gdal unzip`
-     * CentOS: `sudo yum install gdal-python unzip`
-
-  2. Get preprocessed TIGER 2017 data and unpack it into the
+  1. Get preprocessed TIGER 2018 data and unpack it into the
      data directory in your Nominatim sources:
 
        cd Nominatim/data
-       wget https://nominatim.org/data/tiger2017-nominatim-preprocessed.tar.gz
-       tar xf tiger2017-nominatim-preprocessed.tar.gz
+       wget https://nominatim.org/data/tiger2018-nominatim-preprocessed.tar.gz
+       tar xf tiger2018-nominatim-preprocessed.tar.gz
 
-  3. Import the data into your Nominatim database:
+     `data-sources/us-tiger/README.md` explains how the data got preprocessed.
+
+  2. Import the data into your Nominatim database:
 
        ./utils/setup.php --import-tiger-data
 
-  4. Enable use of the Tiger data in your `settings/local.php` by adding:
+  3. Enable use of the Tiger data in your `settings/local.php` by adding:
 
        @define('CONST_Use_US_Tiger_Data', true);
 
-  5. Apply the new settings:
+  4. Apply the new settings:
 
 ```sh
    ./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
 ```
 
-The entire US adds about 10GB to your database.
-
-You can also process the data from the original TIGER data to create the
-SQL files, Nominatim needs for the import:
-
-  1. Get the TIGER 2017 data. You will need the EDGES files
-     (3,234 zip files, 11GB total).
-
-       wget -r ftp://ftp2.census.gov/geo/tiger/TIGER2017/EDGES/
-
-  2. Convert the data into SQL statements:
-
-       ./utils/imports.php --parse-tiger <tiger edge data directory>
-
-Be warned that this can take quite a long time. After this process is finished,
-the same preprocessed files as above are available in `data/tiger`.
-
 ## Updates
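Taken together, the new steps 1 through 4 of the revised documentation boil down to a short command sequence. A condensed sketch (not part of the documentation itself), assuming it is run from the root of the Nominatim source tree:

```sh
# Step 1: fetch and unpack the preprocessed TIGER 2018 data.
cd data
wget https://nominatim.org/data/tiger2018-nominatim-preprocessed.tar.gz
tar xf tiger2018-nominatim-preprocessed.tar.gz
cd ..
# Step 2: load the SQL files into the Nominatim database.
./utils/setup.php --import-tiger-data
# Step 3: add @define('CONST_Use_US_Tiger_Data', true); to settings/local.php, then
# Step 4: apply the new settings.
./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
```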
docs/data-sources/overview.md (new file, 4 lines)
@@ -0,0 +1,4 @@
# Additional Data Sources

This guide explains how the data sources other than OpenStreetMap that are
mentioned in the install instructions were obtained and converted.
@@ -20,6 +20,9 @@ pages:
         - 'Troubleshooting' : 'admin/Faq.md'
     - 'Developers Guide':
         - 'Overview' : 'develop/overview.md'
+    - 'Data Sources':
+        - 'Overview' : 'data-sources/overview.md'
+        - 'US Census (Tiger)' : 'data-sources/US-Tiger.md'
     - 'Appendix':
         - 'Installation on CentOS 7' : 'appendix/Install-on-Centos-7.md'
         - 'Installation on Ubuntu 16' : 'appendix/Install-on-Ubuntu-16.md'
@ -10,7 +10,7 @@ Feature: Reverse geocoding
|
||||
| way | place | house |
|
||||
And result addresses contain
|
||||
| house_number | road | postcode | country_code |
|
||||
| 906 | West 1st Street | 57274 | us |
|
||||
| 909 | West 1st Street | 57274 | us |
|
||||
|
||||
@Tiger
|
||||
Scenario: No TIGER house number for zoom < 18
|
||||
|
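The `@Tiger` scenarios only pass against a database that has the TIGER data imported. As a sketch of how to run just these scenarios, assuming the BDD suite lives under `test/bdd` and is driven by the `behave` tool (both assumptions, not stated in this diff):

```sh
# Run only the scenarios tagged @Tiger.
cd test/bdd
behave --tags=Tiger
```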
utils/imports.php (deleted file, 56 lines)
@@ -1,56 +0,0 @@
#!@PHP_BIN@ -Cq
<?php

require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
require_once(CONST_BasePath.'/lib/init-cmd.php');
ini_set('memory_limit', '800M');

$aCMDOptions
 = array(
    'Create and setup nominatim search system',
    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),

    array('parse-tiger', '', 0, 1, 1, 1, 'realpath', 'Convert tiger edge files to nominatim sql import - datafiles from 2011 or later (source: edges directory of tiger data)'),
   );
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);

if (isset($aCMDResult['parse-tiger'])) {
    if (!file_exists(CONST_Tiger_Data_Path)) mkdir(CONST_Tiger_Data_Path);

    $sTempDir = tempnam('/tmp', 'tiger');
    unlink($sTempDir);
    mkdir($sTempDir);

    foreach (glob($aCMDResult['parse-tiger'].'/tl_20??_?????_edges.zip', 0) as $sImportFile) {
        set_time_limit(30);
        preg_match('#([0-9]{5})_(.*)#', basename($sImportFile), $aMatch);
        $sCountyID = $aMatch[1];

        echo 'Processing '.$sCountyID."...\n";
        $sUnzipCmd = "unzip -d $sTempDir $sImportFile";
        exec($sUnzipCmd);

        $sShapeFilename = $sTempDir.'/'.basename($sImportFile, '.zip').'.shp';
        $sSqlFilenameTmp = $sTempDir.'/'.$sCountyID.'.sql';
        $sSqlFilename = CONST_Tiger_Data_Path.'/'.$sCountyID.'.sql';

        if (!file_exists($sShapeFilename)) {
            echo "Failed unzip ($sImportFile)\n";
        } else {
            $sParseCmd = CONST_BasePath.'/utils/tigerAddressImport.py '.$sShapeFilename.' '.$sSqlFilenameTmp;
            exec($sParseCmd);
            if (!file_exists($sSqlFilenameTmp)) {
                echo "Failed parse ($sImportFile)\n";
            } else {
                copy($sSqlFilenameTmp, $sSqlFilename);
            }
        }
        // Cleanup
        foreach (glob($sTempDir.'/*') as $sTmpFile) {
            unlink($sTmpFile);
        }
    }
}