sort.sh - osm-zipcodes - Extract (Dutch) addresses from OpenStreetMap OSM XML
(HTM) git clone git://git.codemadness.org/osm-zipcodes
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
sort.sh (469B)
---
#!/bin/sh
# Sort the tab-separated address rows in addr.csv and write two files into
# the current directory:
#   addr_sort.csv      - all rows, sorted on city, street, housenumber
#   addr_sort_uniq.csv - the sorted rows with duplicate addresses removed
#
# NOTE(review): --mmap and MALLOC_OPTIONS look BSD/OpenBSD-specific (GNU sort
# has no --mmap option) - confirm the target platform before running elsewhere.

# stop at the first failing command, so uniq never runs on a missing or
# stale addr_sort.csv after sort has failed.
set -e

# override malloc options for sort, else it is too slow.
# NOTE(review): presumably the lowercase letters switch off the matching
# malloc(3) checking options - confirm against malloc(3) on the target system.
export MALLOC_OPTIONS="scfgju"
# simple/binary collation.
export LC_ALL=C

# sort on city (field 7), street (field 5), housenumber (field 6, numeric).
# The trailing "-k 4" key orders rows whose primary keys compare equal on
# everything from field 4 to the end of the line. uniq only drops ADJACENT
# duplicates; without this key the tie-break is the whole line, which starts
# with the coordinate columns, so a different row with equal keys (e.g.
# housenumbers 12a and 12b, which are equal numerically) could end up between
# two otherwise identical rows and keep them from being collapsed.
# -S 1G sets the sort memory buffer size, -T keeps the temporary files in the
# current directory.
sort \
	--mmap \
	-k 7,7 -k 5,5 -k 6,6n -k 4 \
	-t "$(printf '\t')" \
	-S 1G \
	-T "$(pwd)" \
	-o addr_sort.csv addr.csv

# unique entries, ignore latitude, longitude.
# -f 3 skips the first three blank-separated fields of each line before
# comparing; this assumes those fields are non-empty and contain no spaces.
# NOTE(review): the comment above names two columns but three are skipped -
# confirm what the third field holds.
uniq -f 3 addr_sort.csv > addr_sort_uniq.csv

# DEBUG: show duplicate entries.
#uniq -d -f 3 addr_sort.csv > addr_sort_uniq.csv