-
User expired authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
importData.sh 13.45 KiB
#! /bin/bash
#############################
# This script imports data (e.g. the road network of Bolzano) that is
# downloaded from OSM (OpenStreetMap) into the available databases
# (neo4j_spatial, postgis and spatialite).
#
# Positional arguments (all optional, handed to the fn_arg2* helpers below):
#   $1 - PostgreSQL user name       (PG_DB_USER)
#   $2 - PostgreSQL password        (PG_DB_PASSWORD)
#   $3 - local test deployment flag (IS_LOCAL_TEST_DEPLOY)
#############################

# Refuse to run without root privileges.
if [ "$EUID" -ne "0" ]; then
  echo '- This script must be run as root!' >&2
  exit 1
fi

############################
# Variable definitions     #
############################
# Resolve the real location of this script; the expansions are quoted so that
# a path containing whitespace is not split into several words.
SCRIPT="$(readlink -f "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(dirname "${SCRIPT}")"

if [ ! -f "${SCRIPT_DIR}/config.sh" ]; then
  # If config.sh is not beside this script (most likely because vagrant
  # uploaded the script into the guest) fall back to the shared folder.
  SCRIPT_DIR="/vagrant/bootstrap"
fi

if [ ! -f "${SCRIPT_DIR}/config.sh" ]; then
  echo '- No variable declarations found (config.sh file not found)!' >&2
  exit 1
fi

source "${SCRIPT_DIR}/config.sh"

# Nothing to do when neither the neo4j-spatial nor the PostGIS import is enabled.
# NOTE(review): IMPORT_DATA_SPATIALITE is not considered here, so a
# spatialite-only import exits at this point as well - confirm this is intended.
if ! $IMPORT_DATA_NEO4J_SPATIAL && ! $IMPORT_DATA_POSTGIS; then
  exit 0
fi

# NOTE(review): fn_arg2string / fn_arg2boolean are not defined in this file;
# presumably config.sh provides them and they store the given argument (or the
# current value as fallback) in the named variable - TODO confirm.
fn_arg2string PG_DB_USER "${1}" "${PG_DB_USER}"
fn_arg2string PG_DB_PASSWORD "${2}" "${PG_DB_PASSWORD}"
fn_arg2boolean IS_LOCAL_TEST_DEPLOY "${3}" "${IS_LOCAL_TEST_DEPLOY}"
######################################
# Configuration (tool versions, ...) #
######################################
# Download mirror and the way keys that are kept when filtering OSM data.
OSM_DOWNLOAD_MIRROR='http://download.geofabrik.de'
OSM_KEYS='aerialway,highway,public_transport,railway,route'

# Bounding polygons (one per region), all located in the shared configuration.
CITY_POLYGON_DIR="$SHARED_CONF_DIR/cityPolygons"
OSM_BOUNDING_BOLZANO="$CITY_POLYGON_DIR/bolzano.poly"
OSM_BOUNDING_INNSBRUCK="$CITY_POLYGON_DIR/innsbruck.poly"
OSM_BOUNDING_ITALY="$CITY_POLYGON_DIR/italy.poly"
OSM_BOUNDING_SALZBURG="$CITY_POLYGON_DIR/salzburg.poly"
# NOTE(review): "sanfrancsico" looks like a misspelling of "sanfrancisco";
# verify against the actual file name in cityPolygons before changing it.
OSM_BOUNDING_SANFRANCISCO="$CITY_POLYGON_DIR/sanfrancsico.poly"
OSM_BOUNDING_TRENTOALTOADIGE="$CITY_POLYGON_DIR/trentinoaltoadige.poly"

# Unfiltered extracts (paths relative to OSM_DOWNLOAD_MIRROR).
OSM_FILE_AUSTRIA='europe/austria-140301.osm.pbf'
OSM_FILE_CALIFORNIA='north-america/us/california-140301.osm.pbf'
OSM_FILE_ITALY='europe/italy-140301.osm.pbf'

# File names of the filtered per-region OSM files.
OSM_FILE_BOLZANO_FILTERED='bozen-140301-filtered.osm'
OSM_FILE_INNSBRUCK_FILTERED='innsbruck-140301-filtered.osm'
OSM_FILE_ITALY_FILTERED='italy-140301-filtered.osm'
OSM_FILE_SALZBURG_FILTERED='salzburg-140301-filtered.osm'
OSM_FILE_SANFRANCISCO_FILTERED='sanfrancisco-140301-filtered.osm'
OSM_FILE_TRENTOALTOADIGE_FILTERED='southtyrol-140301-filtered.osm'
############################
# Function definitions #
############################
# Imports one region into every enabled backend.
#
# Globals (read): IMPORT_DATA_POSTGIS, AVAILABLE_POSTGIS, IMPORT_DATA_NEO4J_SPATIAL
# Arguments:
#   $1 - region name (its lower-cased form prefixes the SQL export file name)
#   $2 - unfiltered OSM file, forwarded to the neo4j-spatial import
#   $3 - filtered OSM file name, forwarded to the neo4j-spatial import
#   $4 - bounding polygon file, forwarded to the neo4j-spatial import
# Returns: 1 when neither the PostGIS nor the neo4j-spatial import applies.
function fn_import_dataset() {
  local region="$1" osm_source="$2" osm_filtered="$3" polygon="$4"
  local sql_dump="${region,,}_export.sql.gz"
  local use_postgis=false

  # The PostGIS import needs to be requested AND PostGIS has to be available.
  if $IMPORT_DATA_POSTGIS && $AVAILABLE_POSTGIS && [ -n "$sql_dump" ]; then
    use_postgis=true
  fi

  echo "Importing data for region of $region"

  # Guard clause: bail out when no backend wants this region.
  if ! $use_postgis && ! $IMPORT_DATA_NEO4J_SPATIAL; then
    echo " - skipping import"
    return 1
  fi

  if $use_postgis; then
    fn_import_dataset_postgis "$region" "$sql_dump"
  fi

  if $IMPORT_DATA_NEO4J_SPATIAL; then
    fn_import_dataset_neo4j_spatial "$region" "$osm_source" "$osm_filtered" "$polygon"
  fi
}
# Imports the OpenStreetMap data of one region into neo4j-spatial.
#
# Unless UPDATE_DATA is true, a cached (already filtered) OSM file is fetched
# and copied into DATA_DIR. With UPDATE_DATA an existing filtered file is
# removed instead. Only when no filtered file exists afterwards (and
# neo4j-spatial is available) is the unfiltered extract downloaded, filtered
# with osmosis and imported through the neo4j-spatial OSMImporter.
#
# Globals (read): UPDATE_DATA, AVAILABLE_NEO4J_SPATIAL, DOWNLOAD_DIR, DATA_DIR,
#                 CACHE_DIR_REMOTE_DATA, OSM_DOWNLOAD_MIRROR, OSM_KEYS,
#                 DEPLOY_DIR, WORKING_DIR
# Arguments:
#   $1 - region name (spaces removed and lower-cased for the log file name)
#   $2 - unfiltered OSM file, relative to OSM_DOWNLOAD_MIRROR
#   $3 - file name of the filtered OSM file
#   $4 - bounding polygon file handed to osmosis
function fn_import_dataset_neo4j_spatial() {
  local NAME="$1"
  local OSM_FILE="$2"
  local OSM_FILE_FILTERED="$3"
  local BOUNDING="$4"
  local CITY=${NAME// /}
  CITY=${CITY,,}
  local LOG_FILE="$WORKING_DIR/create_neo4jData_$CITY.log"
  local OSM_FILENAME

  # "!" must be a separate word: "!$UPDATE_DATA" would try to execute a
  # command literally called "!false"/"!true" and never enter this branch.
  if ! $UPDATE_DATA; then
    echo " - downloading cached OpenStreetMap file"
    fn_download_newer "$DOWNLOAD_DIR/$OSM_FILE_FILTERED" "$CACHE_DIR_REMOTE_DATA/$OSM_FILE_FILTERED"
    # Best effort: a missing cache file is handled by the fallback below.
    cp "$DOWNLOAD_DIR/$OSM_FILE_FILTERED" "$DATA_DIR/$OSM_FILE_FILTERED" >> /dev/null 2>&1
  fi

  if $UPDATE_DATA && [ -f "$DATA_DIR/$OSM_FILE_FILTERED" ]; then
    echo " - deleting outdated OpenStreetMap file"
    rm -rf "$DATA_DIR/$OSM_FILE_FILTERED"
  fi

  if $AVAILABLE_NEO4J_SPATIAL && [ ! -f "$DATA_DIR/$OSM_FILE_FILTERED" ]; then
    echo " - downloading OpenStreetMap file (no cached file found)"
    # Store the download under its base name (directory part of $2 stripped).
    OSM_FILENAME="$DOWNLOAD_DIR/${OSM_FILE##*/}"
    fn_download_newer "$OSM_FILENAME" "$OSM_DOWNLOAD_MIRROR/$OSM_FILE"

    echo " - filtering OpenStreetMap data"
    osmosis --read-pbf file="$OSM_FILENAME" --bounding-polygon file="$BOUNDING" --way-key keyList="$OSM_KEYS" --used-node --write-xml file="$DATA_DIR/$OSM_FILE_FILTERED" >> "$LOG_FILE" 2>&1

    echo " - importing filtered OpenStreetMap data into neo4j-spatial"
    # Only start maven when the project directory could actually be entered.
    if cd "$DEPLOY_DIR/neo4j-spatial"; then
      mvn exec:java -Dexec.mainClass=org.neo4j.gis.spatial.osm.OSMImporter -Dexec.args="$DEPLOY_DIR/neo4j/data/graph.db $DATA_DIR/$OSM_FILE_FILTERED" >> "$LOG_FILE" 2>&1
    fi
    cd "$WORKING_DIR"
  fi
}
# Imports the data of one region into the PostGIS database.
#
# Unless UPDATE_DATA is true, a cached SQL export is fetched, piped into psql
# and all tables in schema "public" that are owned by "postgres" are handed
# over to PG_DB_USER. With UPDATE_DATA an existing export is removed instead.
# When no export file exists afterwards (and isochrone-datamodel is available)
# the data model is built from scratch, copied into the "isochrone" database
# and dumped to DATA_DIR. Finally the density generator of isochrone-tools is
# run when PostGIS and isochrone-tools are available.
#
# Globals (read): UPDATE_DATA, AVAILABLE_ISOCHRONE_DATAMODEL, AVAILABLE_POSTGIS,
#                 AVAILABLE_ISOCHRONE_TOOLS, DOWNLOAD_DIR, DATA_DIR, DEPLOY_DIR,
#                 WORKING_DIR, CACHE_DIR_REMOTE_DATA, PG_DB_USER,
#                 PG_DB_PASSWORD, PG_DB_NAME
# Arguments:
#   $1 - region name (spaces removed and lower-cased to form the table prefix)
#   $2 - file name of the gzip-compressed SQL export
function fn_import_dataset_postgis() {
  local NAME="$1"
  local SQL_EXPORT_FILE="$2"
  local CITY=${NAME// /}
  CITY=${CITY,,}
  local DATAMODEL_LOG="$WORKING_DIR/create_datamodel_$CITY.log"
  local -a tables=()
  local tbl

  if ! $UPDATE_DATA; then
    echo " - downloading SQL export"
    fn_download_newer "$DOWNLOAD_DIR/$SQL_EXPORT_FILE" "$CACHE_DIR_REMOTE_DATA/$SQL_EXPORT_FILE"
    # Best effort: a missing cache file is handled by the fallback below.
    cp "$DOWNLOAD_DIR/$SQL_EXPORT_FILE" "$DATA_DIR/$SQL_EXPORT_FILE" >> /dev/null 2>&1

    echo " - importing SQL export into PostGIS database"
    gunzip -c "$DATA_DIR/$SQL_EXPORT_FILE" \
      | PGPASSWORD="$PG_DB_PASSWORD" psql -U "$PG_DB_USER" -h localhost "$PG_DB_NAME" >> "$WORKING_DIR/import_datamodel_$CITY.log" 2>&1

    echo " - getting table permissions"
    # One table name per line; read them line-wise instead of word-splitting.
    mapfile -t tables < <(PGPASSWORD="$PG_DB_PASSWORD" psql -qAt -U "$PG_DB_USER" -h localhost -c "SELECT tablename FROM pg_tables WHERE schemaname = 'public' AND tableowner = 'postgres';" "$PG_DB_NAME")

    echo " - fixing table permissions"
    for tbl in "${tables[@]}"; do
      [ -n "$tbl" ] || continue
      # pg_tables reports the exact stored name, so the identifier is quoted
      # to keep it from being case-folded by the SQL parser.
      PGPASSWORD="$PG_DB_PASSWORD" psql -qAt -U "$PG_DB_USER" -h localhost -c "ALTER TABLE \"$tbl\" OWNER TO $PG_DB_USER" "$PG_DB_NAME"
    done
  fi

  if $UPDATE_DATA && [ -f "$DATA_DIR/$SQL_EXPORT_FILE" ]; then
    echo " - deleting outdated SQL export (forced data update)"
    rm -rf "$DATA_DIR/$SQL_EXPORT_FILE"
  fi

  if $AVAILABLE_ISOCHRONE_DATAMODEL && [ ! -f "$DATA_DIR/$SQL_EXPORT_FILE" ]; then
    echo " - creating datamodel using isochrone-datamodel"
    # Create datamodel using isochrone-datamodel project
    DB_USERNAME="$PG_DB_USER" DB_PASSWORD="$PG_DB_PASSWORD" "$DEPLOY_DIR/isochrone-datamodel/builder.sh" -d -s -b -l -t -c"${CITY}" >> "$DATAMODEL_LOG" 2>&1

    echo " - copying tables to isochrone database"
    # NOTE(review): user, password and database of the source side ("spatial")
    # are hard-coded here - consider moving them into the configuration.
    PGPASSWORD="spatial" pg_dump -U spatial -h localhost -p 5432 -x -O -d spatial -t "transformed.${CITY}_*" \
      | sed -e "s/transformed/public/g" \
      | PGPASSWORD="$PG_DB_PASSWORD" psql -h localhost -U "$PG_DB_USER" -p 5432 -d isochrone >> "$DATAMODEL_LOG" 2>&1

    # Exporting resulting database to $DATA_DIR
    echo " - exporting database dump for ${CITY}"
    PGPASSWORD="$PG_DB_PASSWORD" pg_dump -U "$PG_DB_USER" -h localhost -p 5432 -d isochrone --clean --compress=5 --if-exists -t "${CITY}_*" --file="$DATA_DIR/$SQL_EXPORT_FILE" >> "$DATAMODEL_LOG" 2>&1
  fi

  if $AVAILABLE_POSTGIS && $AVAILABLE_ISOCHRONE_TOOLS; then
    java -cp "$DOWNLOAD_DIR/isochrone-tools.jar" at.uibk.dbis.isochrone.generator.density.DensityGenerator -t "$CITY" -d 60,120,180,240,300 >> "$DATAMODEL_LOG" 2>&1
  fi
}
# Runs one geoserver-shell command file and appends all output to a log file.
#   $1 - command file (*.gs)
#   $2 - log file
function _fn_run_gs_shell() {
  sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$1" >> "$2" 2>&1
}

# Prints $1 with "\", "/" and "&" backslash-escaped so that it can be used
# literally as the replacement part of a sed "s/.../.../" command.
function _fn_sed_escape_replacement() {
  printf '%s' "$1" | sed -e 's|[\\/&]|\\&|g'
}

# Configures geoserver (workspace, styles and the neo4j / PostGIS data stores
# and feature types) by running the geoserver-shell command files found in
# SHARED_CONF_DIR. Log files are written relative to the current directory.
#
# Globals (read): AVAILABLE_NEO4J_SPATIAL, AVAILABLE_POSTGIS,
#                 IS_LOCAL_TEST_DEPLOY, DEPLOY_DIR, SHARED_CONF_DIR,
#                 SHARED_IMG_DIR, WORKING_DIR, TOMCAT_USER, PG_DB_USER,
#                 PG_DB_PASSWORD, GEOSERVER_VERSION, GEOSERVER_SHELL_VERSION
function fn_init_geoserver() {
  local gs_file
  local postgis_ds="$WORKING_DIR/geoserver_setup_postgis_ds.gs"
  local user_escaped
  local password_escaped

  if $AVAILABLE_NEO4J_SPATIAL; then
    echo "Configuring neo4j data permissions for geoserver"
    chown -R "$TOMCAT_USER:$TOMCAT_USER" "$DEPLOY_DIR/neo4j/data/graph.db"
  fi

  echo "Configuring geoserver $GEOSERVER_VERSION using geoserver-shell $GEOSERVER_SHELL_VERSION"
  _fn_run_gs_shell "$SHARED_CONF_DIR/geoserver_setup_ws.gs" setup_geoserver_workspace.log
  _fn_run_gs_shell "$SHARED_CONF_DIR/geoserver_setup_styles.gs" setup_geoserver_styles.log
  # The glob stays outside the quotes so that it still expands.
  cp "$SHARED_IMG_DIR"/* "$DEPLOY_DIR/geoserver/data/styles" >> setup_geoserver_styles.log 2>&1

  if $AVAILABLE_NEO4J_SPATIAL; then
    for gs_file in \
      geoserver_setup_neo4j_ds.gs \
      geoserver_setup_neo4j_ft_bz.gs \
      geoserver_setup_neo4j_ft_ibk.gs \
      geoserver_setup_neo4j_ft_sf.gs; do
      _fn_run_gs_shell "$SHARED_CONF_DIR/$gs_file" setup_geoserver_neo4j.log
    done
    if ! $IS_LOCAL_TEST_DEPLOY; then
      # Disabled: geoserver_setup_neo4j_ft_it.gs, geoserver_setup_neo4j_ft_st.gs
      _fn_run_gs_shell "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft.gs" setup_geoserver_neo4j.log
    fi
  fi

  if $AVAILABLE_POSTGIS; then
    # copy geoserver_setup_postgis_ds and replace username/password
    cp "$SHARED_CONF_DIR/geoserver_setup_postgis_ds.gs" "$postgis_ds"
    # Escape the credentials first: a raw "/", "&" or "\" inside them would
    # otherwise terminate or alter the sed substitution.
    user_escaped="$(_fn_sed_escape_replacement "$PG_DB_USER")"
    password_escaped="$(_fn_sed_escape_replacement "$PG_DB_PASSWORD")"
    sed -i "s/@db_username@/${user_escaped}/" "$postgis_ds"
    sed -i "s/@db_password@/${password_escaped}/" "$postgis_ds"

    _fn_run_gs_shell "$postgis_ds" setup_geoserver_postgis.log
    for gs_file in \
      geoserver_setup_postgis_ft_bz.gs \
      geoserver_setup_postgis_ft_ibk.gs \
      geoserver_setup_postgis_ft_sf.gs; do
      _fn_run_gs_shell "$SHARED_CONF_DIR/$gs_file" setup_geoserver_postgis.log
    done
    if ! $IS_LOCAL_TEST_DEPLOY; then
      # Disabled: geoserver_setup_postgis_ft_it.gs, geoserver_setup_postgis_ft_st.gs
      _fn_run_gs_shell "$SHARED_CONF_DIR/geoserver_setup_postgis_ft.gs" setup_geoserver_postgis.log
    fi
  fi
}
# Provides the spatialite database "isochrone.spatialite" in DATA_DIR.
#
# Unless UPDATE_DATA is true, a cached database is fetched and copied into
# DATA_DIR. With UPDATE_DATA an existing database is removed instead. When no
# database exists afterwards (and GDAL is available) it is exported from the
# PostGIS database "isochrone" using ogr2ogr. Finally owner and group
# permissions of the database file are adjusted.
#
# Globals (read): UPDATE_DATA, AVAILABLE_GDAL, DOWNLOAD_DIR, DATA_DIR,
#                 DEPLOY_DIR, WORKING_DIR, CACHE_DIR_REMOTE_DATA, PG_DB_USER,
#                 PG_DB_PASSWORD
function fn_import_spatialite() {
  local SPATIALITE_FILENAME="isochrone.spatialite"

  if ! $UPDATE_DATA; then
    echo "Importing spatialite database"
    fn_download_newer "$DOWNLOAD_DIR/$SPATIALITE_FILENAME" "$CACHE_DIR_REMOTE_DATA/$SPATIALITE_FILENAME"
    cp -f "$DOWNLOAD_DIR/$SPATIALITE_FILENAME" "$DATA_DIR/"
  fi

  if $UPDATE_DATA && [ -f "$DATA_DIR/$SPATIALITE_FILENAME" ]; then
    echo "Deleting outdated spatialite database (forced data update)"
    rm -rf "$DATA_DIR/$SPATIALITE_FILENAME"
  fi

  if $AVAILABLE_GDAL && [ ! -f "$DATA_DIR/$SPATIALITE_FILENAME" ]; then
    echo "Exporting PostGIS data into spatialite database"
    "$DEPLOY_DIR/gdal/apps/ogr2ogr" --config PG_LIST_ALL_TABLES YES --config PG_SKIP_VIEWS YES -progress -f "SQLite" "$DATA_DIR/$SPATIALITE_FILENAME" PG:"host=localhost dbname=isochrone user=$PG_DB_USER password=$PG_DB_PASSWORD" -lco LAUNDER=yes -dsco SPATIALITE=yes -lco SPATIAL_INDEX=yes -gt 65536 >> "$WORKING_DIR/setup_spatialite.log" 2>&1
  fi

  if [ -f "$DATA_DIR/$SPATIALITE_FILENAME" ]; then
    echo " - setting spatialite database permissions"
    chown -R apache:apache "$DATA_DIR/$SPATIALITE_FILENAME"
    # "g+rwX" grants the group read/write access; the former "-g+rwX" was
    # parsed as a different symbolic mode and left the file group-read-only.
    chmod -R g+rwX "$DATA_DIR/$SPATIALITE_FILENAME"
  else
    echo " - spatialite database could not be created!" >&2
  fi
}
################
# Data import  #
################
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo " Importing datasets ($(date +%H:%M:%S)):"
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
START=$(date +%s)

mkdir -p "$DOWNLOAD_DIR"
mkdir -p "$WORKING_DIR"
# Several log files below are written relative to the current directory, so
# do not continue from an arbitrary location if the working directory cannot
# be entered.
if ! cd "$WORKING_DIR"; then
  echo "- Cannot change into working directory '$WORKING_DIR'!" >&2
  exit 1
fi

# Import real world datasets
fn_import_dataset "Bolzano" "$OSM_FILE_ITALY" "$OSM_FILE_BOLZANO_FILTERED" "$OSM_BOUNDING_BOLZANO"
fn_import_dataset "Innsbruck" "$OSM_FILE_AUSTRIA" "$OSM_FILE_INNSBRUCK_FILTERED" "$OSM_BOUNDING_INNSBRUCK"
fn_import_dataset "Salzburg" "$OSM_FILE_AUSTRIA" "$OSM_FILE_SALZBURG_FILTERED" "$OSM_BOUNDING_SALZBURG"
fn_import_dataset "SanFrancisco" "$OSM_FILE_CALIFORNIA" "$OSM_FILE_SANFRANCISCO_FILTERED" "$OSM_BOUNDING_SANFRANCISCO"
#if ! $IS_LOCAL_TEST_DEPLOY; then
# fn_import_dataset "Italy" "$OSM_FILE_ITALY" "$OSM_FILE_ITALY_FILTERED" "$OSM_BOUNDING_ITALY"
# fn_import_dataset "TrentoAltoAdige" "$OSM_FILE_ITALY" "$OSM_FILE_TRENTOALTOADIGE_FILTERED" "$OSM_BOUNDING_TRENTOALTOADIGE"
#fi

# Generate synthetic datasets (and densities for them) -> they are always created from scratch (never cached)
if $AVAILABLE_POSTGIS && $AVAILABLE_ISOCHRONE_TOOLS; then
  # The generators start a fresh log (">"); the density runs append (">>").
  java -cp "$DOWNLOAD_DIR/isochrone-tools.jar" at.uibk.dbis.isochrone.generator.GridNetworkGenerator -d 100 -l 60 > "$WORKING_DIR/generate_gridNetwork.log" 2>&1
  java -cp "$DOWNLOAD_DIR/isochrone-tools.jar" at.uibk.dbis.isochrone.generator.SpiderNetworkGenerator -d 6 -lvl 1000 -l 60 > "$WORKING_DIR/generate_spiderNetwork.log" 2>&1
  java -cp "$DOWNLOAD_DIR/isochrone-tools.jar" at.uibk.dbis.isochrone.generator.density.DensityGenerator -t grid_s100 -d 60,120,180,240,300 >> "$WORKING_DIR/generate_gridNetwork.log" 2>&1
  java -cp "$DOWNLOAD_DIR/isochrone-tools.jar" at.uibk.dbis.isochrone.generator.density.DensityGenerator -t spider_l1000 -d 60,120,180,240,300 >> "$WORKING_DIR/generate_spiderNetwork.log" 2>&1
fi

# Not importing datasets directly into spatialite -> we copy data from postgis to spatialite (after postgis data import)
if $IMPORT_DATA_SPATIALITE; then
  fn_import_spatialite
fi

# After data import we have to initialize geoserver layers (if geoserver is used)
if $AVAILABLE_GEOSERVER; then
  fn_init_geoserver
fi

END=$(date +%s)
TOTAL=$(( END - START ))
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo " Datasets imported"
printf ' Time to import the datasets: %dh:%dm:%ds\n' $(( TOTAL / 3600 )) $(( TOTAL % 3600 / 60 )) $(( TOTAL % 60 ))
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"