#!/bin/bash
##############################################################################
# This script imports data (e.g. from the road network of Bolzano) that is
# downloaded from OSM (OpenStreetMap) and then imported into the available
# databases (neo4j_spatial, postgis and spatialite).
#
# Usage: import_data.sh [PG_DB_USER] [PG_DB_PASSWORD] [DEPLOY_ALL_DATASETS] [DEPLOY_ANY_DATASET]
# Requires: root privileges, config.sh (variable declarations + fn_* helpers).
##############################################################################

if [ "$EUID" -ne "0" ]; then
  echo '- This script must be run as root!'
  exit 1
fi

############################
#   Variable definitions   #
############################
SCRIPT="$(readlink -f "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(dirname "${SCRIPT}")"
if [ ! -f "${SCRIPT_DIR}/config.sh" ]; then
  # If config.sh is not beside the bash script (most likely because vagrant
  # uploaded the script alone into the guest) we try the shared folder.
  SCRIPT_DIR="/vagrant/bootstrap"
fi
if [ ! -f "${SCRIPT_DIR}/config.sh" ]; then
  echo '- No variable declarations found (config.properties file not found)!'
  exit 1
fi
source "${SCRIPT_DIR}/config.sh"

# Nothing to do at all if the PostGIS import is disabled (all other imports
# copy their data out of PostGIS).
if ! $IMPORT_DATA_POSTGIS; then
  exit 0
fi

# Positional arguments override the values sourced from config.sh.
fn_arg2string  PG_DB_USER          "${1}" "${PG_DB_USER}"
fn_arg2string  PG_DB_PASSWORD      "${2}" "${PG_DB_PASSWORD}"
fn_arg2boolean DEPLOY_ALL_DATASETS "${3}" "${DEPLOY_ALL_DATASETS}"
fn_arg2boolean DEPLOY_ANY_DATASET  "${4}" "true"

######################################
# Configuration (tool versions, ...) #
######################################
CITY_POLYGON_DIR="${SHARED_CONF_DIR}/cityPolygons"
#DENSITY="60,120,180,240,300"
DENSITY="100,200,300,400,500,600,700,800,900,1000"
OSM_DOWNLOAD_MIRROR="http://download.geofabrik.de"
OSM_FILE_AUSTRIA="europe/austria-latest.osm.pbf"
OSM_FILE_ITALY="europe/italy-latest.osm.pbf"
OSM_FILE_US_CALIFORNIA="north-america/us/california-latest.osm.pbf"
OSM_FILE_US_DC="north-america/us/district-of-columbia-latest.osm.pbf"
OSM_KEYS="aerialway,highway,public_transport,railway,route"

############################
#   Function definitions   #
############################

#######################################
# Creates the synthetic benchmark networks (grid and spider) from scratch
# (they are never cached) and computes their node_density tables.
# Globals:  AVAILABLE_ISOCHRONE_TOOLS, DEPLOY_DIR, WORKING_DIR, DENSITY
# Returns:  exits with the java tool's status on failure
#######################################
function fn_create_synthetic_networks() {
  local rc
  if $AVAILABLE_ISOCHRONE_TOOLS; then
    echo "Importing data for synthetic networks"
    # GridNetwork
    java -cp "${DEPLOY_DIR}/isochrone-tools.jar" at.uibk.dbis.isochrone.generator.GridNetworkGenerator -d 100 -l 60 > "$WORKING_DIR/generate_gridNetwork.log" 2>&1
    rc=$?
    # NOTE: the status must be captured immediately; after printf, $? would be
    # printf's status (0) and the script would wrongly exit successfully.
    if [ $rc -ne 0 ]; then
      printf "%s\n" "[ERROR] An error occurred while creating the grid network. The process will exit now with status $rc"
      exit $rc
    fi
    java -cp "${DEPLOY_DIR}/isochrone-tools.jar" at.uibk.dbis.isochrone.generator.density.DensityGenerator -t grid_s100 -d "$DENSITY" >> "$WORKING_DIR/generate_gridNetwork.log" 2>&1
    rc=$?
    if [ $rc -ne 0 ]; then
      printf "%s\n" "[ERROR] An error occurred while creating the node_density table for 'grid_s100' using isochrone-tools. The process will exit now with status $rc"
      exit $rc
    fi
    # SpiderNetwork
    java -cp "${DEPLOY_DIR}/isochrone-tools.jar" at.uibk.dbis.isochrone.generator.SpiderNetworkGenerator -d 6 -lvl 1000 -l 60 > "$WORKING_DIR/generate_spiderNetwork.log" 2>&1
    rc=$?
    if [ $rc -ne 0 ]; then
      printf "%s\n" "[ERROR] An error occurred while creating the spider network. The process will exit now with status $rc"
      exit $rc
    fi
    java -cp "${DEPLOY_DIR}/isochrone-tools.jar" at.uibk.dbis.isochrone.generator.density.DensityGenerator -t spider_l1000 -d "$DENSITY" >> "$WORKING_DIR/generate_spiderNetwork.log" 2>&1
    rc=$?
    if [ $rc -ne 0 ]; then
      printf "%s\n" "[ERROR] An error occurred while creating the node_density table for 'spider_l1000' using isochrone-tools. The process will exit now with status $rc"
      exit $rc
    fi
  fi
}

#######################################
# Imports one real-world dataset into every enabled database backend.
# Arguments: $1 - human readable region name (may contain spaces)
#            $2 - target SRID (EPSG code)
#######################################
function fn_import_dataset() {
  local NAME="$1"
  local SRID="$2"
  echo "Importing data for region of $NAME (EPSG:${SRID})"
  if $IMPORT_DATA_POSTGIS; then
    echo " - importing into postgis"
    fn_import_dataset_postgis "$NAME" "$SRID"
  fi
  if $IMPORT_DATA_NEO4J_SPATIAL; then
    echo " - importing into neo4j"
    fn_import_dataset_neo4j_spatial "$NAME"
  fi
}

#######################################
# Copies a dataset from PostGIS into neo4j (must run after the PostGIS
# import of the same dataset).
# Arguments: $1 - region name; lowercased and stripped of spaces to form
#                 the table prefix (e.g. "San Francisco" -> "sanfrancisco")
#######################################
function fn_import_dataset_neo4j_spatial() {
  local NAME="$1"
  local CITY=${NAME// /}
  CITY=${CITY,,}
  local rc
  # We copy data from postgis for the specified dataset (after postgis data import)
  java -cp "${DEPLOY_DIR}/isochrone-tools.jar" at.uibk.dbis.isochrone.exporter.ImportDataNeo4j -t "${CITY}" > "$WORKING_DIR/import_neo4j_$CITY.log" 2>&1
  rc=$?
  if [ $rc -ne 0 ]; then
    printf "%s\n" "[ERROR] An error occurred while importing data into neo4j using isochrone-tools. The process will exit now with status $rc"
    exit $rc
  fi
}

#######################################
# Imports a dataset into PostGIS: either restores a cached SQL export, or
# (re)builds the datamodel with the isochrone-datamodel project, computes
# node densities and writes a fresh compressed export to $DATA_DIR.
# Arguments: $1 - region name, $2 - target SRID
#######################################
function fn_import_dataset_postgis() {
  local NAME="$1"
  local SRID="$2"
  local SQL_EXPORT_FILE="${NAME,,}_export_${SRID}.sql.gz"
  local CITY=${NAME// /}
  CITY=${CITY,,}
  local rc tables tbl
  if ! $UPDATE_DATA; then
    echo " - downloading SQL export"
    fn_download_newer "$DOWNLOAD_DIR/$SQL_EXPORT_FILE" "${CACHE_DIR_REMOTE_DATA}${SQL_EXPORT_FILE}"
    cp "$DOWNLOAD_DIR/$SQL_EXPORT_FILE" "$DATA_DIR/$SQL_EXPORT_FILE" >> /dev/null 2>&1
    echo " - importing SQL export into PostGIS database"
    # ${CITY}_${SRID}: braces are required -- "$CITY_$SRID" would reference the
    # undefined variable CITY_ and drop the city from the log file name.
    gunzip -c "$DATA_DIR/$SQL_EXPORT_FILE" | PGPASSWORD="$PG_DB_PASSWORD" psql -U "$PG_DB_USER" -h localhost "$PG_DB_NAME" >> "$WORKING_DIR/import_datamodel_${CITY}_${SRID}.log" 2>&1
    echo " - getting table permissions"
    tables=$(PGPASSWORD="$PG_DB_PASSWORD" psql -qAt -U "$PG_DB_USER" -h localhost -c "SELECT tablename FROM pg_tables WHERE schemaname = 'public' AND tableowner = 'postgres';" "$PG_DB_NAME")
    echo " - fixing table permissions"
    for tbl in $tables; do
      PGPASSWORD="$PG_DB_PASSWORD" psql -qAt -U "$PG_DB_USER" -h localhost -c "ALTER TABLE $tbl OWNER TO $PG_DB_USER" "$PG_DB_NAME"
    done
  fi
  if $UPDATE_DATA && [ -f "$DATA_DIR/$SQL_EXPORT_FILE" ]; then
    echo " - deleting outdated SQL export (forced data update)"
    rm -f "$DATA_DIR/$SQL_EXPORT_FILE"
  fi
  if $AVAILABLE_ISOCHRONE_DATAMODEL && [ ! -f "$DATA_DIR/$SQL_EXPORT_FILE" ]; then
    echo " - creating datamodel using isochrone-datamodel"
    # Create datamodel using isochrone-datamodel project
    DB_USERNAME="$PG_DB_USER" DB_PASSWORD="$PG_DB_PASSWORD" "$DEPLOY_DIR/isochrone-datamodel/osmPti2mmds.sh" -d -s -b -l -t${SRID} -c${CITY} >> "$WORKING_DIR/create_datamodel_${CITY}_${SRID}.log" 2>&1
    rc=$?
    if [ $rc -ne 0 ]; then
      printf "%s\n" "[ERROR] An error occurred while creating datamodel using isochrone-datamodel. The process will exit now with status $rc"
      exit $rc
    fi
    echo " - copying tables to isochrone database"
    # If working with multiple TARGET_SRIDs we have to delete an eventually
    # existing nodes_density table here, so on import later a
    # DROP TABLE ${CITY}_nodes works without CASCADE (pg_dump does not use CASCADE)
    PGPASSWORD="$PG_DB_PASSWORD" psql -qAt -U "$PG_DB_USER" -h localhost -d isochrone -c "DROP TABLE IF EXISTS ${CITY}_nodes_density" >> "$WORKING_DIR/create_datamodel_${CITY}_${SRID}.log" 2>&1
    PGPASSWORD="spatial" pg_dump -U spatial -h localhost -d spatial --clean --if-exists --no-privileges --no-owner -t "transformed.${CITY}_*" | sed -e "s/transformed/public/g" | PGPASSWORD="$PG_DB_PASSWORD" psql -U "$PG_DB_USER" -h localhost -d isochrone >> "$WORKING_DIR/create_datamodel_${CITY}_${SRID}.log" 2>&1
  fi
  if $AVAILABLE_POSTGIS && $AVAILABLE_ISOCHRONE_TOOLS; then
    java -cp "${DEPLOY_DIR}/isochrone-tools.jar" "at.uibk.dbis.isochrone.generator.density.DensityGenerator" -t "${CITY}" -d "${DENSITY}" >> "$WORKING_DIR/create_datamodel_${CITY}_${SRID}.log" 2>&1
    rc=$?
    if [ $rc -ne 0 ]; then
      printf "%s\n" "[ERROR] An error occurred while creating the node_density table for '${CITY}' using isochrone-tools. The process will exit now with status $rc"
      exit $rc
    fi
  fi
  if $AVAILABLE_ISOCHRONE_DATAMODEL; then
    # Exporting resulting database to $DATA_DIR
    echo " - exporting database dump"
    PGPASSWORD="$PG_DB_PASSWORD" pg_dump -U "$PG_DB_USER" -h localhost -d isochrone --clean --compress=5 --if-exists -t "${CITY}_*" --file="$DATA_DIR/$SQL_EXPORT_FILE" >> "$WORKING_DIR/create_datamodel_${CITY}_${SRID}.log" 2>&1
  fi
}

#######################################
# Registers data sources, feature types and styles in geoserver via
# geoserver-shell, for neo4j and/or postgis backends.
# Globals:  AVAILABLE_NEO4J_SPATIAL, AVAILABLE_POSTGIS, DEPLOY_ALL_DATASETS,
#           DEPLOY_DIR, SHARED_CONF_DIR, SHARED_IMG_DIR, WORKING_DIR, ...
#######################################
function fn_init_geoserver() {
  # TODO: Do we really want to import data from OSM, so that it is available in
  # geoserver (we publish data from within isochron-web to geoserver, but why
  # should we do that with the raw data -> pgRouting comparison or similar?)
  # TODO: Needs testing
  if $AVAILABLE_NEO4J_SPATIAL; then
    echo "Configuring neo4j data permissions for geoserver"
    chown -R "$TOMCAT_USER:$TOMCAT_USER" "$DEPLOY_DIR/neo4j/data/graph.db"
  fi
  echo "Preparing data from OpenStreetMap to be imported into geoserver"
  fn_filter_osm_data "Bolzano" "${OSM_FILE_ITALY}"
  fn_filter_osm_data "Innsbruck" "${OSM_FILE_AUSTRIA}"
  fn_filter_osm_data "Salzburg" "${OSM_FILE_AUSTRIA}"
  fn_filter_osm_data "SanFrancisco" "${OSM_FILE_US_CALIFORNIA}"
  fn_filter_osm_data "WashingtonDC" "${OSM_FILE_US_DC}"
  if $DEPLOY_ALL_DATASETS; then
    # fn_filter_osm_data "Italy" "${OSM_FILE_ITALY}"
    fn_filter_osm_data "AltoAdige" "${OSM_FILE_ITALY}"
  fi
  # TODO: We need to import the data into the database before issuing to
  # geoserver that there are tables available
  echo "Configuring geoserver $GEOSERVER_VERSION using geoserver-shell $GEOSERVER_SHELL_VERSION"
  sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_ws.gs" >> setup_geoserver_workspace.log 2>&1
  sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_styles.gs" >> setup_geoserver_styles.log 2>&1
  cp "$SHARED_IMG_DIR"/* "$DEPLOY_DIR/geoserver/data/styles" >> setup_geoserver_styles.log 2>&1
  if $AVAILABLE_NEO4J_SPATIAL; then
    sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ds.gs" >> setup_geoserver_neo4j.log 2>&1
    sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft_bolzano.gs" >> setup_geoserver_neo4j.log 2>&1
    sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft_innsbruck.gs" >> setup_geoserver_neo4j.log 2>&1
    sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft_sanfrancisco.gs" >> setup_geoserver_neo4j.log 2>&1
    sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft_washingtondc.gs" >> setup_geoserver_neo4j.log 2>&1
    if $DEPLOY_ALL_DATASETS; then
      # sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft_italy.gs" >> setup_geoserver_neo4j.log 2>&1
      sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft_altoadige.gs" >> setup_geoserver_neo4j.log 2>&1
    fi
    sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft.gs" >> setup_geoserver_neo4j.log 2>&1
  fi
  if $AVAILABLE_POSTGIS; then
    # copy geoserver_setup_postgis_ds and replace username/password
    cp "$SHARED_CONF_DIR/geoserver_setup_postgis_ds.gs" "$WORKING_DIR/geoserver_setup_postgis_ds.gs"
    sed -i "s/@db_username@/$PG_DB_USER/" "$WORKING_DIR/geoserver_setup_postgis_ds.gs"
    sed -i "s/@db_password@/$PG_DB_PASSWORD/" "$WORKING_DIR/geoserver_setup_postgis_ds.gs"
    sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$WORKING_DIR/geoserver_setup_postgis_ds.gs" >> setup_geoserver_postgis.log 2>&1
    sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_postgis_ft_bolzano.gs" >> setup_geoserver_postgis.log 2>&1
    sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_postgis_ft_innsbruck.gs" >> setup_geoserver_postgis.log 2>&1
    sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_postgis_ft_sanfrancisco.gs" >> setup_geoserver_postgis.log 2>&1
    sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_postgis_ft_washingtondc.gs" >> setup_geoserver_postgis.log 2>&1
    if $DEPLOY_ALL_DATASETS; then
      # sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_postgis_ft_italy.gs" >> setup_geoserver_postgis.log 2>&1
      sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_postgis_ft_altoadige.gs" >> setup_geoserver_postgis.log 2>&1
    fi
    sh "$DEPLOY_DIR/gs-shell/bin/gs-shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_postgis_ft.gs" >> setup_geoserver_postgis.log 2>&1
  fi
}

#######################################
# Restores (or exports from PostGIS via ogr2ogr) the spatialite database for
# one SRID and fixes its file permissions for the web server.
# Arguments: $1 - target SRID
#######################################
function fn_import_spatialite() {
  local SRID="$1"
  SPATIALITE_FILENAME="isochrone_${SRID}.spatialite"
  if ! $UPDATE_DATA; then
    echo "Importing spatialite database"
    fn_download_newer "$DOWNLOAD_DIR/$SPATIALITE_FILENAME" "${CACHE_DIR_REMOTE_DATA}${SPATIALITE_FILENAME}"
    cp -f "$DOWNLOAD_DIR/$SPATIALITE_FILENAME" "$DATA_DIR/"
  fi
  if $UPDATE_DATA && [ -f "$DATA_DIR/$SPATIALITE_FILENAME" ]; then
    echo "Deleting outdated spatialite database (forced data update)"
    rm -f "$DATA_DIR/$SPATIALITE_FILENAME"
  fi
  if $AVAILABLE_GDAL && [ ! -f "$DATA_DIR/$SPATIALITE_FILENAME" ]; then
    echo "Exporting PostGIS data into spatialite database"
    "$DEPLOY_DIR/gdal/apps/ogr2ogr" --config PG_LIST_ALL_TABLES YES --config PG_SKIP_VIEWS YES -progress -f "SQLite" "$DATA_DIR/$SPATIALITE_FILENAME" PG:"host=localhost dbname=isochrone user=$PG_DB_USER password=$PG_DB_PASSWORD" -lco LAUNDER=yes -dsco SPATIALITE=yes -lco SPATIAL_INDEX=yes -gt 65536 >> "$WORKING_DIR/import_spatialite.log" 2>&1
  fi
  if [ -f "$DATA_DIR/$SPATIALITE_FILENAME" ]; then
    echo " - setting spatialite database permissions"
    chown -R apache:apache "$DATA_DIR/$SPATIALITE_FILENAME"
    # 'g+rwX' grants group access; the previous '-g+rwX' mode (leading dash)
    # would have *removed* those bits.
    chmod -R g+rwX "$DATA_DIR/$SPATIALITE_FILENAME"
  else
    echo " - spatialite database could not be created!" >&2
  fi
}

#######################################
# Produces a filtered OSM extract for one region: restores it from cache, or
# downloads the country-level PBF and clips/filters it with osmosis using the
# region's polygon file and $OSM_KEYS.
# Arguments: $1 - region name, $2 - mirror-relative path of the source PBF
#######################################
function fn_filter_osm_data() {
  local NAME="$1"
  local OSM_FILE="$2"
  local CITY=${NAME// /}
  CITY=${CITY,,}
  local OSM_FILE_FILTERED="${CITY}_filtered.osm"
  if $UPDATE_DATA && [ -f "$DATA_DIR/$OSM_FILE_FILTERED" ]; then
    echo " - deleting outdated osm export (forced data update)"
    rm -f "$DATA_DIR/$OSM_FILE_FILTERED"
  fi
  if [ ! -f "$DATA_DIR/$OSM_FILE_FILTERED" ]; then
    echo " - downloading cached OpenStreetMap file"
    fn_download_newer "$DOWNLOAD_DIR/$OSM_FILE_FILTERED" "$CACHE_DIR_REMOTE_DATA/$OSM_FILE_FILTERED"
    cp "$DOWNLOAD_DIR/$OSM_FILE_FILTERED" "$DATA_DIR/$OSM_FILE_FILTERED" >> /dev/null 2>&1
  fi
  if [ ! -f "$DATA_DIR/$OSM_FILE_FILTERED" ]; then
    echo " - downloading OpenStreetMap file (no cached file found)"
    OSM_FILENAME="$DOWNLOAD_DIR/${OSM_FILE##*/}"
    # ${CITY}, not the undefined lowercase ${city}, selects the polygon file.
    BOUNDING="${CITY_POLYGON_DIR}/${CITY}.poly"
    fn_download_newer "$OSM_FILENAME" "$OSM_DOWNLOAD_MIRROR/$OSM_FILE"
    # Only filter when the polygon file actually exists (the old check
    # '== *.poly' was always true and osmosis was fed an undefined variable).
    if [[ -f "$BOUNDING" ]]; then
      echo " - filtering OpenStreetMap file"
      "$DEPLOY_DIR/osmosis/bin/osmosis" --read-pbf file="$OSM_FILENAME" --bounding-polygon file="$BOUNDING" --way-key keyList="$OSM_KEYS" --used-node --write-xml file="$DATA_DIR/$OSM_FILE_FILTERED" >> "filter_osmData_$NAME.log" 2>&1
    fi
  fi
}

################
# Data import  #
################
if $DEPLOY_ANY_DATASET; then
  echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
  echo " Importing datasets ($(date +%H:%M:%S)):"
  echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
  START=$(date +%s)
  mkdir -p "$DOWNLOAD_DIR"
  mkdir -p "$WORKING_DIR"
  cd "$WORKING_DIR"

  # Generate synthetic datasets (and densities for them) -> they are always
  # created from scratch (never cached)
  fn_create_synthetic_networks

  # Intentionally unquoted expansion: TARGET_SRID is a comma separated list
  # that is split into an array here.
  SRID_ARR=(${TARGET_SRID//,/ })
  for CURRENT_SRID in "${SRID_ARR[@]}"; do
    # Import real world datasets
    fn_import_dataset "Bolzano" "$CURRENT_SRID"
    fn_import_dataset "Innsbruck" "$CURRENT_SRID"
    fn_import_dataset "Salzburg" "$CURRENT_SRID"
    fn_import_dataset "SanFrancisco" "$CURRENT_SRID"
    fn_import_dataset "WashingtonDC" "$CURRENT_SRID"
    if $DEPLOY_ALL_DATASETS; then
      # fn_import_dataset "Italy" "$CURRENT_SRID"
      fn_import_dataset "AltoAdige" "$CURRENT_SRID"
    fi
    # Not importing datasets one-by-one into spatialite -> we copy data from
    # postgis (after postgis data import)
    if $IMPORT_DATA_SPATIALITE; then
      fn_import_spatialite "$CURRENT_SRID"
    fi
  done

  # After data import we have to initialize geoserver layers (if geoserver is used)
  if $AVAILABLE_GEOSERVER; then
    fn_init_geoserver
  fi

  END=$(date +%s)
  TOTAL=$(( END - START ))
  echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
  echo " Datasets imported"
  printf ' Time to import the datasets: %dh:%dm:%ds\n' $(( TOTAL / 3600 )) $(( TOTAL % 3600 / 60 )) $(( TOTAL % 60 ))
  echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
fi