#! /bin/bash
#############################
# This script imports data (e.g. the road network of Bolzano) that is
# downloaded from OSM (OpenStreetMap) and then imported into the available
# databases (neo4j_spatial, postgis and spatialite).
#############################

# Abort right away unless the effective user id is 0 (root).
if (( EUID != 0 )); then
	printf '%s\n' '- This script must be run as root!'
	exit 1
fi

############################
# Variable definitions     #
############################

# Locate and load config.sh, which provides the variables and helper
# functions used throughout this script.
# The expansions are quoted so that a script path containing whitespace
# still resolves to a single readlink/dirname argument.
SCRIPT="$(readlink -f "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(dirname "${SCRIPT}")"
if [ ! -f "${SCRIPT_DIR}/config.sh" ]; then
	# If config.sh is not next to this script (most likely because vagrant uploaded the script into the guest)
	# we will try to find it in the shared folder
	SCRIPT_DIR="/vagrant/bootstrap"
fi
if [ ! -f "${SCRIPT_DIR}/config.sh" ]; then
	# The message names the file that is actually looked up (config.sh)
	echo '- No variable declarations found (config.sh file not found)!'
	exit 1
fi
source "${SCRIPT_DIR}/config.sh"

	exit 0
fi

# Optional positional overrides. Every call passes the name of the target
# variable, the positional argument and a fallback value to a fn_arg2*
# helper (not defined in this section - presumably provided by config.sh).
#   $1 -> PG_DB_USER            (fallback: current PG_DB_USER)
#   $2 -> PG_DB_PASSWORD        (fallback: current PG_DB_PASSWORD)
#   $3 -> DEPLOY_ALL_DATASETS   (fallback: current DEPLOY_ALL_DATASETS)
#   $4 -> DEPLOY_ANY_DATASET    (fallback: "true")
fn_arg2string  PG_DB_USER          "$1" "$PG_DB_USER"
fn_arg2string  PG_DB_PASSWORD      "$2" "$PG_DB_PASSWORD"
fn_arg2boolean DEPLOY_ALL_DATASETS "$3" "$DEPLOY_ALL_DATASETS"
fn_arg2boolean DEPLOY_ANY_DATASET  "$4" 'true'
######################################
# Configuration (tool versions, ...) #
######################################

# Directory that is searched for the <city>.poly boundary files
CITY_POLYGON_DIR="$SHARED_CONF_DIR/cityPolygons"

# Comma separated list handed to the DensityGenerator via its -d option
#DENSITY="60,120,180,240,300"
DENSITY='100,200,300,400,500,600,700,800,900,1000'

# Download mirror and the mirror-relative paths of the OSM extracts
OSM_DOWNLOAD_MIRROR='http://download.geofabrik.de'
OSM_FILE_AUSTRIA='europe/austria-latest.osm.pbf'
OSM_FILE_ITALY='europe/italy-latest.osm.pbf'
OSM_FILE_US_CALIFORNIA='north-america/us/california-latest.osm.pbf'
OSM_FILE_US_DC='north-america/us/district-of-columbia-latest.osm.pbf'

# Way keys passed to osmosis (--way-key keyList=...) when filtering extracts
OSM_KEYS='aerialway,highway,public_transport,railway,route'
############################
# Function definitions     #
############################

#######################################
# Creates the synthetic grid and spider networks and a node density table
# for each of them by running generator classes from isochrone-tools.jar.
# Nothing is done unless $AVAILABLE_ISOCHRONE_TOOLS evaluates to true.
# Globals:   AVAILABLE_ISOCHRONE_TOOLS, DEPLOY_DIR, WORKING_DIR, DENSITY (read)
# Arguments: none
# Outputs:   progress on stdout, error messages on stderr; the java output
#            goes to $WORKING_DIR/generate_gridNetwork.log and
#            $WORKING_DIR/generate_spiderNetwork.log
# Exits:     with the exit status of the first java call that fails
#######################################
function fn_create_synthetic_networks() {
	local jar="${DEPLOY_DIR}/isochrone-tools.jar"
	local pkg="at.uibk.dbis.isochrone.generator"
	local grid_log="$WORKING_DIR/generate_gridNetwork.log"
	local spider_log="$WORKING_DIR/generate_spiderNetwork.log"
	# $? is overwritten by every command (including "[" and printf), so the
	# status of each java call is stored before it is tested and reported.
	local status

	if $AVAILABLE_ISOCHRONE_TOOLS; then
		echo "Importing data for synthetic networks"

		# GridNetwork (this call starts a fresh log file, the density call appends)
		java -cp "$jar" "${pkg}.GridNetworkGenerator" -d 100 -l 60 > "$grid_log" 2>&1
		status=$?
		if [ "$status" -ne 0 ]; then
			printf "%s\n" "[ERROR] An error occurred while creating the grid network. The process will exit now with status $status" >&2
			exit "$status"
		fi
		java -cp "$jar" "${pkg}.density.DensityGenerator" -t grid_s100 -d "$DENSITY" >> "$grid_log" 2>&1
		status=$?
		if [ "$status" -ne 0 ]; then
			printf "%s\n" "[ERROR] An error occurred while creating the node_density table for 'grid_s100' using isochrone-tools. The process will exit now with status $status" >&2
			exit "$status"
		fi

		# SpiderNetwork (same pattern: fresh log first, density call appends)
		java -cp "$jar" "${pkg}.SpiderNetworkGenerator" -d 6 -lvl 1000 -l 60 > "$spider_log" 2>&1
		status=$?
		if [ "$status" -ne 0 ]; then
			printf "%s\n" "[ERROR] An error occurred while creating the spider network. The process will exit now with status $status" >&2
			exit "$status"
		fi
		java -cp "$jar" "${pkg}.density.DensityGenerator" -t spider_l1000 -d "$DENSITY" >> "$spider_log" 2>&1
		status=$?
		if [ "$status" -ne 0 ]; then
			printf "%s\n" "[ERROR] An error occurred while creating the node_density table for 'spider_l1000' using isochrone-tools. The process will exit now with status $status" >&2
			exit "$status"
		fi
	fi
}

#######################################
# Imports one real world dataset into the databases.
# Globals:   IMPORT_DATA_POSTGIS (read)
# Arguments: $1 - name of the dataset (e.g. "Bolzano")
#            $2 - target SRID (EPSG code) of the import
# Outputs:   progress messages on stdout
#######################################
function fn_import_dataset() {
	# NAME has to be taken from the first argument; without this assignment
	# the messages and both import calls would see an unset/foreign $NAME.
	local NAME="$1"
	local SRID="$2"

	echo "Importing data for region of $NAME (EPSG:${SRID})"
	if $IMPORT_DATA_POSTGIS; then
		echo "  - importing into postgis"
		fn_import_dataset_postgis "$NAME" "$SRID"
	fi

	# NOTE(review): the neo4j import runs unconditionally here. Its former
	# indentation suggests it may once have been guarded by a flag similar to
	# IMPORT_DATA_POSTGIS - confirm before adding such a guard.
	echo "  - importing into neo4j"
	fn_import_dataset_neo4j_spatial "$NAME"
}

#######################################
# Imports one dataset into neo4j by running the ImportDataNeo4j class of
# isochrone-tools.jar.
# Globals:   DEPLOY_DIR, WORKING_DIR (read)
# Arguments: $1 - name of the dataset; blanks are removed and the result is
#                 lower-cased to build the value passed via -t
# Outputs:   java output in $WORKING_DIR/import_neo4j_<city>.log,
#            an error message on stderr on failure
# Exits:     with the exit status of java if the import fails
#######################################
function fn_import_dataset_neo4j_spatial() {
	local NAME="$1"
	local status

	local CITY=${NAME// /}
	CITY=${CITY,,}
	# We copy data from postgis for specified dataset (after postgis data import)
	java -cp "${DEPLOY_DIR}/isochrone-tools.jar" at.uibk.dbis.isochrone.exporter.ImportDataNeo4j -t "${CITY}" > "$WORKING_DIR/import_neo4j_$CITY.log" 2>&1
	# Store the status at once; "[" and printf would otherwise replace $? with 0
	status=$?
	if [ "$status" -ne 0 ]; then
		printf "%s\n" "[ERROR] An error occurred while importing data into neo4j using isochrone-tools. The process will exit now with status $status" >&2
		exit "$status"
	fi
}

#######################################
# Imports one dataset into the PostGIS database.
# Without a forced update a cached SQL export is downloaded and imported.
# If no export is present in $DATA_DIR afterwards (or a forced update deleted
# it) the datamodel is created with isochrone-datamodel and copied into the
# "isochrone" database. Finally the node density table is generated and the
# result is dumped to $DATA_DIR.
# Globals:   UPDATE_DATA, AVAILABLE_ISOCHRONE_DATAMODEL, AVAILABLE_POSTGIS,
#            AVAILABLE_ISOCHRONE_TOOLS, DOWNLOAD_DIR, DATA_DIR, WORKING_DIR,
#            DEPLOY_DIR, CACHE_DIR_REMOTE_DATA, DENSITY, PG_DB_USER,
#            PG_DB_PASSWORD, PG_DB_NAME (all read)
# Arguments: $1 - name of the dataset (e.g. "Bolzano")
#            $2 - target SRID
# Outputs:   progress on stdout, errors on stderr, tool output in
#            $WORKING_DIR/import_datamodel_<city>_<srid>.log and
#            $WORKING_DIR/create_datamodel_<city>_<srid>.log
# Exits:     with the status of the failing datamodel/java step
#######################################
function fn_import_dataset_postgis() {
	local NAME="$1"
	local SRID="$2"
	local SQL_EXPORT_FILE="${NAME,,}_export_${SRID}.sql.gz"
	local CITY=${NAME// /}
	CITY=${CITY,,}
	# Braces are required: "$CITY_$SRID" would expand the (unset) variable CITY_
	local import_log="$WORKING_DIR/import_datamodel_${CITY}_${SRID}.log"
	local datamodel_log="$WORKING_DIR/create_datamodel_${CITY}_${SRID}.log"
	local status tables tbl

	if ! $UPDATE_DATA; then
		echo "  - downloading SQL export"
		fn_download_newer "$DOWNLOAD_DIR/$SQL_EXPORT_FILE" "${CACHE_DIR_REMOTE_DATA}${SQL_EXPORT_FILE}"
		# Best effort copy: a missing download is tolerated, the datamodel is created below in that case
		cp "$DOWNLOAD_DIR/$SQL_EXPORT_FILE" "$DATA_DIR/$SQL_EXPORT_FILE" >> /dev/null 2>&1

		echo "  - importing SQL export into PostGIS database"
		gunzip -c "$DATA_DIR/$SQL_EXPORT_FILE" | PGPASSWORD="$PG_DB_PASSWORD" psql -U "$PG_DB_USER" -h localhost "$PG_DB_NAME" >> "$import_log" 2>&1

		echo "  - getting table permissions"
		tables=$(PGPASSWORD="$PG_DB_PASSWORD" psql -qAt -U "$PG_DB_USER" -h localhost -c "SELECT tablename FROM pg_tables WHERE schemaname = 'public' AND tableowner = 'postgres';" "$PG_DB_NAME")

		echo "  - fixing table permissions"
		# Word splitting is intended here: psql prints one table name per line
		for tbl in $tables; do
			PGPASSWORD="$PG_DB_PASSWORD" psql -qAt -U "$PG_DB_USER" -h localhost -c "ALTER TABLE $tbl OWNER TO $PG_DB_USER" "$PG_DB_NAME"
		done
	fi

	if $UPDATE_DATA && [ -f "$DATA_DIR/$SQL_EXPORT_FILE" ]; then
		echo "  - deleting outdated SQL export (forced data update)"
		rm -rf "$DATA_DIR/$SQL_EXPORT_FILE"
	fi

	if $AVAILABLE_ISOCHRONE_DATAMODEL && [ ! -f "$DATA_DIR/$SQL_EXPORT_FILE" ]; then
		echo "  - creating datamodel using isochrone-datamodel"
		# Create datamodel using isochrone-datamodel project
		DB_USERNAME="$PG_DB_USER" DB_PASSWORD="$PG_DB_PASSWORD" "$DEPLOY_DIR/isochrone-datamodel/osmPti2mmds.sh" -d -s -b -l "-t${SRID}" "-c${CITY}" >> "$datamodel_log" 2>&1
		# Keep the status before "[" and printf overwrite $?
		status=$?
		if [ "$status" -ne 0 ]; then
			printf "%s\n" "[ERROR] An error occurred while creating datamodel using isochrone-datamodel. The process will exit now with status $status" >&2
			exit "$status"
		fi

		echo "  - copying tables to isochrone database"
		# If working with multiple TARGET_SRIDs we have to delete a possibly existing nodes_density table here, so that a later DROP TABLE ${CITY}_nodes on import works without CASCADE (pg_dump does not use CASCADE)
		PGPASSWORD="$PG_DB_PASSWORD" psql -qAt -U "$PG_DB_USER" -h localhost -d isochrone -c "DROP TABLE IF EXISTS ${CITY}_nodes_density" >> "$datamodel_log" 2>&1
		# NOTE(review): user/password/database "spatial" are hard-coded for the source database
		PGPASSWORD="spatial" pg_dump -U spatial -h localhost -d spatial --clean --if-exists --no-privileges --no-owner -t "transformed.${CITY}_*" | sed -e "s/transformed/public/g" | PGPASSWORD="$PG_DB_PASSWORD" psql -U "$PG_DB_USER" -h localhost -d isochrone >> "$datamodel_log" 2>&1
	fi

	if $AVAILABLE_POSTGIS && $AVAILABLE_ISOCHRONE_TOOLS; then
		java -cp "${DEPLOY_DIR}/isochrone-tools.jar" "at.uibk.dbis.isochrone.generator.density.DensityGenerator" -t "${CITY}" -d "${DENSITY}" >> "$datamodel_log" 2>&1
		status=$?
		if [ "$status" -ne 0 ]; then
			printf "%s\n" "[ERROR] An error occurred while creating the node_density table for '${CITY}' using isochrone-tools. The process will exit now with status $status" >&2
			exit "$status"
		fi
	fi

	if $AVAILABLE_ISOCHRONE_DATAMODEL; then
		# Exporting resulting database to $DATA_DIR
		echo "  - exporting database dump"
		PGPASSWORD="$PG_DB_PASSWORD" pg_dump -U "$PG_DB_USER" -h localhost -d isochrone --clean --compress=5 --if-exists -t "${CITY}_*" --file="$DATA_DIR/$SQL_EXPORT_FILE" >> "$datamodel_log" 2>&1
	fi
}

#######################################
# Prepares filtered OSM data and configures geoserver (workspace, styles,
# neo4j and postgis datastores/feature types) by running gs-shell command
# files from $SHARED_CONF_DIR.
# Globals:   AVAILABLE_NEO4J_SPATIAL, AVAILABLE_POSTGIS, DEPLOY_ALL_DATASETS,
#            DEPLOY_DIR, SHARED_CONF_DIR, SHARED_IMG_DIR, WORKING_DIR,
#            TOMCAT_USER, PG_DB_USER, PG_DB_PASSWORD, OSM_FILE_*,
#            GEOSERVER_VERSION, GEOSERVER_SHELL_VERSION (all read)
# Arguments: none
# Outputs:   progress on stdout; gs-shell output is appended to
#            setup_geoserver_*.log files in the current directory
#######################################
function fn_init_geoserver() {
	# TODO: Do we really want to import data from OSM, so that it is available in geoserver (we publish data from within isochron-web to geoserver, but why should we do that with the raw data -> pgRouting comparison or similar?)
	# TODO: Needs testing
	local gs_shell="$DEPLOY_DIR/gs-shell/bin/gs-shell"
	local postgis_ds="$WORKING_DIR/geoserver_setup_postgis_ds.gs"
	local dataset escaped_user escaped_password
	# Datasets that have a per-dataset feature type command file.
	# "italy" stays disabled; "salzburg" has no feature type file here.
	local -a datasets=(bolzano innsbruck sanfrancisco washingtondc)
	if $DEPLOY_ALL_DATASETS; then
		datasets+=(altoadige)
	fi

	if $AVAILABLE_NEO4J_SPATIAL; then
		echo "Configuring neo4j data permissions for geoserver"
		chown -R "$TOMCAT_USER:$TOMCAT_USER" "$DEPLOY_DIR/neo4j/data/graph.db"
	fi

	echo "Preparing data from OpenStreetMap to be imported into geoserver"
	fn_filter_osm_data "Bolzano" "${OSM_FILE_ITALY}"
	fn_filter_osm_data "Innsbruck" "${OSM_FILE_AUSTRIA}"
	fn_filter_osm_data "Salzburg" "${OSM_FILE_AUSTRIA}"
	fn_filter_osm_data "SanFrancisco" "${OSM_FILE_US_CALIFORNIA}"
	fn_filter_osm_data "WashingtonDC" "${OSM_FILE_US_DC}"
	if $DEPLOY_ALL_DATASETS; then
#		fn_filter_osm_data "Italy" "${OSM_FILE_ITALY}"
		fn_filter_osm_data "AltoAdige" "${OSM_FILE_ITALY}"
	fi

	# TODO: We need to import the data into the database before issuing to geoserver that there are tables available

	echo "Configuring geoserver $GEOSERVER_VERSION using geoserver-shell $GEOSERVER_SHELL_VERSION"
	sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_ws.gs" >> setup_geoserver_workspace.log 2>&1
	sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_styles.gs" >> setup_geoserver_styles.log 2>&1
	# The glob has to stay outside the quotes so that it still expands
	cp "$SHARED_IMG_DIR"/* "$DEPLOY_DIR/geoserver/data/styles" >> setup_geoserver_styles.log 2>&1

	if $AVAILABLE_NEO4J_SPATIAL; then
		# Order: datastore first, then the per-dataset files, then the generic one
		sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ds.gs" >> setup_geoserver_neo4j.log 2>&1
		for dataset in "${datasets[@]}"; do
			sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft_${dataset}.gs" >> setup_geoserver_neo4j.log 2>&1
		done
		sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft.gs" >> setup_geoserver_neo4j.log 2>&1
	fi

	if $AVAILABLE_POSTGIS; then
		# copy geoserver_setup_postgis_ds and replace username/password
		cp "$SHARED_CONF_DIR/geoserver_setup_postgis_ds.gs" "$postgis_ds"
		# "\", "/" and "&" are special in a sed replacement; escape them so
		# that credentials containing these characters are inserted literally
		escaped_user=$(printf '%s' "$PG_DB_USER" | sed -e 's|[\\/&]|\\&|g')
		escaped_password=$(printf '%s' "$PG_DB_PASSWORD" | sed -e 's|[\\/&]|\\&|g')
		sed -i "s/@db_username@/${escaped_user}/" "$postgis_ds"
		sed -i "s/@db_password@/${escaped_password}/" "$postgis_ds"

		sh "$gs_shell" --cmdfile "$postgis_ds" >> setup_geoserver_postgis.log 2>&1
		for dataset in "${datasets[@]}"; do
			sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_postgis_ft_${dataset}.gs" >> setup_geoserver_postgis.log 2>&1
		done
		sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_postgis_ft.gs" >> setup_geoserver_postgis.log 2>&1
	fi
}

#######################################
# Provides the spatialite database for one SRID in $DATA_DIR.
# Without a forced update the cached file is downloaded and copied. If no
# file is present afterwards (or a forced update deleted it) and gdal is
# available, the PostGIS database "isochrone" is exported with ogr2ogr.
# Globals:   UPDATE_DATA, AVAILABLE_GDAL, DOWNLOAD_DIR, DATA_DIR, DEPLOY_DIR,
#            WORKING_DIR, CACHE_DIR_REMOTE_DATA, PG_DB_USER,
#            PG_DB_PASSWORD (read); SPATIALITE_FILENAME (written)
# Arguments: $1 - target SRID, used in the file name
# Outputs:   progress on stdout, a message on stderr if no database exists
#            at the end; ogr2ogr output in $WORKING_DIR/import_spatialite.log
#######################################
function fn_import_spatialite() {
	local SRID="$1"

	# Left global (as before) in case code outside this function reads it
	SPATIALITE_FILENAME="isochrone_${SRID}.spatialite"

	if ! $UPDATE_DATA; then
		echo "Importing spatialite database"
		fn_download_newer "$DOWNLOAD_DIR/$SPATIALITE_FILENAME" "${CACHE_DIR_REMOTE_DATA}${SPATIALITE_FILENAME}"
		cp -f "$DOWNLOAD_DIR/$SPATIALITE_FILENAME" "$DATA_DIR/"
	fi

	if $UPDATE_DATA && [ -f "$DATA_DIR/$SPATIALITE_FILENAME" ]; then
		echo "Deleting outdated spatialite database (forced data update)"
		rm -rf "$DATA_DIR/$SPATIALITE_FILENAME"
	fi

	if $AVAILABLE_GDAL && [ ! -f "$DATA_DIR/$SPATIALITE_FILENAME" ]; then
		echo "Exporting PostGIS data into spatialite database" 
		"$DEPLOY_DIR/gdal/apps/ogr2ogr" --config PG_LIST_ALL_TABLES YES --config PG_SKIP_VIEWS YES -progress -f "SQLite" "$DATA_DIR/$SPATIALITE_FILENAME" PG:"host=localhost dbname=isochrone user=$PG_DB_USER password=$PG_DB_PASSWORD" -lco LAUNDER=yes -dsco SPATIALITE=yes -lco SPATIAL_INDEX=yes -gt 65536  >> "$WORKING_DIR/import_spatialite.log" 2>&1
	fi

	if [ -f "$DATA_DIR/$SPATIALITE_FILENAME" ]; then
		echo "  - setting spatialite database permissions"
		chown -R apache:apache "$DATA_DIR/$SPATIALITE_FILENAME"
		# "g+rwX" grants the group read/write (and execute where applicable).
		# The former "-g+rwX" was parsed by chmod as a mode beginning with
		# the "-" operator and did not grant group write access.
		chmod -R g+rwX "$DATA_DIR/$SPATIALITE_FILENAME"
	else
		echo "  - spatialite database could not be created!" >&2
	fi
}

#######################################
# Provides the filtered OSM file "<city>_filtered.osm" for one dataset in
# $DATA_DIR. A cached copy is tried first; if none can be obtained the
# country/state extract is downloaded and cut to the city polygon with
# osmosis, keeping only ways that carry one of the keys in $OSM_KEYS.
# Globals:   UPDATE_DATA, DATA_DIR, DOWNLOAD_DIR, DEPLOY_DIR,
#            CACHE_DIR_REMOTE_DATA, CITY_POLYGON_DIR, OSM_DOWNLOAD_MIRROR,
#            OSM_KEYS (read); OSM_FILENAME, BOUNDING (written)
# Arguments: $1 - name of the dataset; blanks removed and lower-cased it
#                 names the filtered file and the polygon file
#            $2 - path of the OSM extract relative to $OSM_DOWNLOAD_MIRROR
# Outputs:   progress on stdout; osmosis output is appended to
#            filter_osmData_<name>.log in the current directory
#######################################
function fn_filter_osm_data() {
	local NAME="$1"
	local OSM_FILE="$2"
	local CITY=${NAME// /}
	CITY=${CITY,,}
	local OSM_FILE_FILTERED="${CITY}_filtered.osm"

	if $UPDATE_DATA && [ -f "$DATA_DIR/$OSM_FILE_FILTERED" ]; then
		echo "  - deleting outdated osm export (forced data update)"
		rm -rf "$DATA_DIR/$OSM_FILE_FILTERED"
	fi

	if [ ! -f "$DATA_DIR/$OSM_FILE_FILTERED" ]; then
		echo "  - downloading cached OpenStreetMap file"
		fn_download_newer "$DOWNLOAD_DIR/$OSM_FILE_FILTERED" "$CACHE_DIR_REMOTE_DATA/$OSM_FILE_FILTERED"
		# Best effort copy: without a cached file the extract is filtered below
		cp "$DOWNLOAD_DIR/$OSM_FILE_FILTERED" "$DATA_DIR/$OSM_FILE_FILTERED" >> /dev/null 2>&1
	fi

	if [ ! -f "$DATA_DIR/$OSM_FILE_FILTERED" ]; then
		echo "  - downloading OpenStreetMap file (no cached file found)"
		# OSM_FILENAME and BOUNDING stay global as before
		OSM_FILENAME="$DOWNLOAD_DIR/${OSM_FILE##*/}"
		# The polygon is named after the lower-cased city held in $CITY
		# (shell variables are case sensitive, "$city" is never set here)
		BOUNDING="${CITY_POLYGON_DIR}/${CITY}.poly"

		fn_download_newer "$OSM_FILENAME" "$OSM_DOWNLOAD_MIRROR/$OSM_FILE"

		if [[ "$BOUNDING" == *.poly ]]; then
			echo "  - filtering OpenStreetMap file"
			# osmosis has to receive the polygon path computed above in $BOUNDING
			"$DEPLOY_DIR/osmosis/bin/osmosis" --read-pbf file="$OSM_FILENAME" --bounding-polygon file="$BOUNDING" --way-key keyList="$OSM_KEYS" --used-node --write-xml file="$DATA_DIR/$OSM_FILE_FILTERED" >> "filter_osmData_$NAME.log" 2>&1
		fi
	fi
}


################
# Data import  #
################

# Runs the complete import (synthetic networks, every real world dataset for
# every SRID in $TARGET_SRID, optional spatialite export and geoserver setup)
# and reports the elapsed time. Skipped unless $DEPLOY_ANY_DATASET is true.
if $DEPLOY_ANY_DATASET; then
	echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
	echo " Importing datasets ($(date +%H:%M:%S)):"
	echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
	START=$(date +%s)

	mkdir -p "$DOWNLOAD_DIR"
	mkdir -p "$WORKING_DIR"
	# Several log files are written relative to the current directory, so
	# continuing after a failed cd would scatter them somewhere else.
	cd "$WORKING_DIR" || {
		echo "- Could not change into working directory '$WORKING_DIR'!" >&2
		exit 1
	}

	# Generate synthetic datasets (and densities for them) -> they are always created from scratch (never cached)
	fn_create_synthetic_networks

	# TARGET_SRID is a comma separated list; splitting on blanks is intended
	SRID_ARR=(${TARGET_SRID//,/ })
	for CURRENT_SRID in "${SRID_ARR[@]}"; do
		# Import real world datasets
		fn_import_dataset "Bolzano" "$CURRENT_SRID"
		fn_import_dataset "Innsbruck" "$CURRENT_SRID"
		fn_import_dataset "Salzburg" "$CURRENT_SRID"
		fn_import_dataset "SanFrancisco" "$CURRENT_SRID"
		fn_import_dataset "WashingtonDC" "$CURRENT_SRID"
		if $DEPLOY_ALL_DATASETS; then
#			fn_import_dataset "Italy" "$CURRENT_SRID"
			fn_import_dataset "AltoAdige" "$CURRENT_SRID"
		fi
		# Not importing datasets one-by-one into spatialite -> we copy data from postgis (after postgis data import)
		if $IMPORT_DATA_SPATIALITE; then
			fn_import_spatialite "$CURRENT_SRID"
		fi
	done

	# After data import we have to initialize geoserver layers (if geoserver is used)
	if $AVAILABLE_GEOSERVER; then
		fn_init_geoserver
	fi

	END=$(date +%s)
	TOTAL=$(( END - START ))
	echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
	echo " Datasets imported"
	printf ' Time to import the datasets: %dh:%dm:%ds\n' $((TOTAL/3600)) $((TOTAL%3600/60)) $((TOTAL%60))
	echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
fi