#! /bin/bash
#############################
# Imports data (e.g. the road network of Bolzano) that is downloaded from
# OSM (OpenStreetMap) into the available databases (neo4j and postgresql).
#
# Must be run as root.
#
# Positional arguments (all handed to the fn_arg2string / fn_arg2boolean
# helpers, which are not defined in this file - presumably config.sh provides
# them and they fall back to the configured value; confirm there):
#   $1 - PG_DB_USER
#   $2 - PG_DB_PASSWORD
#   $3 - IS_LOCAL_TEST_DEPLOY
#############################

if [ "$EUID" -ne "0" ]; then
	echo '- This script must be run as root!'
	exit 1
fi

############################
# Variable definitions     #
############################

# Resolve the directory of this script (quoted, so paths containing
# whitespace survive).
SCRIPT="$(readlink -f "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(dirname "${SCRIPT}")"
if [ ! -f "${SCRIPT_DIR}/config.sh" ]; then
	# If config.sh is not next to this script (most likely because vagrant
	# uploaded the script into the guest) we try to find it in the shared folder
	SCRIPT_DIR="/vagrant/bootstrap"
fi
if [ ! -f "${SCRIPT_DIR}/config.sh" ]; then
	echo '- No variable declarations found (config.sh file not found)!'
	exit 1
fi
source "${SCRIPT_DIR}/config.sh"

# The IMPORT_DATA_* flags are executed as commands, so they are expected to
# hold "true" or "false". Nothing to do when every import source is disabled.
if ! $IMPORT_DATA_OSM && ! $IMPORT_DATA_OSM_CACHED && ! $IMPORT_DATA_SQL; then
	exit 0
fi

fn_arg2string PG_DB_USER "${1}" "${PG_DB_USER}"
fn_arg2string PG_DB_PASSWORD "${2}" "${PG_DB_PASSWORD}"
fn_arg2boolean IS_LOCAL_TEST_DEPLOY "${3}" "${IS_LOCAL_TEST_DEPLOY}"

# Configuration (tool versions, ...)

# Bounding definitions handed to fn_import_data. Values ending in ".poly" are
# polygon files; the commented-out variants are plain bounding boxes.
CITY_POLYGON_DIR="${SHARED_CONF_DIR}/cityPolygons"
#OSM_BOUNDING_BZ="top=46.5200 left=11.3000 bottom=46.4500 right=11.3873"
#OSM_BOUNDING_IBK="top=47.3592 left=11.3020 bottom=47.2108 right=11.4554"
OSM_BOUNDING_BZ="${CITY_POLYGON_DIR}/bolzano.poly"
OSM_BOUNDING_IBK="${CITY_POLYGON_DIR}/innsbruck.poly"
OSM_BOUNDING_IT="${CITY_POLYGON_DIR}/italy.poly"
OSM_BOUNDING_SBG="${CITY_POLYGON_DIR}/salzburg.poly"
# NOTE(review): "sanfrancsico" looks like a typo for "sanfrancisco" - left
# unchanged because it has to match the file name on disk; please verify.
OSM_BOUNDING_SF="${CITY_POLYGON_DIR}/sanfrancsico.poly"
OSM_BOUNDING_ST="${CITY_POLYGON_DIR}/trentinoaltoadige.poly"

# Source extracts; the paths are appended to OSM_DOWNLOAD_MIRROR.
OSM_DOWNLOAD_MIRROR="http://download.geofabrik.de"
OSM_FILE_AUSTRIA="europe/austria-140301.osm.pbf"
OSM_FILE_ITALY="europe/italy-140301.osm.pbf"
OSM_FILE_CALIFORNIA="north-america/us/california-140301.osm.pbf"

# File names of the filtered (per region) extracts.
OSM_FILE_BZ_FILTERED="bozen-140301-filtered.osm"
OSM_FILE_IBK_FILTERED="innsbruck-140301-filtered.osm"
OSM_FILE_IT_FILTERED="italy-140301-filtered.osm"
OSM_FILE_SBG_FILTERED="salzburg-140301-filtered.osm"
OSM_FILE_SF_FILTERED="sanfrancisco-140301-filtered.osm"
OSM_FILE_ST_FILTERED="southtyrol-140301-filtered.osm"

# Way keys that are kept when the source extract is filtered.
OSM_KEYS="aerialway,highway,public_transport,railway,route"

# Table prefixes, passed as the last argument of fn_import_data.
TABLE_PREFIX_BZ="bolzano_"
TABLE_PREFIX_IBK="innsbruck_"
TABLE_PREFIX_IT="italy_"
TABLE_PREFIX_SBG="salzburg_"
TABLE_PREFIX_SF="sanfrancisco_"
TABLE_PREFIX_ST="trentinoaltoadige_"

############################
# Function definitions     #
############################

#######################################
# Imports the data sets of one region into the available databases.
#
# Depending on the configuration flags this function
#   1. downloads a gzipped SQL export, pipes it into the PostgreSQL database
#      and hands every table of schema 'public' that is owned by 'postgres'
#      over to PG_DB_USER;
#   2. obtains the filtered OSM file of the region (cached download, or a
#      fresh download of the source extract that is filtered with osmosis)
#      and imports it into neo4j-spatial.
#
# Globals (read):
#   IMPORT_DATA_SQL, IMPORT_DATA_OSM, IMPORT_DATA_OSM_CACHED, IMPORT_SCHEDULES,
#   AVAILABLE_POSTGIS, AVAILABLE_NEO4J_SPATIAL (each "true" or "false"),
#   DOWNLOAD_DIR, DATA_DIR, WORKING_DIR, DEPLOY_DIR, CACHE_DIR_REMOTE_DATA,
#   OSM_DOWNLOAD_MIRROR, OSM_KEYS, PG_DB_USER, PG_DB_PASSWORD, PG_DB_NAME
# Arguments:
#   $1 - region name (used in messages and log file names)
#   $2 - path of the source extract below OSM_DOWNLOAD_MIRROR
#   $3 - file name of the filtered OSM file
#   $4 - bounding definition: a "*.poly" polygon file, otherwise the
#        arguments of an osmosis bounding box (e.g. "top=.. left=..")
#   $5 - file name of the gzipped SQL export (may be empty)
#   $6 - table prefix (accepted, currently not used)
# Returns:
#   1 if nothing has to be imported for this configuration, 0 otherwise.
#######################################
fn_import_data() {
	local NAME="$1"
	local OSM_FILE="$2"
	local OSM_FILE_FILTERED="$3"
	local BOUNDING="$4"
	local SQL_EXPORT_FILE="$5"
	# Accepted for interface compatibility; not used by this function.
	local TABLE_PREFIX="$6"
	local OSM_FILENAME tables tbl

	local VALID_OSM=false
	local VALID_SQL=false
	if $IMPORT_DATA_SQL && $AVAILABLE_POSTGIS && $IMPORT_SCHEDULES && [ -n "$SQL_EXPORT_FILE" ]; then
		VALID_SQL=true
	fi
	if $AVAILABLE_NEO4J_SPATIAL && { $IMPORT_DATA_OSM_CACHED || $IMPORT_DATA_OSM; }; then
		# without neo4j-spatial the osm data would not be imported afterwards,
		# so there is no point in downloading it
		VALID_OSM=true
	fi

	echo "Importing data for region of $NAME"
	if ! { $VALID_SQL || $VALID_OSM; }; then
		echo "  - skipping import"
		return 1
	fi

	if $VALID_SQL; then
		echo "  - downloading sql export"
		fn_download_newer "$DOWNLOAD_DIR/$SQL_EXPORT_FILE" "$CACHE_DIR_REMOTE_DATA/$SQL_EXPORT_FILE"
		# best effort: a failed copy only shows up in the import log below
		cp "$DOWNLOAD_DIR/$SQL_EXPORT_FILE" "$DATA_DIR/$SQL_EXPORT_FILE" >> /dev/null 2>&1

		echo "  - importing sql export into postgis database"
		gunzip -c "$DATA_DIR/$SQL_EXPORT_FILE" | PGPASSWORD="$PG_DB_PASSWORD" psql -U "$PG_DB_USER" -h localhost "$PG_DB_NAME" >> "$WORKING_DIR/import_schedule_$NAME.log" 2>&1

		echo "  - getting table permissions"
		tables=$(PGPASSWORD="$PG_DB_PASSWORD" psql -qAt -U "$PG_DB_USER" -h localhost -c "SELECT tablename FROM pg_tables WHERE schemaname = 'public' AND tableowner = 'postgres';" "$PG_DB_NAME")

		echo "  - fixing table permissions"
		# $tables is intentionally unquoted: psql prints one table name per line
		for tbl in $tables; do
			PGPASSWORD="$PG_DB_PASSWORD" psql -qAt -U "$PG_DB_USER" -h localhost -c "ALTER TABLE $tbl OWNER TO $PG_DB_USER" "$PG_DB_NAME"
		done
	fi

	if ! $VALID_OSM; then
		# sql export has been handled above -> if no data from osm should be used we can return here
		return 0
	fi

	if $IMPORT_DATA_OSM_CACHED; then
		echo "  - downloading cached OpenStreetMap file"
		fn_download_newer "$DOWNLOAD_DIR/$OSM_FILE_FILTERED" "$CACHE_DIR_REMOTE_DATA/$OSM_FILE_FILTERED"
		cp "$DOWNLOAD_DIR/$OSM_FILE_FILTERED" "$DATA_DIR/$OSM_FILE_FILTERED" >> /dev/null 2>&1
	fi

	if [ ! -f "$DATA_DIR/$OSM_FILE_FILTERED" ]; then
		if $IMPORT_DATA_OSM; then
			echo "  - downloading OpenStreetMap file (no cached file found)"
			# the source extract is stored under its base name
			OSM_FILENAME="$DOWNLOAD_DIR/${OSM_FILE##*/}"
			fn_download_newer "$OSM_FILENAME" "$OSM_DOWNLOAD_MIRROR/$OSM_FILE"

			echo "  - filtering OpenStreetMap data"
			# exactly one osmosis run: polygon file or bounding box
			if [[ "$BOUNDING" == *.poly ]]; then
				"$DEPLOY_DIR/osmosis/bin/osmosis" --read-pbf file="$OSM_FILENAME" --bounding-polygon file="$BOUNDING" --way-key keyList="$OSM_KEYS" --used-node --write-xml file="$DATA_DIR/$OSM_FILE_FILTERED" >> "$WORKING_DIR/initialize_osmData_$NAME.log" 2>&1
			else
				# $BOUNDING is intentionally unquoted: every "key=value" pair
				# has to become a separate osmosis argument
				# shellcheck disable=SC2086
				"$DEPLOY_DIR/osmosis/bin/osmosis" --read-pbf file="$OSM_FILENAME" --bounding-box $BOUNDING --way-key keyList="$OSM_KEYS" --used-node --write-xml file="$DATA_DIR/$OSM_FILE_FILTERED" >> "$WORKING_DIR/initialize_osmData_$NAME.log" 2>&1
			fi
		else
			echo "  - problems with downloading cached osm file... this will most likely lead cause errors"
		fi
	fi

	if $AVAILABLE_NEO4J_SPATIAL; then
		echo "  - importing filtered OpenStreetMap data into neo4j-spatial"
		# maven runs in a subshell: when the 'cd' fails the import is skipped
		# (the error ends up in the log) instead of running in the wrong directory
		(
			cd "$DEPLOY_DIR/neo4j-spatial" \
				&& mvn exec:java -Dexec.mainClass=org.neo4j.gis.spatial.osm.OSMImporter -Dexec.args="$DEPLOY_DIR/neo4j/data/graph.db $DATA_DIR/$OSM_FILE_FILTERED"
		) >> "$WORKING_DIR/import_osm2neo4j.log" 2>&1
		# kept for callers that rely on ending up in the working directory
		cd "$WORKING_DIR"
	fi

	return 0
}

################
# Data import  #
################

echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo " Importing datasets ($(date +%H:%M:%S)):"
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

# Start time in seconds since the epoch; the duration is reported at the end
# of the script.
START=$(date +%s)

mkdir -p "$DOWNLOAD_DIR"
mkdir -p "$WORKING_DIR"
# The relative log files written further down end up in the working
# directory, so continuing after a failed 'cd' would scatter them elsewhere.
cd "$WORKING_DIR" || {
	echo "- Cannot change into working directory '$WORKING_DIR'!"
	exit 1
}

# One call per region. Arguments: region name, source extract, filtered OSM
# file, bounding definition, SQL export file, table prefix.
fn_import_data "Bozen" "$OSM_FILE_ITALY" "$OSM_FILE_BZ_FILTERED" "$OSM_BOUNDING_BZ" "bolzano_export.sql.gz" "$TABLE_PREFIX_BZ"
fn_import_data "Innsbruck" "$OSM_FILE_AUSTRIA" "$OSM_FILE_IBK_FILTERED" "$OSM_BOUNDING_IBK" "innsbruck_export.sql.gz" "$TABLE_PREFIX_IBK"
fn_import_data "Salzburg" "$OSM_FILE_AUSTRIA" "$OSM_FILE_SBG_FILTERED" "$OSM_BOUNDING_SBG" "salzburg_export.sql.gz" "$TABLE_PREFIX_SBG"
fn_import_data "San Francisco" "$OSM_FILE_CALIFORNIA" "$OSM_FILE_SF_FILTERED" "$OSM_BOUNDING_SF" "sanfrancisco_export.sql.gz" "$TABLE_PREFIX_SF"
# Currently disabled regions:
#	fn_import_data "Italy" "$OSM_FILE_ITALY" "$OSM_FILE_IT_FILTERED" "$OSM_BOUNDING_IT" "italy_export.sql.gz" "$TABLE_PREFIX_IT"
#	fn_import_data "Alto Adige" "$OSM_FILE_ITALY" "$OSM_FILE_ST_FILTERED" "$OSM_BOUNDING_ST" "trentinoaltoadige_export.sql.gz" "$TABLE_PREFIX_ST"
# Generate synthetic datasets (and densities for them)
if $AVAILABLE_POSTGIS && $AVAILABLE_ISOCHRONE_TOOLS; then
	isochrone_tools_jar="$DOWNLOAD_DIR/isochrone-tools.jar"

	# The generators start a fresh log (>); the density runs append (>>) to
	# the log of the network they belong to.
	java -cp "$isochrone_tools_jar" at.uibk.dbis.isochrone.generator.GridNetworkGenerator -d 100 -l 60 > "$WORKING_DIR/generate_gridNetwork.log" 2>&1
	java -cp "$isochrone_tools_jar" at.uibk.dbis.isochrone.generator.SpiderNetworkGenerator -d 6 -lvl 1000 -l 60 > "$WORKING_DIR/generate_spiderNetwork.log" 2>&1
	java -cp "$isochrone_tools_jar" at.uibk.dbis.isochrone.generator.density.DensityGenerator -t grid_s100 -d 60,120,180,240,300 >> "$WORKING_DIR/generate_gridNetwork.log" 2>&1
	java -cp "$isochrone_tools_jar" at.uibk.dbis.isochrone.generator.density.DensityGenerator -t spider_l1000 -d 60,120,180,240,300 >> "$WORKING_DIR/generate_spiderNetwork.log" 2>&1
fi

# Configure geoserver (workspace, styles and one data store plus feature
# types per available database) by feeding command files to geoserver-shell.
# The log files are relative, i.e. written into the current directory.
if $AVAILABLE_GEOSERVER; then
	gs_shell="$DEPLOY_DIR/gs-shell/bin/gs-shell"

	if $AVAILABLE_NEO4J_SPATIAL; then
		echo "Configuring neo4j data permissions for geoserver"
		chown -R "$TOMCAT_USER:$TOMCAT_USER" "$DEPLOY_DIR/neo4j/data/graph.db"
	fi

	echo "Configuring geoserver $GEOSERVER_VERSION using geoserver-shell $GEOSERVER_SHELL_VERSION"
	sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_ws.gs" >> setup_geoserver_workspace.log 2>&1
	sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_styles.gs" >> setup_geoserver_styles.log 2>&1
	cp "$SHARED_IMG_DIR"/* "$DEPLOY_DIR/geoserver/data/styles" >> setup_geoserver_styles.log 2>&1

	if $AVAILABLE_NEO4J_SPATIAL; then
		sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ds.gs" >> setup_geoserver_neo4j.log 2>&1
		sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft_bz.gs" >> setup_geoserver_neo4j.log 2>&1
		sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft_ibk.gs" >> setup_geoserver_neo4j.log 2>&1
		sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft_sf.gs" >> setup_geoserver_neo4j.log 2>&1
		if ! $IS_LOCAL_TEST_DEPLOY; then
#			sh $DEPLOY_DIR/gs-shell/bin/gs-shell --cmdfile $SHARED_CONF_DIR/geoserver_setup_neo4j_ft_it.gs >> setup_geoserver_neo4j.log 2>&1
#			sh $DEPLOY_DIR/gs-shell/bin/gs-shell --cmdfile $SHARED_CONF_DIR/geoserver_setup_neo4j_ft_st.gs >> setup_geoserver_neo4j.log 2>&1
			sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_neo4j_ft.gs" >> setup_geoserver_neo4j.log 2>&1
		fi
	fi
	if $AVAILABLE_POSTGIS; then
		# copy geoserver_setup_postgis_ds and replace username/password
		# NOTE(review): the filled-in copy (including the password) stays in
		# the working directory afterwards.
		cp "$SHARED_CONF_DIR/geoserver_setup_postgis_ds.gs" "$WORKING_DIR/geoserver_setup_postgis_ds.gs"
		# Escape the characters that are special in a sed replacement
		# (backslash, '/' and '&'); otherwise e.g. a password containing '/'
		# breaks the substitution and '&' would insert the placeholder again.
		pg_user_escaped=$(printf '%s' "$PG_DB_USER" | sed -e 's,[\\/&],\\&,g')
		pg_password_escaped=$(printf '%s' "$PG_DB_PASSWORD" | sed -e 's,[\\/&],\\&,g')
		sed -i "s/@db_username@/$pg_user_escaped/" "$WORKING_DIR/geoserver_setup_postgis_ds.gs"
		sed -i "s/@db_password@/$pg_password_escaped/" "$WORKING_DIR/geoserver_setup_postgis_ds.gs"

		sh "$gs_shell" --cmdfile "$WORKING_DIR/geoserver_setup_postgis_ds.gs" >> setup_geoserver_postgis.log 2>&1
		sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_postgis_ft_bz.gs" >> setup_geoserver_postgis.log 2>&1
		sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_postgis_ft_ibk.gs" >> setup_geoserver_postgis.log 2>&1
		sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_postgis_ft_sf.gs" >> setup_geoserver_postgis.log 2>&1
		if ! $IS_LOCAL_TEST_DEPLOY; then
#			sh $DEPLOY_DIR/gs-shell/bin/gs-shell --cmdfile $SHARED_CONF_DIR/geoserver_setup_postgis_ft_it.gs >> setup_geoserver_postgis.log 2>&1
#			sh $DEPLOY_DIR/gs-shell/bin/gs-shell --cmdfile $SHARED_CONF_DIR/geoserver_setup_postgis_ft_st.gs >> setup_geoserver_postgis.log 2>&1
			sh "$gs_shell" --cmdfile "$SHARED_CONF_DIR/geoserver_setup_postgis_ft.gs" >> setup_geoserver_postgis.log 2>&1
		fi
	fi
fi

# Provide the spatialite database: export it from PostGIS when GDAL is
# available, otherwise download a prepared database file.
if $AVAILABLE_GDAL; then
	# NOTE(review): OSM_FILE_FILTERED is only assigned as a local variable of
	# fn_import_data. Unless config.sh defines a global of that name, this
	# test looks at "$DATA_DIR/" and the old database is never deleted -
	# please confirm the intended condition.
	if [ -f "$DATA_DIR/$OSM_FILE_FILTERED" ]; then
		echo "Deleting outdated spatialite database"
		rm -rf "$DATA_DIR/isochrone.db"
	fi

	echo "Exporting PostGIS data into spatialite database" 
	"$DEPLOY_DIR/gdal/apps/ogr2ogr" --config PG_LIST_ALL_TABLES YES --config PG_SKIP_VIEWS YES -progress -f "SQLite" "$DATA_DIR/isochrone.db" PG:"host=localhost dbname=isochrone user=$PG_DB_USER password=$PG_DB_PASSWORD" -lco LAUNDER=yes -dsco SPATIALITE=yes -lco SPATIAL_INDEX=yes -gt 65536 >> setup_spatialite_neo4j.log 2>&1

	echo "  - setting spatialite database permissions"
	chown -R apache:apache "$DATA_DIR/isochrone.db"
	# "g+rwX" grants the group read/write (and search on directories). The
	# former "-g+rwX" was parsed as a mode that first removes a copy of the
	# group bits and then adds rwX for everyone, limited by the umask.
	chmod -R g+rwX "$DATA_DIR/isochrone.db"
elif $AVAILABLE_SPATIALITE_ADMIN; then
	echo "Importing spatialite database"
	fn_download_newer "$DOWNLOAD_DIR/isochrone.db" "$CACHE_DIR_REMOTE_DATA/201505_spatialite.db"
	cp -f "$DOWNLOAD_DIR/isochrone.db" "$DATA_DIR/"

	echo "  - setting spatialite database permissions"
	chown -R apache:apache "$DATA_DIR/isochrone.db"
	# see above: grant the group read/write access
	chmod -R g+rwX "$DATA_DIR/isochrone.db"
fi

# Report the total duration of the import, measured from START.
END=$(date +%s)
TOTAL=$(( $END - $START ))

printf '%s\n' \
	"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" \
	" Datasets imported"
# hours, minutes and seconds are split out of the elapsed seconds
printf ' Time to import the datasets: %dh:%dm:%ds\n' \
	$(($TOTAL/3600)) \
	$(($TOTAL%3600/60)) \
	$(($TOTAL%60))
printf '%s\n' "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"