From 6dc7b939f0cacc32a27c7de65a8046e320ba76f2 Mon Sep 17 00:00:00 2001
From: Nikolaus Krismer <nikolaus.krismer@uibk.ac.at>
Date: Mon, 25 Jan 2016 00:19:31 +0100
Subject: [PATCH] added script createDataset (in folder etc) to allow creation
 of datasets on local machine

---
 CHANGELOG.md            |   1 +
 README.md               |   2 +-
 bootstrap/config.sh     |  11 ++--
 bootstrap/importData.sh |  42 +++++++++-------
 etc/.gitignore          |   1 +
 etc/createDataset.sh    | 109 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 144 insertions(+), 22 deletions(-)
 create mode 100644 etc/.gitignore
 create mode 100755 etc/createDataset.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8bc7b9c..cb77b66 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,6 @@
 Upcoming version:
 -----------------
+ - added script createDataset (in folder etc) to allow creation of datasets on local machine (Nikolaus Krismer)
  - re-added some parts for neo4j-spatial and geoserver (Nikolaus Krismer)
  - fixed parameter order issue (Nikolaus Krismer)
  - renamed datasets (Nikolaus Krismer)
diff --git a/README.md b/README.md
index 1eada91..a6eb469 100644
--- a/README.md
+++ b/README.md
@@ -204,7 +204,7 @@ vagrant up (or TOMCAT_INIT="true" vagrant up)
 The great thing about this vagrant is that it won't do provisioning again and
 as a result is much faster (less than 1min for my system)
 
-#### Something went south - what to do
+#### Something went south? - what to do
 
 If something didn't work out as expected or if you messed things up yourself
 then things can be easily fixed. Just destroy your backend by typing the following (don't be afraid :-)
diff --git a/bootstrap/config.sh b/bootstrap/config.sh
index 06f2e37..3a33a7c 100755
--- a/bootstrap/config.sh
+++ b/bootstrap/config.sh
@@ -3,8 +3,13 @@
 # This is used to configure the behavior of the various bootstrap scripts.
 #############################
 
+# Prevent double inclusion (so vars can be modified after first include)
+if [ -n "${WORKING_DIR}" ]; then
+    return
+fi
+
 IMPORT_DATA_GEOSERVER=false
-IMPORT_DATA_NEO4J_SPATIAL=true
+IMPORT_DATA_NEO4J_SPATIAL=false
 IMPORT_DATA_POSTGIS=true
 IMPORT_DATA_SPATIALITE=true
 
@@ -40,9 +45,9 @@ TOMCAT_PASSWORD="@tomcat_password@"
 # Do not change anything below this line!
 
 EXEC_POSTGRES="psql"
-POSTGRES_SHARE="/usr/share/pgsql"
+POSTGRES_SHARE="$(dirname /usr/pgsql*/.)""/share"
 if [ ! -d ${POSTGRES_SHARE} ]; then
-    POSTGRES_SHARE="$(dirname /usr/pgsql*/.)""/share"
+    POSTGRES_SHARE="/usr/share/pgsql"
 fi
 
 AVAILABLE_GDAL=$([ -d "${DEPLOY_DIR}/gdal" ] && echo true || echo false)
diff --git a/bootstrap/importData.sh b/bootstrap/importData.sh
index 88c56d1..0537a72 100755
--- a/bootstrap/importData.sh
+++ b/bootstrap/importData.sh
@@ -27,13 +27,14 @@ if [ ! -f "${SCRIPT_DIR}/config.sh" ]; then
 fi
 source "${SCRIPT_DIR}/config.sh"
 
-if ! $IMPORT_DATA_NEO4J_SPATIAL && ! $IMPORT_DATA_POSTGIS; then
+if ! $IMPORT_DATA_POSTGIS; then
     exit 0
 fi
 
 fn_arg2string PG_DB_USER "${1}" "${PG_DB_USER}"
 fn_arg2string PG_DB_PASSWORD "${2}" "${PG_DB_PASSWORD}"
 fn_arg2boolean DEPLOY_ALL_DATASETS "${3}" "${DEPLOY_ALL_DATASETS}"
+fn_arg2boolean DEPLOY_ANY_DATASETS "${4}" "true"
 
 ######################################
 # Configuration (tool versions, ...) #
@@ -56,13 +57,14 @@ OSM_KEYS="aerialway,highway,public_transport,railway,route"
 function fn_import_dataset() {
     local NAME="$1"
     local SRID="$2"
-    local SQL_EXPORT_FILE="${NAME,,}_export_${SRID}.sql.gz"
 
     echo "Importing data for region of $NAME (EPSG:${SRID})"
-    echo " - importing into postgis"
-    fn_import_dataset_postgis "$NAME" "$SRID" "$SQL_EXPORT_FILE"
+    if $IMPORT_DATA_POSTGIS; then
+        echo " - importing into postgis"
+        fn_import_dataset_postgis "$NAME" "$SRID"
+    fi
 
-    if $IMPORT_DATA_SPATIALITE; then
+    if $IMPORT_DATA_NEO4J_SPATIAL; then
         echo " - importing into neo4j"
         fn_import_dataset_neo4j_spatial "$NAME"
     fi
@@ -70,20 +72,19 @@ function fn_import_dataset() {
 
 function fn_import_dataset_neo4j_spatial() {
     local NAME="$1"
-    local SQL_EXPORT_FILE="$2"
 
     local CITY=${NAME// /}
     CITY=${CITY,,}
 
     # We copy data from postgis for specified dataset (after postgis data import)
-    java -cp $DOWNLOAD_DIR/isochrone-tools.jar at.uibk.dbis.isochrone.exporter.ImportDataNeo4j -t "${CITY}" > "$WORKING_DIR/import_neo4j_$CITY.log" 2>&1
+    java -cp ${DEPLOY_DIR}/isochrone-tools.jar at.uibk.dbis.isochrone.exporter.ImportDataNeo4j -t "${CITY}" > "$WORKING_DIR/import_neo4j_$CITY.log" 2>&1
 }
 
 function fn_import_dataset_postgis() {
     local NAME="$1"
     local SRID="$2"
-    local SQL_EXPORT_FILE="$3"
+    local SQL_EXPORT_FILE="${NAME,,}_export_${SRID}.sql.gz"
 
     local CITY=${NAME// /}
     CITY=${CITY,,}
 
@@ -112,22 +113,22 @@ function fn_import_dataset_postgis() {
     if $AVAILABLE_ISOCHRONE_DATAMODEL && [ ! -f "$DATA_DIR/$SQL_EXPORT_FILE" ]; then
         echo " - creating datamodel using isochrone-datamodel"
         # Create datamodel using isochrone-datamodel project
-        DB_USERNAME="$PG_DB_USER" DB_PASSWORD="$PG_DB_PASSWORD" "$DEPLOY_DIR/isochrone-datamodel/builder.sh" -d -s -b -l -t${SRID} -c${CITY} >> "$WORKING_DIR/create_datamodel_$CITY_$SRID.log" 2>&1
+        DB_USERNAME="$PG_DB_USER" DB_PASSWORD="$PG_DB_PASSWORD" "$DEPLOY_DIR/isochrone-datamodel/builder.sh" -d -s -b -l -t${SRID} -c${CITY} >> "$WORKING_DIR/create_datamodel_${CITY}_${SRID}.log" 2>&1
 
         echo " - copying tables to isochrone database"
         # If working with multiple TARGET_SRIDs we have to delete an eventually existing nodes_density table here, so on import later a DROP TABLE ${CITY}_nodes works wihtout CASCADE (pg_dump does not use CASCADE)
-        PGPASSWORD="$PG_DB_PASSWORD" psql -qAt -U "$PG_DB_USER" -h localhost -d isochrone -c "DROP TABLE IF EXISTS ${CITY}_nodes_density" >> "$WORKING_DIR/create_datamodel_$CITY_$SRID.log" 2>&1
-        PGPASSWORD="spatial" pg_dump -U spatial -h localhost -d spatial --clean --if-exists --no-privileges --no-owner -t "transformed.${CITY}_*" | sed -e "s/transformed/public/g" | PGPASSWORD="$PG_DB_PASSWORD" psql -U "$PG_DB_USER" -h localhost -d isochrone >> "$WORKING_DIR/create_datamodel_$CITY_$SRID.log" 2>&1
+        PGPASSWORD="$PG_DB_PASSWORD" psql -qAt -U "$PG_DB_USER" -h localhost -d isochrone -c "DROP TABLE IF EXISTS ${CITY}_nodes_density" >> "$WORKING_DIR/create_datamodel_${CITY}_${SRID}.log" 2>&1
+        PGPASSWORD="spatial" pg_dump -U spatial -h localhost -d spatial --clean --if-exists --no-privileges --no-owner -t "transformed.${CITY}_*" | sed -e "s/transformed/public/g" | PGPASSWORD="$PG_DB_PASSWORD" psql -U "$PG_DB_USER" -h localhost -d isochrone >> "$WORKING_DIR/create_datamodel_${CITY}_${SRID}.log" 2>&1
     fi
 
     if $AVAILABLE_POSTGIS && $AVAILABLE_ISOCHRONE_TOOLS; then
-        java -cp $DOWNLOAD_DIR/isochrone-tools.jar at.uibk.dbis.isochrone.generator.density.DensityGenerator -t "$CITY" -d "$DENSITY" >> "$WORKING_DIR/create_datamodel_$CITY_$SRID.log" 2>&1
"${DEPLOY_DIR}/isochrone-tools.jar" "at.uibk.dbis.isochrone.generator.density.DensityGenerator" -t "${CITY}" -d "${DENSITY}" >> "$WORKING_DIR/create_datamodel_${CITY}_${SRID}.log" 2>&1 fi if $AVAILABLE_ISOCHRONE_DATAMODEL; then # Exporting resulting database to $DATA_DIR echo " - exporting database dump" - PGPASSWORD="$PG_DB_PASSWORD" pg_dump -U "$PG_DB_USER" -h localhost -d isochrone --clean --compress=5 --if-exists -t "${CITY}_*" --file="$DATA_DIR/$SQL_EXPORT_FILE" >> "$WORKING_DIR/create_datamodel_$CITY_$SRID.log" 2>&1 + PGPASSWORD="$PG_DB_PASSWORD" pg_dump -U "$PG_DB_USER" -h localhost -d isochrone --clean --compress=5 --if-exists -t "${CITY}_*" --file="$DATA_DIR/$SQL_EXPORT_FILE" >> "$WORKING_DIR/create_datamodel_${CITY}_${SRID}.log" 2>&1 fi } @@ -220,9 +221,10 @@ function fn_import_spatialite() { function fn_filter_osm_data() { local NAME="$1" local OSM_FILE="$2" + local CITY=${NAME// /} CITY=${CITY,,} - local OSM_FILE_FILTERED="${city}_filtered.osm" + local OSM_FILE_FILTERED="${CITY}_filtered.osm" if $UPDATE_DATA && [ -f "$DATA_DIR/$OSM_FILE_FILTERED" ]; then echo " - deleting outdated osm export (forced data update)" @@ -254,6 +256,10 @@ function fn_filter_osm_data() { # Data import # ################ +if ! $DEPLOY_ANY_DATASETS; then + return; +fi + echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" echo " Importing datasets ($(date +%H:%M:%S)):" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" @@ -267,10 +273,10 @@ cd "$WORKING_DIR" # Generate synthetic datasets (and densities for them) -> they are always created from scratch (never cached) if $AVAILABLE_POSTGIS && $AVAILABLE_ISOCHRONE_TOOLS; then echo "Importing data for synthetic networks" - java -cp $DOWNLOAD_DIR/isochrone-tools.jar at.uibk.dbis.isochrone.generator.GridNetworkGenerator -d 100 -l 60 > "$WORKING_DIR/generate_gridNetwork.log" 2>&1 - java -cp $DOWNLOAD_DIR/isochrone-tools.jar at.uibk.dbis.isochrone.generator.SpiderNetworkGenerator -d 6 -lvl 1000 -l 60 > "$WORKING_DIR/generate_spiderNetwork.log" 2>&1 - java -cp $DOWNLOAD_DIR/isochrone-tools.jar at.uibk.dbis.isochrone.generator.density.DensityGenerator -t grid_s100 -d "$DENSITY" >> "$WORKING_DIR/generate_gridNetwork.log" 2>&1 - java -cp $DOWNLOAD_DIR/isochrone-tools.jar at.uibk.dbis.isochrone.generator.density.DensityGenerator -t spider_l1000 -d "$DENSITY" >> "$WORKING_DIR/generate_spiderNetwork.log" 2>&1 + java -cp ${DEPLOY_DIR}/isochrone-tools.jar at.uibk.dbis.isochrone.generator.GridNetworkGenerator -d 100 -l 60 > "$WORKING_DIR/generate_gridNetwork.log" 2>&1 + java -cp ${DEPLOY_DIR}/isochrone-tools.jar at.uibk.dbis.isochrone.generator.SpiderNetworkGenerator -d 6 -lvl 1000 -l 60 > "$WORKING_DIR/generate_spiderNetwork.log" 2>&1 + java -cp ${DEPLOY_DIR}/isochrone-tools.jar at.uibk.dbis.isochrone.generator.density.DensityGenerator -t grid_s100 -d "$DENSITY" >> "$WORKING_DIR/generate_gridNetwork.log" 2>&1 + java -cp ${DEPLOY_DIR}/isochrone-tools.jar at.uibk.dbis.isochrone.generator.density.DensityGenerator -t spider_l1000 -d "$DENSITY" >> "$WORKING_DIR/generate_spiderNetwork.log" 2>&1 fi SRID_ARR=(${TARGET_SRID//,/ }) diff --git a/etc/.gitignore b/etc/.gitignore new file mode 100644 index 0000000..bf0824e --- /dev/null +++ b/etc/.gitignore @@ -0,0 +1 @@ +*.log \ No newline at end of file diff --git a/etc/createDataset.sh b/etc/createDataset.sh new file mode 100755 index 0000000..3534c3f --- /dev/null +++ b/etc/createDataset.sh @@ -0,0 +1,109 @@ +#! /bin/bash +############################# +# This script imports data (e.g. 
+# downloaded from OSM (OpenStreetMap) into the available databases
+# (neo4j_spatial, postgis and spatialite).
+#
+# In contrast to importData.sh, this script only imports the given dataset(s)
+# with the given SRIDs.
+#
+# Note that you need some symlinks in directory /opt/ (to the isochrone-datamodel
+# and isochrone-tools projects) for this script to work correctly.
+#############################
+
+if [ "$EUID" -ne "0" ]; then
+    echo '- This script must be run as root!'
+    exit 1
+fi
+
+#########################################################################
+# Config import (so we can override some things like working directory) #
+#########################################################################
+
+SCRIPT="$(readlink -f ${BASH_SOURCE[0]})"
+CURRENT_DIR="$(dirname ${SCRIPT})"
+SCRIPT_DIR="${CURRENT_DIR}/../bootstrap"
+if [ ! -f "${SCRIPT_DIR}/config.sh" ]; then
+    # If config.sh is not beside the bash script (most likely because vagrant uploaded it into the guest)
+    # we will try to find it in the shared folder
+    SCRIPT_DIR="/vagrant/bootstrap"
+fi
+if [ ! -f "${SCRIPT_DIR}/config.sh" ]; then
+    echo '- No variable declarations found (config.sh file not found)!'
+    exit 1;
+fi
+source "${SCRIPT_DIR}/config.sh"
+
+############################
+# Precondition checks #
+############################
+
+
+if ! $AVAILABLE_ISOCHRONE_DATAMODEL; then
+    echo " - Project folder isochrone-datamodel not present in directory '${DEPLOY_DIR}'"
+    exit 1
+fi
+
+if ! $AVAILABLE_ISOCHRONE_TOOLS; then
+    echo " - Project archive isochrone-tools not present in directory '${DEPLOY_DIR}'"
+    exit 1
+fi
+
+if ! $AVAILABLE_POSTGRES; then
+    echo " - PostgreSQL database not installed on this system"
+    exit 1
+fi
+
+if ! $AVAILABLE_POSTGIS; then
+    echo " - PostGIS extension not installed on this system"
+    exit 1
+fi
+
+############################
+# Variable definitions #
+############################
+
+DATASET_SRID="${1}"
+DATASET_CITY="${2}"
+PG_DB_USER="${3}"
+PG_DB_PASSWORD="${4}"
+UPDATE_DATA=true
+WORKING_DIR="${CURRENT_DIR}"
+
+source "${SCRIPT_DIR}/importData.sh" "${PG_DB_USER}" "${PG_DB_PASSWORD}" "false" "false"
+
+################
+# Data import #
+################
+
+echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+echo " Creating datasets ($(date +%H:%M:%S)):"
+echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+
+START=$(date +%s)
+
+mkdir -p "${DATA_DIR}"
+mkdir -p "${DOWNLOAD_DIR}"
+mkdir -p "${WORKING_DIR}"
+cd "${WORKING_DIR}"
+
+# Delete old log files
+find . -type f -name "*.log" -delete
+
+# Create datasets
+CITY_ARR=(${DATASET_CITY//,/ })
+SRID_ARR=(${DATASET_SRID//,/ })
+for CURRENT_CITY in "${CITY_ARR[@]}"; do
+    for CURRENT_SRID in "${SRID_ARR[@]}"; do
+        fn_import_dataset_postgis "${CURRENT_CITY}" "${CURRENT_SRID}"
+    done
+done
+
+END=$(date +%s)
+TOTAL=$(( $END - $START ))
+
+echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+echo " Datasets created in directory '${DATA_DIR}'"
+echo " Imports with the last specified SRID are still present in the database"
+printf ' Time to import the datasets: %dh:%dm:%ds\n' $(($TOTAL/3600)) $(($TOTAL%3600/60)) $(($TOTAL%60))
+echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-- 
GitLab
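
Usage sketch (not part of the patch itself; it only mirrors the argument handling
in etc/createDataset.sh, which reads comma-separated SRIDs, comma-separated city
names, the PostgreSQL user and the PostgreSQL password; the SRID, city and
credential values below are placeholders):

    # must be run as root because of the EUID check at the top of the script
    sudo ./etc/createDataset.sh "4326" "Bolzano" "postgres" "postgres"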