#!/bin/bash

# Strict mode: abort on any unhandled command failure (-e) and treat the
# expansion of an unset variable as an error (-u).
set -eu

# Path this script was invoked as.
me=$0
# Package name handed to logger(1) with every log message.
PACKAGE=dsc-statistics

#######################################
# Send a message to syslog and, when appropriate, echo it on stderr.
# Globals:   PACKAGE (read), me (read)
# Arguments: $1   - syslog priority in facility.level form (e.g. daemon.err)
#            $2.. - message words, joined into one string
# Outputs:   "<LEVEL>: <message>" on stderr, where LEVEL is the upper-cased
#            part of $1 after the first dot. INFO messages are printed only
#            when stdin is a terminal; every other level is always printed.
#######################################
logecho() {
  local priority="$1"
  shift
  local message="$*"
  # Prefix the message with the script path ($me), not the literal word "me".
  logger -p "$priority" "$PACKAGE" -- "$me: $message"
  # Strip everything up to and including the first dot, then upper-case.
  local level="${priority#*.}"
  level="$(printf '%s' "$level" | tr 'a-z' 'A-Z')"
  if tty -s || [ "$level" != "INFO" ]; then
    echo >&2 "$level: $message"
  fi
}

# rsync is required; log the problem and stop when it is not on PATH.
command -v rsync >/dev/null || {
  logecho daemon.err "rsync not found"
  exit 1
}

# Default degree of parallelism for the rsync fan-out; the optional defaults
# file sourced below may assign a different value.
RSYNC_PARALLEL=1
if [ -e /etc/default/dsc-statistics-presenter ]; then
  source /etc/default/dsc-statistics-presenter
fi

#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   the help text on stdout; its first line starts with $0
#######################################
usage() {
  printf '%s - pull and process dsc data\n' "$0"
  printf '        Options:\n'
  printf '        -h|--help - Show this message\n'
  printf '\t-r|--remove-source-files - Remove files from source host\n'
  printf '\t-R|--rsync - Do the actual rsync (internal use only)\n'
  printf '\t-s|--serial - Do rsyncs serially (to handle known_hosts issues)\n'
}

#######################################
# Pull the uploaded files of one node into "<node dir>/incoming" via rsync
# over ssh, using the settings from "<node dir>/config".
# Globals:   RSYNCOPTS (read); NODEDIR, USER, REMOTEHOST, UPLOADDIR, SSHKEY,
#            RSYNCLOCOPTS (written, the latter five usually by the config)
# Arguments: $1 - node directory; it must contain a file named "config"
# Outputs:   problems are reported through logecho
# Notes:     changes the working directory to the node directory. A failed
#            rsync is logged but not treated as fatal.
#######################################
do_rsync() {
    NODEDIR=$1
    mkdir -p "$NODEDIR/incoming"
    cd "$NODEDIR"
    # Reset the per-node settings so that values from a previously sourced
    # config cannot leak into this node.
    USER=""
    REMOTEHOST=""
    UPLOADDIR=""
    SSHKEY=""
    RSYNCLOCOPTS=""
    if [ ! -e "./config" ]; then
      logecho daemon.err "$NODEDIR/config not found"
      return 0
    fi
    . ./config
    if [ -z "$REMOTEHOST" ]; then
      logecho daemon.info "ignoring $NODEDIR, no REMOTEHOST set in config"
      return 0
    fi
    # Capture rsync's own exit status: inside "if ! cmd; then" the value of $?
    # is the status of the negated condition (always 0), not that of cmd.
    local rc=0
    # RSYNCOPTS and RSYNCLOCOPTS stay unquoted on purpose: each may hold
    # several options. The source spec is quoted so the wildcard is handed to
    # rsync literally instead of being expanded by the local shell.
    rsync $RSYNCOPTS $RSYNCLOCOPTS --rsh="ssh -i $SSHKEY" --archive \
      "$USER@$REMOTEHOST:$UPLOADDIR/*" "$NODEDIR/incoming" || rc=$?
    if [ "$rc" -ne 0 ]; then
      logecho daemon.err "rsync from $REMOTEHOST failed with error code $rc"
    fi
}

## Parse commandline

#######################################
# Parse the command-line options into the option globals.
# Globals:   RSYNC, RSYNCOPTS, REMOVE_SOURCE_FILES, SERIAL (written)
# Arguments: the script's command-line arguments
# Outputs:   "Terminating..." on stderr when getopt rejects the arguments
# Exits:     1 on invalid options; 0 after printing the usage for -h|--help
# Notes:     operands following the options are ignored.
#######################################
parse_commandline() {
  local TEMP
  # Test getopt's status as part of the assignment: with "set -e" active a
  # bare failing assignment aborts the script before the message is printed.
  if ! TEMP=$(getopt -n pull-and-process-data \
    -l help,remove-source-files,rsync:,serial -- \
    +hrR:s "$@"); then
    echo "Terminating..." >&2
    exit 1
  fi

  # getopt emits shell-quoted words; the expansion must be quoted so eval
  # re-parses them verbatim (unquoted, whitespace inside an argument would be
  # collapsed and glob characters could be expanded).
  eval set -- "$TEMP"
  RSYNC=""
  RSYNCOPTS="${RSYNCOPTS:-}"
  REMOVE_SOURCE_FILES=""
  SERIAL=""
  while test "$1" != "--"; do
    case "$1" in
      -h|--help)
        usage
        exit 0
        ;;
      -v|--verbose)
        # NOTE(review): the getopt specification above does not declare
        # -v/--verbose, so getopt rejects it before this arm can match.
        verbose=yes
        ;;
      -r|--remove-source-files)
        REMOVE_SOURCE_FILES="--remove-source-files"
        ;;
      -R|--rsync)
        # The option's argument is the next word.
        shift
        RSYNC="$1"
        ;;
      -s|--serial)
        SERIAL="1"
        ;;
    esac
    shift
  done

  # Hand the removal request on to rsync through its option string.
  if [ "$REMOVE_SOURCE_FILES" = "--remove-source-files" ]; then
    RSYNCOPTS="$RSYNCOPTS $REMOVE_SOURCE_FILES"
  fi
}

parse_commandline "$@"

# Worker mode (-R|--rsync DIR): pull that single node directory and stop.
# The directory is passed quoted so that it reaches do_rsync as one word,
# without word splitting or pathname expansion.
if [ -n "$RSYNC" ]; then
  do_rsync "$RSYNC"
  exit 0
fi

# Without -r|--remove-source-files nothing is deleted on the source hosts;
# say so in the log.
if [ "$REMOVE_SOURCE_FILES" != "--remove-source-files" ]; then
  logecho daemon.warning "WARN: test mode, will not remove source files"
fi

TIME0="$(date +%s)"

LOCKFILE="/var/run/dsc-statistics-presenter/pull-and-process-data.lock"

# Take the lock. TIME1 is sampled only after dotlockfile has returned, so the
# reported wait is the time actually spent on the lock attempt and the rsync
# duration computed later does not include it.
if ! dotlockfile -p -l "$LOCKFILE"; then
  TIME1="$(date +%s)"
  logecho daemon.err "lock not obtained (after $(( TIME1 - TIME0 )) ), aborting"
  exit 1
fi
TIME1="$(date +%s)"
LOCKED=yes

# Release the lock exactly once. Installed as EXIT trap so the lock is also
# given back when "set -e" aborts the script in the middle of the run.
release_lock() {
  if [ "$LOCKED" = "yes" ]; then
    LOCKED=no
    dotlockfile -u "$LOCKFILE" || true
  fi
}
trap release_lock EXIT

CONFFILE="/etc/dsc-statistics/dsc-grapher.cfg"

BASEDIR="/var/lib/dsc-statistics"
DATADIR="$BASEDIR/data"

logecho daemon.info "lock obtained (after  $(( TIME1 - TIME0 )) secs), start rsync"

# Build the list of node directories, one per line: for every line of the
# config file starting with "server", take the second word as server name and
# the remaining words as node names. Keep "$DATADIR/<server>/<node>" only when
# both levels are real directories; symlinks are skipped.
NODEDIRS="$(grep '^server' < "$CONFFILE" | while read -r dummy SERVER nodes; do
  SERVERDIR="$DATADIR/$SERVER"
  test -L "$SERVERDIR" && continue
  test -d "$SERVERDIR" || continue
  for NODE in $nodes; do
    NODEDIR="$SERVERDIR/$NODE"
    test -L "$NODEDIR" && continue
    test -d "$NODEDIR" || continue
    echo "$NODEDIR"
  done
done)"

# The parallel workers are separate invocations of this script and receive the
# rsync options through the environment.
export RSYNCOPTS

if [ -n "$SERIAL" ]; then
  echo "serialized"
  # NODEDIRS is deliberately unquoted: it is a whitespace-separated list.
  for nodedir in $NODEDIRS; do
    echo "$nodedir"
    do_rsync "$nodedir"
    sleep 2 # allow for Ctrl-C
  done
else
  # One worker invocation ("$me --rsync DIR") per node directory, at most
  # RSYNC_PARALLEL of them running at the same time.
  printf '%s\n' "$NODEDIRS" \
    | xargs --no-run-if-empty --max-args=1 --max-procs="$RSYNC_PARALLEL" "$me" --rsync
fi

TIME2="$(date +%s)"
logecho daemon.info "end rsync (after $(( TIME2 - TIME1 )) secs), starting refile-and-grok"

# run refile-and-grok twice to slowly catch up after outages
/usr/share/dsc-statistics-presenter/refile-and-grok
/usr/share/dsc-statistics-presenter/refile-and-grok

release_lock

TIME3="$(date +%s)"
logecho daemon.info "end refile-and-grok (after $(( TIME3 - TIME2 )) secs), end"

exit 0

# EOF
