File: pull-and-process-data

package info (click to toggle)
dsc-statistics 201203250530-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 8,696 kB
  • sloc: ansic: 13,636; sh: 10,154; perl: 4,516; cpp: 4,441; makefile: 407
file content (175 lines) | stat: -rwxr-xr-x 4,039 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/bin/bash
#
# pull-and-process-data - pull and process dsc data.

# Abort on the first failing command (-e) and on any use of an unset
# variable (-u).
set -eu

# Package name handed to logger(1) by logecho.
PACKAGE="dsc-statistics"
# Path this script was invoked as; used to re-execute it with --rsync.
me="$0"

# Log a message through logger(1) and, where appropriate, to stderr.
# Globals:   PACKAGE (read), me (read; falls back to $0 when unset)
# Arguments: $1   - logger priority in facility.level form (e.g. daemon.err)
#            $2.. - message words, joined with single spaces
# Outputs:   "<LEVEL>: <message>" on stderr when "tty -s" succeeds or the
#            level is anything other than INFO
logecho() {
  local priority="$1"
  shift
  local message="$*"
  local level

  # Prefix the syslog entry with the script path (the variable "me"),
  # not with the literal word "me".
  logger -p "$priority" "$PACKAGE" -- "${me:-$0}: $message"

  # The level is whatever follows the first dot of the priority, upper-cased.
  level="${priority#*.}"
  level="$(printf '%s' "$level" | tr 'a-z' 'A-Z')"
  if tty -s || [ "$level" != "INFO" ]; then
    echo >&2 "$level: $message"
  fi
}

# Refuse to start when rsync is not available on PATH.
command -v rsync >/dev/null || {
  logecho daemon.err "rsync not found"
  exit 1
}

# Default for the xargs --max-procs value used further down; the defaults
# file, when present, is sourced afterwards and may override it.
RSYNC_PARALLEL="1"
if [ -e "/etc/default/dsc-statistics-presenter" ]; then
  . /etc/default/dsc-statistics-presenter
fi

# Print the command-line help text to stdout.
# The whitespace in front of each option line (eight spaces on the first two
# indented lines, a tab on the rest) is kept exactly as it has always been.
usage() {
  printf '%s - pull and process dsc data\n' "$0"
  printf '        Options:\n'
  printf '        -h|--help - Show this message\n'
  printf '\t-r|--remove-source-files - Remove files from source host\n'
  printf '\t-R|--rsync - Do the actual rsync (internal use only)\n'
  printf '\t-s|--serial - Do rsyncs serially (to handle known_hosts issues)\n'
}

# Pull the uploaded files of one node into its local "incoming" directory.
# Globals:   RSYNCOPTS (read); NODEDIR (written); USER, REMOTEHOST,
#            UPLOADDIR, SSHKEY, RSYNCLOCOPTS (reset here, then presumably
#            set by the node's sourced config file)
# Arguments: $1 - node directory that contains a "config" file
# Returns:   0 when rsync fails, REMOTEHOST is empty or the config is
#            missing; these cases are only reported through logecho.
#            Changes the working directory to the node directory.
function do_rsync {
    local rc=0
    local rsh_cmd="ssh"

    NODEDIR="$1"
    mkdir -p "$NODEDIR/incoming"
    # The config is sourced relative to the node directory.
    cd "$NODEDIR"

    # Start from empty values so settings of a previously processed node
    # cannot leak into this one.
    USER=""
    REMOTEHOST=""
    UPLOADDIR=""
    SSHKEY=""
    RSYNCLOCOPTS=""

    if [ ! -e "./config" ]; then
      logecho daemon.err "$NODEDIR/config not found"
      return 0
    fi
    . ./config

    if [ -z "$REMOTEHOST" ]; then
      logecho daemon.info "ignoring $NODEDIR, no REMOTEHOST set in config"
      return 0
    fi

    # Only pass -i when a key is configured; a bare "ssh -i" would swallow
    # the next argument as the identity file.
    if [ -n "$SSHKEY" ]; then
      rsh_cmd="ssh -i $SSHKEY"
    fi

    # RSYNCOPTS and RSYNCLOCOPTS are deliberately unquoted: each holds a
    # whitespace-separated list of rsync options.
    # The status is captured with "|| rc=$?" because inside
    # "if ! rsync ...; then" the value of $? is that of the negation (0).
    # shellcheck disable=SC2086
    rsync ${RSYNCOPTS:-} $RSYNCLOCOPTS --rsh="$rsh_cmd" --archive \
      "$USER@$REMOTEHOST:$UPLOADDIR/*" "$NODEDIR/incoming" || rc=$?
    if [ "$rc" -ne 0 ]; then
      logecho daemon.err "rsync from $REMOTEHOST failed with error code $rc"
    fi
}

## Parse commandline
# getopt(1) normalises the arguments into TEMP; the leading "+" of the short
# option string stops option parsing at the first non-option argument.
# The assignment sits inside the "if" so that a getopt failure reaches the
# message below; as a plain statement it would make "set -e" end the script
# before any status check could run.
if ! TEMP=$(getopt -n pull-and-process-data \
        -l help,remove-source-files,rsync:,serial -- \
        +hrR:s "$@"); then
        echo "Terminating..." >&2
        exit 1
fi


# Reload the positional parameters from getopt's normalised output.  TEMP is
# quoted on purpose: getopt emits shell-quoted words, and an unquoted
# expansion would let the shell split and glob them before eval re-parses
# them.
eval set -- "${TEMP}"
RSYNC=""
# Keep a value inherited from the environment: RSYNCOPTS is exported further
# down, before this script re-executes itself with --rsync.
RSYNCOPTS="${RSYNCOPTS:-}"
REMOVE_SOURCE_FILES=""
SERIAL=""
# Consume options up to the "--" separator; also stop when the arguments run
# out so a missing separator cannot cause an endless loop.
while test "$#" -gt 0 && test "$1" != "--"; do
        case "$1" in
                -h|--help)
                        usage
                        exit 0
                ;;
                -v|--verbose)
                        # NOTE(review): the getopt call visible in this file
                        # does not declare -v/--verbose, so this arm looks
                        # unreachable, and "verbose" is not read anywhere in
                        # the visible code.
                        verbose=yes
                ;;
                -r|--remove-source-files)
                        REMOVE_SOURCE_FILES="--remove-source-files"
                ;;
                -R|--rsync)
                        # The option's argument is the next word.
                        shift
                        RSYNC="$1"
                ;;
                -s|--serial)
                        SERIAL="1"
                ;;
        esac
        shift
done
# Drop the "--" separator itself.
if test "$#" -gt 0; then
        shift
fi

# Hand the removal flag to rsync through RSYNCOPTS.
if [ "$REMOVE_SOURCE_FILES" = "--remove-source-files" ]; then
        RSYNCOPTS="$RSYNCOPTS $REMOVE_SOURCE_FILES"
fi

# Worker mode (-R/--rsync DIR): sync just that one node directory and stop.
# The directory is quoted so a path containing whitespace stays one argument.
if [ -n "$RSYNC" ]; then
  do_rsync "$RSYNC"
  exit 0
fi

# Only warn after the worker-mode exit above, i.e. not once per worker.
if [ "$REMOVE_SOURCE_FILES" != "--remove-source-files" ]; then
       logecho daemon.warning "WARN: test mode, will not remove source files"
fi

TIME0="$(date +%s)"

LOCKFILE="/var/run/dsc-statistics-presenter/pull-and-process-data.lock"

# Try the lock first and take the timestamp afterwards, so that
# TIME1 - TIME0 is the time actually spent on the lock attempt and the later
# "end rsync" duration (measured from TIME1) no longer includes it.
if ! dotlockfile -p -l "$LOCKFILE"; then
        TIME1="$(date +%s)"
        logecho daemon.err "lock not obtained (after $(( TIME1 - TIME0 )) ), aborting"
        exit 1
fi
TIME1="$(date +%s)"
# NOTE(review): LOCKED is not read anywhere in the visible part of this file.
LOCKED=yes

CONFFILE="/etc/dsc-statistics/dsc-grapher.cfg"

BASEDIR="/var/lib/dsc-statistics"
DATADIR="$BASEDIR/data"

logecho daemon.info "lock obtained (after  $(( TIME1 - TIME0 )) secs), start rsync"

# Collect the node directories to sync, one per line.  Every line of CONFFILE
# that starts with "server" is read as: keyword, server name, node names.
# Only real directories below DATADIR are kept; symlinks and anything that is
# not a directory are skipped, at the server level and at the node level.
# "read -r" keeps backslashes literal, and the paths are quoted so they are
# tested exactly as written.
NODEDIRS="$(grep '^server' < "$CONFFILE" | while read -r dummy SERVER nodes; do
  SERVERDIR="$DATADIR/$SERVER"
  if [ -L "$SERVERDIR" ] || [ ! -d "$SERVERDIR" ]; then
    continue
  fi
  # $nodes is deliberately unquoted: it is a whitespace-separated list.
  for NODE in $nodes; do
    NODEDIR="$SERVERDIR/$NODE"
    if [ -L "$NODEDIR" ] || [ ! -d "$NODEDIR" ]; then
      continue
    fi
    echo "$NODEDIR"
  done
done)"

# Exported so that the copies of this script started by xargs below find the
# same rsync options in their environment.
export RSYNCOPTS

if [ -n "$SERIAL" ]; then
  echo "serialized"
  # NODEDIRS is deliberately unquoted: it is a whitespace-separated list.
  for nodedir in $NODEDIRS; do
    echo "$nodedir"
    do_rsync "$nodedir"
    sleep 2 # allow for Ctrl-C
  done
else
  # One re-execution of this script per node directory, with at most
  # RSYNC_PARALLEL of them running at a time.  The script path is quoted so
  # a path containing whitespace is still run as a single command.
  printf '%s\n' "$NODEDIRS" | xargs --no-run-if-empty --max-args=1 \
    --max-procs="$RSYNC_PARALLEL" "$me" --rsync
fi

TIME2="$(date +%s)"
logecho daemon.info "end rsync (after $(( TIME2 - TIME1 )) secs), starting refile-and-grok"

# run refile-and-grok twice to slowly catch up after outages
# The status is captured instead of letting "set -e" end the script, so the
# lock is always released below.  As before, a failing first run skips the
# second one and the script exits with that run's status.
REFILE_STATUS=0
/usr/share/dsc-statistics-presenter/refile-and-grok || REFILE_STATUS=$?
if [ "$REFILE_STATUS" -eq 0 ]; then
  /usr/share/dsc-statistics-presenter/refile-and-grok || REFILE_STATUS=$?
fi

dotlockfile -u "$LOCKFILE" || true

if [ "$REFILE_STATUS" -ne 0 ]; then
  logecho daemon.err "refile-and-grok failed with error code $REFILE_STATUS"
  exit "$REFILE_STATUS"
fi

TIME3="$(date +%s)"
logecho daemon.info "end refile-and-grok (after $(( TIME3 - TIME2 )) secs), end"

exit 0

# EOF