File: backup-recipe.sh

package info (click to toggle)
subversion 1.4.2dfsg1-3
  • links: PTS
  • area: main
  • in suites: etch
  • size: 37,284 kB
  • ctags: 32,888
  • sloc: ansic: 406,472; python: 38,378; sh: 15,438; cpp: 9,604; ruby: 8,313; perl: 5,308; java: 4,576; lisp: 3,860; xml: 3,298; makefile: 856
file content (271 lines) | stat: -rwxr-xr-x 9,273 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
#!/bin/sh

###########################################################################
#                                                                         #
#  This shell script demonstrates a backup/restore recipe for live        #
#  Subversion repositories, using a standard full+incrementals process.   #
#                                                                         #
#  This script is intended only as an example; the idea is that you       #
#  can read over it, understand how it works (it's extensively commented) #
#  and then implement real backup and restore scripts based on this       #
#  recipe.                                                                #
#                                                                         #
#  To reiterate: this is *not* a backup and restore solution.  It's       #
#  really just documentation, in the form of code with comments.          #
#                                                                         #
#  If you do implement your own scripts based on the recipe here, and     #
#  your implementations are generic enough to be generally useful,        #
#  please post them to dev@subversion.tigris.org.  It would be great if   #
#  we could offer a real solution, and not just a description of one.     #
#                                                                         #
#  This recipe is distilled from the Berkeley DB documentation, see       #
#  http://www.sleepycat.com/docs/ref/transapp/archival.html.              #
#                                                                         #
#  See also http://www.sleepycat.com/docs/ref/transapp/reclimit.html for  #
#  possible problems using standard 'cp' in this recipe.                  #
#                                                                         #
###########################################################################

# High-level overview of the full backup recipe:
# 
#    1. Ask BDB's db_archive for a list of unused log files.
#
#    2. Copy the entire db/ dir to the backup area.
#
#    3. Recopy all the logfiles to the backup area.  There may be more
#       logfiles now than there were when step (1) ran.
#
#    4. Remove the logfiles listed as inactive in step (1) from the
#       repository, though not from the backup.
#    
# High-level overview of the incremental backup recipe:
#
#    1. Just copy the Berkeley logfiles to a backup area.
#    
# High-level overview of the restoration recipe:
#
#    1. Copy all the datafiles and logfiles back to the repository, in
#       the same order they were backed up.
#
#    2. Run Berkeley's "catastrophic recovery" command on the repository.
#
# That's it.  Here we go...

# Tool locations -- adjust these to match your installation.
SVN="svn"
SVNADMIN="svnadmin"
SVNLOOK="svnlook"
# Berkeley DB utilities.  For details see
#   http://www.sleepycat.com/docs/utility/db_archive.html
#   http://www.sleepycat.com/docs/utility/db_recover.html
DB_ARCHIVE="/usr/local/BerkeleyDB.4.2/bin/db_archive"
DB_RECOVER="/usr/local/BerkeleyDB.4.2/bin/db_recover"

# Source data used only to generate repository activity.  Any binary
# file of about 64k will do; it doesn't have to be /bin/ls.
DATA_BLOB="/bin/ls"

# Everything from here on is derived from the sandbox location and the
# project name; you shouldn't need to customize it.
PROJ="myproj"
REPOS="${PROJ}-repos"
SANDBOX="$(pwd)/backups-test-tmp"
FULL_BACKUPS="${SANDBOX}/full"
INCREMENTAL_PREFIX="${SANDBOX}/incremental-logs"
RECORDS="${SANDBOX}/records"

# Start from a clean sandbox.  The expansions are quoted so that a
# working directory containing whitespace cannot make 'rm -rf' act on
# unintended paths, and ':?' aborts if SANDBOX is unset or empty.
rm -rf "${SANDBOX:?}"
mkdir "${SANDBOX}" || exit 1
mkdir "${RECORDS}" || exit 1

# Everything that follows uses paths relative to the sandbox, so stop
# right here if we cannot get into it.
cd "${SANDBOX}" || exit 1

# Create the repository and check out a working copy of it as ./wc.
"${SVNADMIN}" create --bdb-log-keep "${REPOS}"
"${SVN}" co "file://${SANDBOX}/${REPOS}" wc

cd wc || exit 1

# Put in enough data for us to exercise the logfiles.
for name in a1 b1 c1; do
  cp "${DATA_BLOB}" "./${name}"
done
"${SVN}" -q add a1 b1 c1
"${SVN}" -q ci -m "Initial add."

echo "Created test data."

# Back to the sandbox root; later steps refer to the repository and
# the working copy by paths relative to it.
cd .. || exit 1

# Exercise the logfiles by moving data around a lot.  Note that we
# avoid adds-with-history, since those cause much less Berkeley
# activity than plain adds.
#
# Usage: exercise CYCLES
#
#   CYCLES  Number of exercise cycles to run.  The more cycles, the
#           more logfiles will be generated; the ratio is about two
#           cycles per logfile.
#
# Reads the globals SANDBOX and SVN.  The work happens inside
# ${SANDBOX}/wc, and the caller's working directory is restored
# afterwards, so this may be called from any directory.  The variables
# limit, saved_cwd and i are plain globals, because 'local' is not
# available in every /bin/sh.
#
# Returns non-zero, without moving any files, if the working copy
# directory cannot be entered.
#
# Defined with the portable name() form rather than the 'function'
# keyword, which not every /bin/sh understands.
exercise()
{
   limit=${1}

   saved_cwd=$(pwd)
   cd "${SANDBOX}/wc" || return 1

   echo ""
   i=1
   while [ "${i}" -le "${limit}" ]; do
     # Rename on disk first, then have Subversion delete the old names
     # and add the new ones as brand-new files (no copy history).
     mv a1 a2
     mv b1 b2
     mv c1 c2
     "${SVN}" -q rm a1 b1 c1
     "${SVN}" -q add a2 b2 c2
     "${SVN}" -q ci -m "Move 1s to 2s, but not as cheap copies."

     # ...and back again, so the next pass starts from the same state.
     mv a2 a1
     mv b2 b1
     mv c2 c1
     "${SVN}" -q rm a2 b2 c2
     "${SVN}" -q add a1 b1 c1
     "${SVN}" -q ci -m "Move 2s back to 1s, same way."

     echo "Exercising repository, pass ${i} of ${limit}."
     # Shell arithmetic; no need to spawn an external calculator.
     i=$((i + 1))
   done
   echo ""

   cd "${saved_cwd}" || return 1
}

# Generate some logfile activity.
exercise 10

# Do a full backup.
head=$("${SVNLOOK}" youngest "${REPOS}")
echo "Starting full backup (at r${head})..."
mkdir -p "${FULL_BACKUPS}/${PROJ}/repos" "${FULL_BACKUPS}/${PROJ}/logs"

# Step 1: record which logfiles db_archive reports as unused *before*
# copying anything.  db_archive is run from inside the db/ directory,
# so the recorded names are relative to it.
cd "${REPOS}/db" || exit 1
"${DB_ARCHIVE}" > "${RECORDS}/${PROJ}-full-backup-inactive-logfiles"
cd ../.. || exit 1

# Step 2: copy the entire repository to the backup area.
cp -a "${REPOS}" "${FULL_BACKUPS}/${PROJ}/repos/"

# Step 3: recopy all the logfiles; there may be more of them now than
# when step 1 ran.  If we are not really inside db/, stop, because the
# cleanup in step 4 deletes files by relative name.
cd "${REPOS}/db" || exit 1
for logfile in $("${DB_ARCHIVE}" -l); do
  # For maximum paranoia, we want repository activity *while* we're
  # making the full backup.
  exercise 5
  cp "${logfile}" "${FULL_BACKUPS}/${PROJ}/logs"
done

# Step 4: remove the logfiles recorded in step 1 from the repository
# (though not from the backup).
xargs rm -f < "${RECORDS}/${PROJ}-full-backup-inactive-logfiles"
cd ../.. || exit 1
echo "Full backup completed (r${head} was head when started)."

# Do the incremental backups for a nominal week.  Each "day" gets its
# own directory, ${INCREMENTAL_PREFIX}-<day>/${PROJ}, holding a copy of
# the Berkeley logfiles.
for day in 1 2 3 4 5 6; do
  exercise 5
  head=$("${SVNLOOK}" youngest "${REPOS}")
  echo "Starting incremental backup ${day} (at r${head})..."
  mkdir -p "${INCREMENTAL_PREFIX}-${day}/${PROJ}"

  # The logfile names handled below are relative to db/, and the
  # cleanup deletes by those names, so stop if we cannot get there.
  cd "${REPOS}/db" || exit 1
  "${DB_ARCHIVE}" > "${RECORDS}/${PROJ}-incr-backup-${day}-inactive-logfiles"
  for logfile in $("${DB_ARCHIVE}" -l); do
    # For maximum paranoia, we want repository activity *while* we're
    # making the incremental backup.  But if we did commits with each
    # logfile copy, this script would be quite slow (Fibonacci effect).
    # So we only exercise on the last two "days" of incrementals.
    if [ "${day}" -ge 5 ]; then
      exercise 3
    fi
    cp "${logfile}" "${INCREMENTAL_PREFIX}-${day}/${PROJ}"
  done

  # Remove the logfiles that were recorded as inactive before copying.
  xargs rm -f < "${RECORDS}/${PROJ}-incr-backup-${day}-inactive-logfiles"
  cd ../.. || exit 1
  echo "Incremental backup ${day} done (r${head} was head when started)."
done

# The last revision a restoration is guaranteed to contain is whatever
# was head at the start of the last incremental backup.
last_guaranteed_rev=${head}

# Make the repository vanish, so we can restore it.  It is only renamed,
# not deleted, so that the restored copy can be compared against it
# later on.
mv "${REPOS}" "was_${REPOS}"

# The message is kept on one line; a line break inside the quoted
# string would be printed literally in the middle of the sentence.
echo ""
echo "Oliver Cromwell has destroyed the repository!  Restoration coming up..."
echo ""

# Restore.
#
# After copying the full repository backup over, we remove the shared
# memory segments and the dav/* stuff.  Recovery recreates the shmem
# segments, and anything in dav/* is certainly obsolete if we're doing
# a restore.
#
# Note that we use db_recover instead of 'svnadmin recover'.  This is
# because we want to pass the -c ('catastrophic') flag to db_recover.
# As of Subversion 1.0.x, there is no '--catastrophic' flag to
# 'svnadmin recover', unfortunately.
#
# db_recover works on the current directory, so every 'cd' is checked:
# running recovery, or unpacking logfiles, in the wrong directory
# would quietly produce a bogus restoration.
cp -a "${FULL_BACKUPS}/${PROJ}/repos/${REPOS}" .
cp -a "${FULL_BACKUPS}/${PROJ}/logs/"* "${REPOS}/db"
rm -rf "${REPOS}"/db/__db*
rm -rf "${REPOS}"/dav/*
cd "${REPOS}/db" || exit 1
"${DB_RECOVER}" -ce
cd ../.. || exit 1
head=$("${SVNLOOK}" youngest "${REPOS}")
echo ""
echo "(Restored from full backup to r${head}...)"

# Replay the incrementals in the same order they were taken: drop each
# day's logfiles into db/ and rerun catastrophic recovery.
for day in 1 2 3 4 5 6; do
  cd "${REPOS}/db" || exit 1
  cp "${INCREMENTAL_PREFIX}-${day}/${PROJ}/"* .
  "${DB_RECOVER}" -ce
  cd ../.. || exit 1
  head=$("${SVNLOOK}" youngest "${REPOS}")
  echo "(Restored from incremental-${day} to r${head}...)"
done
echo ""
echo "Restoration complete.  All hail the King."

# Verify the restoration: the restored repository must reach at least
# the revision that was head when the last incremental backup started.
was_head=$("${SVNLOOK}" youngest "was_${REPOS}")
restored_head=$("${SVNLOOK}" youngest "${REPOS}")
echo ""
echo "Highest revision in original repository:  ${was_head}"
echo "Highest revision restored:                ${restored_head}"
echo ""
echo "(It's okay if restored is less than original, even much less.)"

# A failed svnlook leaves an empty (or otherwise non-numeric) value.
# Feeding that to the numeric test below would only make the test
# itself error out and evaluate as false, so the script would carry on
# as though the check had passed.  Treat it as a failed restoration.
for rev in "${restored_head}" "${last_guaranteed_rev}"; do
  case "${rev}" in
    '' | *[!0-9]*)
      echo ""
      echo "Restoration failed -- could not determine a revision number" \
           "(got '${rev}')."
      exit 1
      ;;
  esac
done

if [ "${restored_head}" -lt "${last_guaranteed_rev}" ]; then
   echo ""
   echo "Restoration failed because r${restored_head} is too low --"
   echo "should have restored to at least r${last_guaranteed_rev}."
   exit 1
fi

# The minimum-revision requirement is met.  As a first spot check,
# dump the verbose log of r1 through the restored head from both the
# original and the restored repository (into the scratch files 'a'
# and 'b') and require the two dumps to be identical.

echo ""
echo "Comparing logs up to r${restored_head} for both repositories..."
${SVN} log -v -r1:${restored_head} file://$(pwd)/was_${REPOS} > a
${SVN} log -v -r1:${restored_head} file://$(pwd)/${REPOS}     > b
if ! cmp a b; then
  echo "Log comparison failed -- restored repository is not right."
  exit 1
fi
echo "Done comparing logs."

# Second spot check: export the restored head revision from both the
# original and the restored repository and compare the two trees
# recursively.
echo ""
echo "Comparing r${restored_head} exported trees from both repositories..."
"${SVN}" -q export -r"${restored_head}" "file://$(pwd)/was_${REPOS}" orig-export
"${SVN}" -q export -r"${restored_head}" "file://$(pwd)/${REPOS}" restored-export
if diff -q -r orig-export restored-export; then
  echo "Done comparing r${restored_head} exported trees."
else
  echo "Recursive diff failed -- restored repository is not right."
  # Fail the script, just as the log comparison does; otherwise a bad
  # restoration would still end with "Done." and a zero exit status.
  exit 1
fi

echo ""
echo "Done."