File: convert-charset.sh

package info (click to toggle)
usemod-wiki 1.2.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 896 kB
  • sloc: perl: 4,991; sh: 27; makefile: 16
file content (32 lines) | stat: -rw-r--r-- 1,049 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/bin/sh
# (c) 2004 Christoph Berg <cb@df7cb.de>, GNU GPL.
# This script is loosely based on a suggestion by Peter Gervai.
# cb 2004-05-29: first version.

# Use this script to convert the .db files in your wiki if you are switching to
# a different charset.
#
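# Note: the usemod wiki data format uses the byte \263 (octal) as its internal
# separator. If this character also appears in the text of your pages, it
# cannot be told apart from the separator: it is left as the raw byte \263 and
# is not converted to the new charset.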

# Separator bytes, written as three-digit octal escapes without the leading
# backslash; the recipes below hand them to tr(1) as "\\$SEP" and "\\$TMPSEP".
SEP=263 # octal 263 = decimal 179 (superscript three in latin1)
TMPSEP=001 # stands in for SEP while iconv runs, then is swapped back
# Source and target character sets, in iconv(1) notation.
OLDENC=iso-8859-1
NEWENC=utf-8

# Use this script at your own risk! The remaining part is commented out to
# prevent accidental data loss. Both recipes write their result to a new file
# named <input>.new; neither of them replaces the original file.

# use this section to convert a single page
#[ -f "$1" ] || exit 1
#tr "\\$SEP" "\\$TMPSEP" < "$1" | \
#	iconv -f "$OLDENC" -t "$NEWENC" | \
#	tr "\\$TMPSEP" "\\$SEP" > "$1.new"

# use the next section to convert all pages
# Each file name is handed to the inner sh as $1 instead of being pasted into
# the command string: names with spaces or shell metacharacters stay intact,
# and the recipe does not depend on find expanding "{}" inside a longer
# argument. A failing cd aborts instead of continuing in the wrong directory.
#if [ -n "$1" ]; then cd "$1" || exit 1; fi
#[ -d page ] || exit 1 # make sure this is a wiki root
#find page keep -name "*.db" \
#	-exec sh -c "tr '\\$SEP' '\\$TMPSEP' < \"\$1\" | \
#	iconv -f $OLDENC -t $NEWENC | \
#	tr '\\$TMPSEP' '\\$SEP' > \"\$1.new\"" sh {} \;