File: split_mbox.sh

package info (click to toggle)
mairix 0.24-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, buster, forky, sid, trixie
  • size: 1,376 kB
  • sloc: ansic: 13,318; sh: 1,035; yacc: 185; makefile: 143; lex: 87; perl: 34
file content (76 lines) | stat: -rwxr-xr-x 2,478 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/bin/bash

set -e

FORMAIL=formail

SPLIT_MBOX_DIR=

#--------------------------------------------------

error () {
    echo "**Error:**" "$@" >&2
    exit 1
}

#--------------------------------------------------

while [ $# -ge 1 ]
do
    case "$1" in
	"--split-to" )
	    [[ $# -ge 2 ]] || error "--split-to requires a further parameter: the directory for the split mboxen"
	    SPLIT_MBOX_DIR="$2"
	    shift
	    ;;
	* )
	    [[ $# -le 1 ]] || error "Further paramaters follow the mbox parameter"
	    MBOX_FILE="$1"
	    ;;
	esac
    shift
done

#--------------------------------------------------

[[ ! -z "$MBOX_FILE" ]] || error "No mbox file given"
[[ -e "$MBOX_FILE" ]] || error "The given mbox file \"${MBOX_FILE}\" does not exist"
[[ ! -d "$MBOX_FILE" ]] || error "The given file \"${MBOX_FILE}\" should be an mbox, but is a directory"

#--------------------------------------------------

if [ -z "$SPLIT_MBOX_DIR" ]
then
    SPLIT_MBOX_DIR="$(echo "$MBOX_FILE" | sed -e 's@\(\(.*/\|^\)mbox\)/@\1_split/@')"

    [[ "$MBOX_FILE" != "$SPLIT_MBOX_DIR" ]] || error "Determining directory for split mbox failed"

    [[ ! -e "$SPLIT_MBOX_DIR" ]] || error "The directory for the split mboxen \"${SPLIT_MBOX_DIR}\" already exists"
fi

mkdir -p "$SPLIT_MBOX_DIR"

PART=0 # the suffix number for the split files
PREVIOUS_SEPARATION_POS=1 # the position of the newline before the mbox From
  # separator for the previous message. We initialize it to the beginning of
  # the file to get a good start (the first Message does not have a prepented
  # newline).
LINE_OFFSET=2 # the parameter passed to tail to strip the correct number of lines.
  # This is two for the first iteration, and three for the following iterations

# SEPARATION_POS is the byte offset rom the beginning of the file to a newline
# before a "From" mbox separator. We add the file size as final separation
# point to not omit the last message of the mbox.
for SEPARATION_POS in $( grep -b -B 1 '^From[[:space:]]' "$MBOX_FILE" | grep '^[0-9]*-$' | sed -e 's/-//' ) $(($(stat "-c%s" "$MBOX_FILE")-1))
do

    # We first chop off the part after the message, then the part before the
    # message and finally "tail" away the remains the the mbox format.
    head -c $SEPARATION_POS "$MBOX_FILE" | \
	tail -c $((SEPARATION_POS-PREVIOUS_SEPARATION_POS)) | \
	tail -n +${LINE_OFFSET} >"${SPLIT_MBOX_DIR}"/part."$PART"

    PART=$((PART+1))
    PREVIOUS_SEPARATION_POS=$SEPARATION_POS
    LINE_OFFSET=3
done