File: cleanup_text.sh

package info (click to toggle)
rocblas 5.5.1%2Bdfsg-7
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 565,372 kB
  • sloc: cpp: 198,491; python: 44,792; f90: 25,111; sh: 24,429; asm: 8,954; xml: 222; makefile: 147; ansic: 107; awk: 14
file content (183 lines) | stat: -rwxr-xr-x 4,781 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/bin/bash

# Script to clean up RST and other text files
# By Lee Killough

# Restrict command lookup to the standard system directories
PATH=/usr/bin:/bin
export PATH

# Option state; parse_args overwrites these from the command line
files=""
rstcode=0 ascii=0
trailing=0 trailing_after=0

# Entry point: interpret the command line, then process the selected files.
# Arguments: the script's command-line arguments, passed through unchanged.
main()
{
    parse_args "$@"; cleanup
}

# Clean up every Git-tracked text file matching the selected pathspec.
#
# Globals read: files (Git pathspec), ascii, rstcode, trailing,
#               trailing_after (0/1 option flags set by parse_args)
#
# For each tracked regular file whose MIME type is text/*:
#   - appends a final newline if it is missing;
#   - optionally strips trailing whitespace (--trailing / --trailing-after);
#   - optionally transliterates non-ASCII text to ASCII, either for the whole
#     file (--ascii) or via the rstcode_perl program for *.rst files
#     (--rstcode);
#   - stages the file with "git add -u".
#
# Enables "set -e" for the rest of the run, so any failing step aborts.
cleanup()
{
    # iconv command to translate UTF8 to ASCII. Kept as a single string
    # because it is also handed to the Perl program as one argument; the
    # unquoted $iconv expansion below relies on word splitting on purpose.
    local iconv="/usr/bin/iconv -s -f utf-8 -t ascii//TRANSLIT"

    set -ex

    # IFS= keeps leading/trailing whitespace in file names intact, and "--"
    # stops the pathspec from ever being parsed as an option.
    git ls-files -z --exclude-standard -- "$files" | while IFS= read -rd '' file; do
        # Operate only on regular files of MIME type text/*
        if [[ -f $file && "$(file -b --mime-type "$file")" == text/* ]]; then
            # Add missing newline to end of file
            sed -i -e '$a\' "$file"

            # Remove trailing whitespace at end of lines
            if [[ $trailing -ne 0 ]]; then
                sed -i -e 's/[[:space:]]*$//' "$file"
            elif [[ $trailing_after -ne 0 ]]; then
                perl -pi -e 's/\S\K\s+$/\n/' "$file"
            fi

            # The temporary file is created only when a conversion actually
            # runs. Creating it unconditionally and moving it over the
            # original would replace the file with an empty one whenever no
            # conversion was selected for it.
            temp=

            # Replace non-ASCII text and/or RST code line with ASCII equivalents
            if [[ $ascii -ne 0 ]]; then
                temp=$(mktemp)
                $iconv "$file" > "$temp" || { rc=$?; rm -f "$temp"; exit "$rc"; }
            elif [[ $rstcode -ne 0 && $file == *.rst ]]; then
                temp=$(mktemp)
                # Silence tracing so the whole Perl program is not echoed;
                # print a short summary of the command instead.
                { set +x; } 2>/dev/null
                echo perl -e '$(rstcode_perl)' "\"$iconv\" \"$file\" > \"$temp\"" >&2
                perl -e "$(rstcode_perl)" "$iconv" "$file" > "$temp" \
                    || { rc=$?; rm -f "$temp"; exit "$rc"; }
                set -x
            fi

            # Preserve permissions and install the converted text, if any
            if [[ -n $temp ]]; then
                chmod --reference="$file" "$temp"
                mv -f "$temp" "$file"
            fi

            # Add file to Git if updated
            git add -u "$file"
        fi
        echo "" >&2
    done

    { set +x; } 2>/dev/null
    git status
    echo " All of the selected files in the repository have been cleaned up."
}

# Parse the command-line arguments
#
# Arguments: the script's command-line arguments.
# Globals written: files, rstcode, ascii, trailing, trailing_after.
#
# Calls usage (which exits) on an unknown option, when --files has no
# value, or when no file selection is in effect after parsing.
parse_args()
{
    while [[ $# -ne 0 ]]; do
        case "$1" in
            --files)
                shift
                [[ $# -ne 0 ]] || usage
                files="$1"
                ;;
            --rstcode)
                # Default to all RST files unless --files was already given;
                # quoted so the pattern is never glob-expanded here.
                : "${files:=*.rst}"
                rstcode=1
                ;;
            --ascii)
                ascii=1
                ;;
            --trailing)
                # Plain assignment: "$trailing=1" would expand to "0=1" and
                # be run as a command, leaving the flag unset.
                trailing=1
                ;;
            --trailing-after | --trailing_after)
                trailing_after=1
                ;;
            *)
                usage
                ;;
        esac
        shift
    done

    [[ -n $files ]] || usage
}

# Perl script to handle code sections of RST files
#
# Prints the text of a Perl program to stdout; the caller runs it with
# "perl -e". The program takes the iconv command line as its first argument
# and reads the remaining arguments as input files (or stdin) line by line.
#
# Behaviour of the emitted program:
#   - Outside a code section, lines are printed unchanged. A line ending in
#     "::", optionally followed by one whitespace-separated word, switches to
#     code mode and records that line's leading whitespace.
#   - In code mode, each line is written to a freshly opened pipe running the
#     iconv command, so its output takes the place of the line.
#   - Code mode ends at the first non-blank line whose indentation is no
#     deeper than the recorded one. That terminating line is itself still
#     sent through iconv, because the pipe write follows the mode reset.
rstcode_perl()
{
    # Quoted delimiter: the Perl source is emitted literally, with no shell
    # expansion of its $variables.
    cat <<'EOF'
use strict;
use warnings;
my ($iconv, $code, $code_indent) = (shift, 0);
while(<>)
{
    my ($indent) = /^(\s*)/;
    if($code) {
      $code = 0 if /\S/ && length($indent) <= length($code_indent);
      open ICONV, "|-", $iconv or die "$!";
      print ICONV;
      close ICONV;
    } else {
      ($code, $code_indent) = (1, $indent) if /::(\s+\S+)?\s*$/;
      print;
    }
}
EOF
}

# Print the help text to stdout and terminate the script with status 1.
# The script name shown in the synopsis is taken from $0.
usage()
{
    local script_name=$0

    # Unquoted delimiter so that the script name is substituted
    cat <<EOF
Usage:

    ${script_name}
       [ --files <wildcard or path> ]
       [ --rstcode ]
       [ --ascii ]
       [ --trailing | --trailing-after ]

Description:

    Replaces non-ASCII Unicode characters with their ASCII equivalents in
    selected text files, or in the code sections of reStructuredText (RST)
    files.

    Adds missing newlines at the ends of selected text files.

    Optionally removes trailing whitespace at the ends of lines in selected
    text files.

    Code sections of RST files are critically important, because they are
    often copied-and-pasted to a user's terminal, and if they contain
    non-ASCII characters, then they will not work.

Options:

    --files <wildcard or path>

                       Clean up all text files matching wildcard or path,
                       e.g.:

                       --files "*.md"
                       --files "*.rst"
                       --files "*"
                       --files README.md

                       (Wildcard may need to be quoted, to prevent shell
                       wildcard expansion.)

    --rstcode          Clean up only the code sections of selected RST
                       files, or all RST files if --files is not specified.

    --ascii            Replace non-ASCII UTF-8 characters in selected text
                       files with their ASCII equivalents.

    --trailing         Remove trailing whitespace at the ends of lines in
                       selected files. This includes converting CR-LF to LF.

    --trailing-after   Remove trailing whitespace at the ends of lines in
                       selected files, but only after non-space characters.
                       This prevents removing indentation from otherwise
                       blank lines.

EOF
    exit 1
}

# Run the script, forwarding every command-line argument to main
main "$@"