File: check-copyright.sh

package info (click to toggle)
rocthrust 6.4.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 13,588 kB
  • sloc: cpp: 66,309; ansic: 34,184; python: 1,519; sh: 331; xml: 212; makefile: 115
file content (221 lines) | stat: -rwxr-xr-x 8,903 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#!/usr/bin/env bash

# Start of configuration
# Extended-regex fragments for the text surrounding the year(s) in a copyright statement.
preamble='Copyright *(\([cC]\)|©)? *'
postamble=',? +Advanced +Micro +Devices, +Inc\.'
# A complete copyright statement carrying either a single year or a "<start>-<end>" range.
find_pattern="${preamble}([0-9]{4}-)?[0-9]{4}${postamble}"
# printf format string; its %d is filled in with the current year.
uptodate_pattern="${preamble}([0-9]{4}-)?%d${postamble}"
# "<pattern>/<replacement>" in sed syntax. The value is also run through printf, which
# interprets '\' escape sequences, hence the doubled backslashes in the replacement part.
# Capture groups:
#   - \1 the whole preamble text
#   - \2 the optional "(c)"/"©" part inside the preamble, not referenced by the replacement
#   - \3 the start year
#   - \4 the end year as written in the file (if any); dropped in favour of the current year
#   - \5 the remainder of the statement following the year(s)
replace_pattern="(${preamble})([0-9]{4})(-[0-9]{4})?(${postamble})"'/\\1\\3-%d\\5'
# End of configuration

# Prints the usage text (with ANSI colours) to stdout.
# The whole text is passed as the printf format string so that the \033 escape
# sequences are interpreted; it therefore must not contain a literal '%'.
print_help() { printf -- \
"\033[36musage\033[0m: \033[33mcheck-copyright.sh [-h] [-u] [-a] [-c] [-d <SHA>] [-k] [-q] [-v]\033[0m
\033[36mdescription\033[0m: Checks whether the copyright year in the changed files is up to date and displays the files with out-of-date copyright statements. Exits with '0' if successful and with '1' if something is out of date.
\033[36moptions\033[0m:
  \033[34m-h\033[0m       Displays this message.
  \033[34m-u\033[0m       Automatically updates the copyright year.
  \033[34m-a\033[0m       Automatically applies the changes to the current staging environment. Implies '-u' and '-c'.
  \033[34m-c\033[0m       Compare files to the index instead of the working tree.
  \033[34m-d <SHA>\033[0m Compare using the diff of a hash.
  \033[34m-k\033[0m       Compare using the fork point: where this branch and 'remotes/origin/HEAD' diverge.
  \033[34m-q\033[0m       Suppress updates about progress.
  \033[34m-v\033[0m       Verbose output.
Use '\033[33mgit config --local hooks.updateCopyright <true|false>\033[0m' to automatically apply copyright changes on commit.
"
}

# argument parsing
# Defaults for every option; parse_args overrides them according to the command line.
apply=false
update=false
verbose=false
forkdiff=false
quiet=false
cached=false
# Base commit given with '-d'. Initialised explicitly so that a 'diff_hash'
# variable inherited from the environment is never picked up by accident.
diff_hash=""

# Parses the command line options and stores the result in the globals above.
# Arguments: the script's command line arguments ("$@").
# Prints the help and exits with 0 for '-h'; prints the help to stderr and exits
# with 1 for an unknown option or a missing option argument.
parse_args() {
    # Local getopts state keeps repeated calls independent of each other.
    local OPTIND=1 OPTARG arg
    while getopts "auhvkqcd:" arg; do
        case "$arg" in
            a) update=true; apply=true; cached=true ;;
            u) update=true ;;
            v) verbose=true ;;
            k) forkdiff=true ;;
            q) quiet=true ;;
            c) cached=true ;;
            d) diff_hash=${OPTARG} ;;
            h) print_help; exit 0 ;;
            # getopts has already reported the offending option on stderr.
            *) print_help >&2; exit 1 ;;
        esac
    done
}

parse_args "$@"

# If set, check all files changed since the fork point
if $forkdiff; then
    branch="$(git rev-parse --abbrev-ref HEAD)"
    # Use 'origin' when the current branch has no remote configured.
    remote="$(git config --local --get "branch.$branch.remote" || echo 'origin')"
    source_commit="remotes/$remote/HEAD"

    # The fork point is computed with a plain merge-base (not `--fork-point`).
    # Abort if it cannot be determined: carrying on would silently compare against
    # HEAD instead, i.e. check a different set of files than the one requested.
    if ! diff_hash="$(git merge-base "$source_commit" "$branch")" || [ -z "$diff_hash" ]; then
        printf -- "\033[31mCould not determine the fork point of '%s' and '%s'.\033[0m\n" \
            "$branch" "$source_commit" >&2
        exit 1
    fi
fi

# Without an explicit or computed base commit, compare against HEAD.
if [ -n "${diff_hash}" ]; then
    $verbose && printf -- "Using base commit: %s\n" "${diff_hash}"
else
    diff_hash="HEAD"
fi

# Current year
year=$(date '+%Y')

# Rename detection is enabled for exact (100%) matches only, so that files which were merely
# renamed are not subject to the copyright check.
diff_opts=(
    -z
    --name-only
    '--diff-filter=MA'
    '--find-renames=100%'
)
git_grep_opts=(
    -z
    --extended-regexp
    --ignore-case
    --no-recursive
    -I
)
# With '-c'/'-a' both the diff and the grep operate on the index.
if $cached; then
    diff_opts=("${diff_opts[@]}" --cached)
    git_grep_opts=("${git_grep_opts[@]}" --cached)
fi

if ! $quiet; then
    printf -- "Checking if copyright statements are up-to-date... "
fi

# Sorted, NUL-separated list of the added/modified files relative to the base commit.
readarray -d '' changed_files < <(
    git diff-index "${diff_opts[@]}" "$diff_hash" | LANG=C.UTF-8 sort -z
)

# Nothing changed: report success and stop here.
if (( ${#changed_files[@]} == 0 )); then
    if ! $quiet; then
        printf -- "\033[32mDone!\033[0m\n"
    fi
    if $verbose; then
        printf -- "\033[36mNo changed files found.\033[0m\n"
    fi
    exit 0
fi

# The changed files that contain a copyright statement at all.
readarray -d '' found_copyright < <(
    git grep "${git_grep_opts[@]}" --files-with-matches -e "$find_pattern" \
        -- "${changed_files[@]}" |
        LANG=C.UTF-8 sort -z
)

# Of those, the files without any statement that carries the current year.
outdated_copyright=()
if (( ${#found_copyright[@]} > 0 )); then
    # uptodate_pattern is a printf format string by design, it receives the current year
    # shellcheck disable=SC2059
    printf -v uptodate_pattern -- "$uptodate_pattern" "$year"
    readarray -d '' outdated_copyright < <(
        git grep "${git_grep_opts[@]}" --files-without-match -e "$uptodate_pattern" \
            -- "${found_copyright[@]}" |
            LANG=C.UTF-8 sort -z
    )
fi

if ! $quiet; then
    printf -- "\033[32mDone!\033[0m\n"
fi

if $verbose; then
    # Files without a copyright statement: the entries of `changed_files` that are
    # not in `found_copyright` (set difference computed with comm on NUL-separated lists).
    readarray -d '' notfound_copyright < <(
        LANG=C.UTF-8 comm -z -23 \
            <(printf -- '%s\0' "${changed_files[@]}") \
            <(printf -- '%s\0' "${found_copyright[@]}")
    )

    if (( ${#notfound_copyright[@]} > 0 )); then
        printf -- "\033[36mCouldn't find a copyright statement in %d file(s):\033[0m\n" \
            "${#notfound_copyright[@]}"
        printf -- '  - %q\n' "${notfound_copyright[@]}"
    fi

    # Up-to-date files: the entries of `found_copyright` that are not in `outdated_copyright`.
    readarray -d '' uptodate_copyright < <(
        LANG=C.UTF-8 comm -z -23 \
            <(printf -- '%s\0' "${found_copyright[@]}") \
            <(printf -- '%s\0' "${outdated_copyright[@]}")
    )

    if (( ${#uptodate_copyright[@]} > 0 )); then
        printf -- "\033[36mThe copyright statement was already up to date in %d file(s):\033[0m\n" \
            "${#uptodate_copyright[@]}"
        printf -- '  - %q\n' "${uptodate_copyright[@]}"
    fi
fi

# Every copyright statement is current: nothing left to do.
(( ${#outdated_copyright[@]} )) || exit 0

# List the files whose copyright statement needs a newer year.
printf -- \
"\033[31m==== COPYRIGHT OUT OF DATE ====\033[0m
\033[36m%d file(s) need(s) to be updated:\033[0m\n" "${#outdated_copyright[@]}"
printf -- '  - %q\n' "${outdated_copyright[@]}"

# Unless an update was requested, explain how to fix the files and signal failure.
$update || {
    printf -- \
"\nRun '\033[33mscripts/copyright-date/check-copyright.sh -u\033[0m' to update the copyright statement(s). See '-h' for more info,
or set '\033[33mgit config --local hooks.updateCopyright true\033[0m' to automatically update copyrights when committing.\n"
    exit 1
}

if ! $quiet; then
    if $apply; then
        printf -- "Updating copyrights and staging changes... "
    else
        printf -- "Updating copyrights... "
    fi
fi

# replace_pattern variable holds a format string, using it as such is intentional
# shellcheck disable=SC2059
printf -v replace_pattern -- "$replace_pattern" "$year"
# Just update the files in place if only touching the working-tree
if ! $apply; then
    # The 'I' flag (GNU sed) makes the substitution case-insensitive, matching the
    # '--ignore-case' search that flagged these files; without it a statement that
    # only matched case-insensitively would be reported but never updated.
    if ! sed --regexp-extended --separate --in-place "s/$replace_pattern/gI" \
            -- "${outdated_copyright[@]}"; then
        printf -- "\033[31mFailed to update the copyright statement(s).\033[0m\n" >&2
        exit 1
    fi
    # Progress output, suppressed by '-q' like the message that announced the update.
    ! $quiet && printf -- "\033[32mDone!\033[0m\n"
    exit 0
fi

# Writes a unified diff to stdout that updates the copyright statements of the
# index version of every file in `outdated_copyright`.
# Globals (read): outdated_copyright, find_pattern, replace_pattern (already
#                 formatted with the current year).
# Each matching line becomes its own one-line hunk without context lines.
generate_patch() {
    local to_hunk_cmd file quoted escaped a b

    # Sed command to create a hunk for a copyright statement fix
    # expects input to be line number then copyright statement on the next line
    # The 'I' flag keeps the substitution case-insensitive, like the git grep search.
    to_hunk_cmd="{# Print hunk header, move to the next line
                  s/.+/@@ -&,1 +&,1 @@/;n
                  # Print removed line by prepending '-' to it
                  ;s/^/-/;p
                  # Print added line, replace the '-' with '+' and replace the copyright statement
                  s/^-/+/;s/$replace_pattern/gI}"

    for file in "${outdated_copyright[@]}"; do
        # Run the file name through git ls-files, just to get a (possibly) quoted name.
        # It is looked up per file, so the result cannot be attributed to the wrong
        # file if git's output order differs from the order of `outdated_copyright`.
        quoted=""
        IFS= read -r quoted < <(git --literal-pathspecs ls-files --cached -- "$file")
        [ -n "$quoted" ] || quoted="$file"
        # Drop the quote from the start and end (to avoid quoting twice)
        escaped="${quoted#\"}"; escaped="${escaped%\"}"
        a="\"a/$escaped\""
        b="\"b/$escaped\""

        printf -- "diff --git %s %s\n--- %s\n+++ %s\n" "$a" "$b" "$a" "$b"

        # Print line number and line for each line with a copyright statement
        git cat-file blob ":$file" |
            sed --quiet --regexp-extended "/$find_pattern/I{=;p}" |
            sed --regexp-extended "$to_hunk_cmd"
    done
}

# The patch is stored inside the git directory while it is being applied.
patch_file="$(git rev-parse --git-dir)/copyright-fix.patch"

# Cleanup patch file when the script exits
finish () {
    rm -f -- "$patch_file"
}
# The trap will be invoked whenever the script exits, even due to a signal, this is a bash only
# feature
# It is installed before the patch file is written, so the file is also removed when
# the script is interrupted while the patch is still being generated.
trap finish EXIT

generate_patch > "$patch_file"

# First bring the working tree up to date...
if ! git apply --unidiff-zero < "$patch_file"; then
    printf -- "\033[31mFailed to apply changes to working tree.
Perhaps the fix is already applied, but not yet staged?\n\033[0m"
    exit 1
fi

# ...then stage the very same change.
if ! git apply --cached --unidiff-zero < "$patch_file"; then
    printf -- "\033[31mFailed to apply change to the index.\n\033[0m"
    exit 1
fi

! $quiet && printf -- "\033[32mDone!\033[0m\n"
exit 0