File: link_check.sh

package info (click to toggle)
seqan3 3.4.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 17,580 kB
  • sloc: cpp: 145,192; sh: 307; xml: 264; javascript: 95; makefile: 70; perl: 29; php: 15
file content (59 lines) | stat: -rwxr-xr-x 1,968 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env bash
# SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
# SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
# SPDX-License-Identifier: BSD-3-Clause
#
# Usage: link_check.sh <SeqAn3 root directory> [<number of entries to skip, default 0>]
# Will output the status of links in the repository.
#
# Of main interest are broken links and those with a "Link STATUS" message.
# Some URLs may not be properly matched by the regex.
#
# The general workflow is to first run the script and then check the non-working links by searching for their
# occurrences within the codebase and verifying that they are indeed broken.

COUNT=0
# Checks a single URL with a HEAD request and prints one numbered status line.
#
# Globals:
#   COUNT - running link counter; incremented exactly once per call.
# Arguments:
#   $1 - the URL to check.
# Outputs:
#   "[<COUNT>] Link <state> : <URL>" on stdout.
do_check ()
{
    local -r url="$1"
    local -r max_attempts=5 # Upper bound on tries while the server keeps answering 429.
    local attempt response status_line status

    # Count the link once, no matter how often the request has to be repeated.
    COUNT=$(( COUNT + 1 ))

    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
        response=$(curl --http2 -Is -A 'Mozilla/5.0' "${url}") # HTTP2 is the default.
        if ! [[ "${response}" =~ ^HTTP ]]; then # If this does not work,
            response=$(curl --http1.1 -Is -A 'Mozilla/5.0' "${url}") # fall back to HTTP1.1.
        fi

        # Only the first header line matters. It may look like: HTTP/2 200
        status_line=${response%%$'\n'*}
        status_line=${status_line%$'\r'}
        status=""
        # The second whitespace-separated field is the status code. Using read
        # avoids the glob expansion an unquoted array assignment would perform.
        read -r _ status _ <<< "${status_line}"

        # Retry only on 429 (Too Many Requests), and only a bounded number of times.
        if [[ "${status}" != "429" ]] || (( attempt == max_attempts )); then
            break
        fi
        sleep 5
    done

    printf '[%s] ' "${COUNT}"
    case "${status}" in
        200) printf '%s\n' "Link OK         : ${url}";;
        301) printf '%s\n' "Link PERM MOVED : ${url}";;
        302) printf '%s\n' "Link TEMP MOVED : ${url}";;
        404) printf '%s\n' "Link BROKE      : ${url}";;
        # Includes a 429 that persisted through all attempts and an empty status
        # (no HTTP response at all).
        *)   printf '%s\n' "Link STATUS ${status} : ${url}";;
    esac
}

# Entry point: validate the arguments, collect every distinct URL found below the
# given root directory and check them one by one.
#   $1 - SeqAn3 root directory.
#   $2 - optional number of leading entries (in sorted order) to skip; defaults to 0.
# Diagnostics go to stderr so that stdout only carries link results.
if (( $# < 1 || $# > 2 )); then
    echo "Usage: link_check.sh <SeqAn3 root directory> <Skip first n=0 entries>" >&2
    exit 1
fi

if [[ ! -d $1 ]]; then
    echo "The directory $1 does not exist." >&2
    exit 1
fi

if [[ ! -f $1/include/seqan3/version.hpp ]]; then
    echo "The directory $1 does not seem to be the SeqAn3 root directory." >&2
    echo "Cannot find $1/include/seqan3/version.hpp." >&2
    exit 1
fi

SKIP="${2:-0}"
if ! [[ "${SKIP}" =~ ^[0-9]+$ ]]; then
    echo "The number of entries to skip must be a non-negative integer, got: ${SKIP}" >&2
    exit 1
fi
SKIP=$(( 10#${SKIP} )) # Force base 10 so that e.g. "08" is not rejected as invalid octal.

# Numbering continues after the skipped entries: the first checked link is SKIP + 1.
COUNT=$(( COUNT + SKIP ))

# Read URLs line by line instead of word-splitting a command substitution: the
# URL character class contains '?', which an unquoted expansion would treat as
# a glob. -I ignores binary files, whose match notices are not URLs.
# tail -n +K starts output AT line K, hence SKIP + 1 to drop exactly SKIP entries.
while IFS= read -r URL; do
    do_check "${URL}"
done < <(grep -ohrI --exclude-dir={.git,.vscode,build} \
              -e "https*://[a-zA-Z0-9./#+?=_%:-]*[a-zA-Z0-9/#+?=_%:-]" -- "$1" \
         | sort -u \
         | tail -n +"$(( SKIP + 1 ))")