1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
|
#!/usr/bin/env python3
# Apache License, Version 2.0
import os
import io
import argparse
# Directory that contains this script; the scratch file below is created next to it.
CURRENT_DIR = os.path.abspath(os.path.dirname(__file__))
# Root of the sources to scan: the "manual" directory two levels above this script.
ROOT_DIR = os.path.normpath(os.path.join(CURRENT_DIR, "..", "..", "manual"))
# Report file the user edits; a copy with an ".orig" suffix is written alongside it for comparison.
EDIT_FILE = os.path.join(CURRENT_DIR, "broken_doc_links.txt")
# Role marker that introduces the links this script checks.
ROLE = ":doc:"
# Rows of [file_with_link, line_number, link_target], all strings.
# The first row is the column header; it starts with "#" so the fix functions skip it.
to_print = [["# Location of broken link:", "Line:", " Link Target:"]]
def clear_console():
    """Clear the terminal by running the platform's clear-screen shell command."""
    # Windows (os.name == 'nt') uses "cls"; every other platform uses "clear".
    command = 'clear'
    if os.name == 'nt':
        command = 'cls'
    os.system(command)
def rst_files(path):
    """
    Yield the path of every ``.rst`` file found below ``path``.

    Hidden entries (names starting with ".") are ignored: hidden files are
    never yielded and hidden sub-directories are not descended into. The
    extension comparison is case-insensitive.

    :param path: directory to walk.
    :return: generator of file paths, each built as ``os.path.join(dirpath, filename)``.
    """
    for dirpath, dirnames, filenames in os.walk(path):
        # Prune hidden directories in place so os.walk never enters them.
        # Testing ``dirpath`` itself does not work: it is the full path
        # including ``path``, so it only starts with "." when the root was
        # given as a relative path such as "." or "./manual".
        dirnames[:] = [name for name in dirnames if not name.startswith(".")]
        for filename in filenames:
            if filename.startswith("."):
                continue
            if os.path.splitext(filename)[1].lower() == ".rst":
                yield os.path.join(dirpath, filename)
def get_broken_doc_links(fname, anchors='INCLUDE', target_chapter=''):
    """
    Record every link in ``fname`` whose target file does not exist.

    Each broken link is appended to the module-level ``to_print`` list as
    ``[fname relative to ROOT_DIR, line number string, link target]``.

    :param fname: path of the UTF-8 text file to scan.
    :param anchors: ``'INCLUDE'`` checks every link, ``'ONLY'`` checks only
        links whose last path component contains ``#``, ``'IGNORE'`` checks
        only links without one. Any other value checks nothing.
    :param target_chapter: only links starting with ``"/" + target_chapter``
        are checked.
    """
    with open(fname, "r", encoding="utf-8") as handle:
        text_lines = handle.readlines()

    marker = ROLE + "`"
    wanted_prefix = "/" + target_chapter

    # Pass 1: collect [target, line-number] pairs for the links of interest.
    candidates = []
    for number, text in enumerate(text_lines, start=1):  # line numbers start from 1
        if ROLE not in text:
            continue
        # The piece before the first marker is ordinary text, not a link target.
        for chunk in text.split(marker)[1:]:
            if "`" not in chunk:
                # no closing backtick on this line, nothing to extract
                continue
            link = chunk.split("`")[0]
            if "<" in link and link.endswith(">"):
                # turns "Text <path>" into "path"
                link = link.split("<")[-1][:-1]

            has_anchor = '#' in link.split('/')[-1]
            if anchors == 'INCLUDE':
                selected = True
            elif anchors == 'ONLY':
                selected = has_anchor
            elif anchors == 'IGNORE':
                selected = not has_anchor
            else:
                selected = False

            if selected and link.startswith(wanted_prefix):
                candidates.append([link, str(number) + ' '])

    # Pass 2: keep only the targets that have no matching ".rst" file.
    for link, line in candidates:
        if not os.path.exists(ROOT_DIR + link + ".rst"):
            to_print.append([fname.replace(ROOT_DIR, ""), line, link])
def check_links(sc='', a='INCLUDE', tc=''):
    """
    Scan the sources for broken links and write the report files.

    :param sc: source chapter; only files whose path relative to ROOT_DIR
        starts with ``"/" + sc`` are scanned.
    :param a: anchor mode, forwarded to ``get_broken_doc_links`` as ``anchors``.
    :param tc: target chapter, forwarded as ``target_chapter``.

    The same aligned table is written to EDIT_FILE (for the user to edit) and
    to EDIT_FILE + ".orig" (kept for comparison).
    """
    # Gather broken links from every file in the requested chapter.
    chapter_prefix = "/" + sc
    for rst_path in rst_files(ROOT_DIR):
        relative = rst_path.replace(ROOT_DIR, '').replace('\\', '/')
        if not relative.startswith(chapter_prefix):
            continue
        get_broken_doc_links(rst_path, anchors=a, target_chapter=tc)

    # Column widths: the longest location and the longest line-number string.
    location_width = max((len(row[0]) for row in to_print), default=0)
    lineno_width = max((len(row[1]) for row in to_print), default=0)

    # Locations are padded on the right, line numbers on the left.
    report = '\n'.join(
        ' '.join((
            row[0].ljust(location_width),
            "::",
            row[1].rjust(lineno_width),
            "::",
            row[2],
        ))
        for row in to_print
    )

    # Write to file user will edit, and to file that will be compared to.
    for destination in (EDIT_FILE, EDIT_FILE + ".orig"):
        with open(destination, "w") as out_file:
            out_file.write(report)
def fix_links():
    """
    Apply the user's manual edits from EDIT_FILE to the source files.

    Every non-comment line of EDIT_FILE is compared with the line at the same
    position in EDIT_FILE + ".orig". Where they differ, the old link target is
    replaced by the edited one on the recorded line of the recorded file, and
    the line is printed before and after the change.

    Blank lines are ignored, and lines are paired only up to the length of the
    shorter file, so a trailing newline added by the user's editor is harmless.
    """
    with open(EDIT_FILE, "r") as f:
        lines = f.read().split('\n')
    with open(EDIT_FILE + ".orig", "r") as f:
        lines_orig = f.read().split('\n')

    # zip() instead of indexing lines_orig: the edited file may have gained a
    # trailing empty line, which used to raise IndexError here.
    for line, line_orig in zip(lines, lines_orig):
        if not line.strip() or line.startswith('#'):
            # blank line or header/comment row
            continue
        if line == line_orig:
            # the user left this row alone
            continue

        path, lineno, target = line.split('::')
        path = path.strip()
        lineno = int(lineno.strip()) - 1  # line number starting from 0
        target = target.strip()
        target_orig = line_orig.split('::')[-1].strip()

        fullp = ROOT_DIR + path
        with io.open(fullp, "r", encoding="utf-8", newline='') as f:
            # newline is empty str to ensure that the original line ending is not changed
            flines = f.readlines()

        lorig = flines[lineno]
        # A target appears either as "Text <target>" or as a bare `target`.
        updated = lorig.replace('<' + target_orig + '>', '<' + target + '>')
        updated = updated.replace('`' + target_orig + '`', '`' + target + '`')
        flines[lineno] = updated
        print(lorig + updated)

        with io.open(fullp, "w", encoding="utf-8", newline='') as f:
            f.write(''.join(flines))
def auto_fix_links():
    """
    Try to repair broken links without user input.

    For every row of EDIT_FILE that the user has not changed (it equals the
    row at the same position in EDIT_FILE + ".orig"), the target is retried
    with "/index" and then "/introduction" appended. The first variant whose
    ".rst" file exists under ROOT_DIR replaces the old target in the source
    file. A summary of how many links were fixed is printed at the end.

    Blank lines are ignored, and rows are paired only up to the length of the
    shorter file, so a trailing newline added by an editor is harmless.
    """
    with open(EDIT_FILE, "r") as f:
        lines = f.read().split('\n')
    with open(EDIT_FILE + ".orig", "r") as f:
        lines_orig = f.read().split('\n')

    success = 0
    total = 0
    # zip() instead of indexing lines_orig: the edited file may have gained a
    # trailing empty line, which used to raise IndexError here.
    for line, line_orig in zip(lines, lines_orig):
        if not line.strip() or line.startswith('#'):
            # blank line or header/comment row; not a link, so not counted
            continue
        total += 1
        if line != line_orig:
            # only check lines that the user has not changed
            continue

        path, lineno, target = line.split('::')
        path = path.strip()
        lineno = int(lineno.strip()) - 1  # line number starting from 0
        target = target.strip()

        # check if index exists
        fix = ""
        for candidate in (target + "/index", target + "/introduction"):
            if os.path.exists(ROOT_DIR + candidate + ".rst"):
                fix = candidate
                success += 1
                break
        if not fix:
            continue

        fullp = ROOT_DIR + path
        with io.open(fullp, "r", encoding="utf-8", newline='') as f:
            # newline is empty str so the original line endings are kept
            flines = f.readlines()

        lorig = flines[lineno]
        # A target appears either as "Text <target>" or as a bare `target`.
        updated = lorig.replace('<' + target + '>', '<' + fix + '>')
        updated = updated.replace('`' + target + '`', '`' + fix + '`')
        flines[lineno] = updated
        print(lorig + updated)

        with io.open(fullp, "w", encoding="utf-8", newline='') as f:
            f.write(''.join(flines))

    if success == total:
        print("\nSuccessfully fixed all links automatically!")
    elif success > 0:
        print("\nSuccessfully fixed %s links automatically, run this script again to try manually." % success)
    else:
        print("Failed to fix any links automatically :(")
def main():
    """
    Command-line entry point.

    Parses the options, collects the broken links, then either fixes them
    automatically (``--auto``) or asks the user to edit the report file and
    type "done", "auto", or anything else to cancel. The report file and its
    ".orig" copy are removed afterwards.
    """
    parser = argparse.ArgumentParser(
        description=(
            "An interactive script that can be used to manually "
            "or automatically fix broken internal links.\n\n"
            "Basic usage:\n"
            "- Run \"fix_internal_links.py\"\n"
            "- Edit the right column in the text file \"broken_doc_links.txt\"\n"
            "- Enter \"done\" at the prompt, all links will then be updated "
            "as you changed them in the text file."),
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument(
        '-a', '--auto',
        action="store_true",
        help="Skip the user input process and\nautomatically try to fix links.",
    )
    parser.add_argument(
        '-ia', '--ignoreanchors',
        action="store_true",
        help="Ignore links with html anchors at the end.",
    )
    parser.add_argument(
        '-oa', '--onlyanchors',
        action="store_true",
        help="Only check links with html anchors at the end.",
    )
    parser.add_argument(
        '-sc', '--sourcechapter',
        help="Only check files in this chapter.",
    )
    parser.add_argument(
        '-tc', '--targetchapter',
        help="Only check for links pointing at this chapter.",
    )

    args = parser.parse_args()

    # Unset (or empty) chapter options mean "no restriction".
    source_chapter = args.sourcechapter or ''
    target_chapter = args.targetchapter or ''

    # --ignoreanchors takes precedence when both anchor flags are given.
    if args.ignoreanchors:
        anchors = 'IGNORE'
    elif args.onlyanchors:
        anchors = 'ONLY'
    else:
        anchors = 'INCLUDE'

    if args.auto:
        check_links(source_chapter, anchors, target_chapter)
        auto_fix_links()
    else:
        print("Checking for broken links...")
        check_links(source_chapter, anchors, target_chapter)
        num_broken = len(to_print) - 1  # first row of to_print is the header
        clear_console()

        if num_broken > 0:
            print("Found: " + str(num_broken) + " broken links\n\n"
                  "Now edit the link targets in the right column of broken_doc_links.txt (next to this script)\n\n"
                  "When finished, type \"done\" below, or anything else to cancel.\n\n"
                  "You may also type \"auto\" to attempt to fix the links automatically.\n")

            response = input("> ")
            if response == "done":
                fix_links()
            elif response == "auto":
                clear_console()
                print("Attempting to fix links automatically...")
                auto_fix_links()
            else:
                print("Canceling")
        else:
            print("No broken links found! Yay!")

    # Delete broken_doc_links.txt and its ".orig" copy.
    # Each removal is attempted on its own so a failure on the first file does
    # not leave the second behind, and only OSError is caught so that
    # KeyboardInterrupt / SystemExit are not swallowed.
    errors = []
    for leftover in (EDIT_FILE, EDIT_FILE + ".orig"):
        try:
            os.remove(leftover)
        except OSError as ex:
            # in case file is locked
            errors.append(ex)
    if errors:
        print("WARNING: Unable to delete " + EDIT_FILE + " error: " + str(errors[0]) + "\n"
              "Make sure this file (and its \".orig\" duplicate) is deleted before committing.")


if __name__ == "__main__":
    main()
|