File: offsets.py

package info (click to toggle)
whipper 0.10.0-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,696 kB
  • sloc: python: 6,449; xml: 174; ansic: 124; makefile: 18
file content (66 lines) | stat: -rw-r--r-- 1,479 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# -*- Mode: Python -*-
# vi:si:et:sw=4:sts=4:ts=4

# Show all possible drive offsets, in order of popularity, from a downloaded
# copy of http://www.accuraterip.com/driveoffsets.htm
# (the path to the saved page is given as the first command-line argument).

import sys

from bs4 import BeautifulSoup

# Both prefixes are the same width so continuation lines align under the
# opening quote of the first line.
FIRST_PREFIX = 'OFFSETS = ("'
CONTINUATION_PREFIX = ' ' * 11 + '"'
# A line is closed as soon as it grows past this many characters.
WRAP_WIDTH = 60


def extract_records(soup):
    """Yield ``(offset, count)`` cell texts for every four-column table row.

    *soup* is the parsed document.  The first two ``<tr>`` elements are
    skipped, and rows that do not have exactly four ``<td>`` cells are
    ignored.  Each yielded value is the first text node of the second and
    third cell respectively, or ``None`` when the cell holds no text.
    """
    # skip first two spurious elements
    for row in soup.find_all('tr')[2:]:
        cells = row.find_all('td')
        if len(cells) != 4:
            continue
        yield cells[1].find(string=True), cells[2].find(string=True)


def tally_offsets(records):
    """Return a dict mapping each numeric offset string to its total count.

    *records* is an iterable of ``(offset, count)`` pairs of text (or
    ``None``).  Pairs whose offset or count is missing or not an integer are
    skipped.  Offsets are keyed by their whitespace-stripped text so that the
    same offset is never counted under two different keys.
    """
    totals = {}
    for offset, count in records:
        # only use numeric offsets (and counts); None raises TypeError
        try:
            int(offset)
            hits = int(count)
        except (TypeError, ValueError):
            continue
        key = offset.strip()
        totals[key] = totals.get(key, 0) + hits
    return totals


def sort_by_popularity(totals):
    """Return the offsets of *totals* ordered from most to least counted.

    Ties are broken by the offset string itself, in descending order.
    """
    return sorted(totals, key=lambda offset: (totals[offset], offset),
                  reverse=True)


def format_offsets(offsets):
    """Format *offsets* as the source lines of an ``OFFSETS = (...)`` tuple.

    The offsets are joined with ``', '`` into adjacent string literals, one
    literal per line; a line is closed once it exceeds ``WRAP_WIDTH``
    characters.  The last line always ends with ``")``, including when the
    wrap falls exactly on the final offset or when *offsets* is empty.
    """
    chunks = []
    current = []
    width = len(FIRST_PREFIX)
    for offset in offsets:
        current.append(offset)
        width += len(offset) + len(', ')
        if width > WRAP_WIDTH:
            chunks.append(current)
            current = []
            width = len(CONTINUATION_PREFIX)
    # Keep a trailing chunk only if it has content; an entirely empty input
    # still needs one (empty) chunk so that a valid statement is produced.
    if current or not chunks:
        chunks.append(current)

    lines = []
    last = len(chunks) - 1
    for index, chunk in enumerate(chunks):
        prefix = FIRST_PREFIX if index == 0 else CONTINUATION_PREFIX
        # Non-final literals keep the trailing separator so the concatenated
        # string stays one ', '-separated list.
        suffix = '")' if index == last else ', "'
        lines.append(prefix + ', '.join(chunk) + suffix)
    return lines


def main(argv=None):
    """Read the page named by ``argv[1]`` and print the OFFSETS definition.

    *argv* defaults to ``sys.argv``.  Returns the process exit status.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.stderr.write('usage: %s DRIVEOFFSETS_HTML\n' % argv[0])
        return 2

    with open(argv[1]) as f:
        doc = f.read()

    soup = BeautifulSoup(doc, features='html.parser')

    totals = tally_offsets(extract_records(soup))  # offset -> total count

    # now format it for code inclusion
    print('\n'.join(format_offsets(sort_by_popularity(totals))))
    return 0


if __name__ == '__main__':
    sys.exit(main())