File: regexp-cmdline.py

package info (click to toggle)
modsecurity 3.0.3-1%2Bdeb10u2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 42,032 kB
  • sloc: cpp: 25,953; ansic: 15,785; sh: 5,357; python: 3,556; yacc: 2,896; makefile: 1,394; lex: 1,344; perl: 464; ruby: 69; javascript: 53; php: 42
file content (64 lines) | stat: -rwxr-xr-x 1,989 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
#
# Convert a word list to a list of regexps usable by Regexp::Assemble.
#
# Examples:
# cat regexp-932100.txt | ./regexp-cmdline.py unix | ./regexp-assemble.pl
# cat regexp-932110.txt | ./regexp-cmdline.py windows | ./regexp-assemble.pl
# cat regexp-932150.txt | ./regexp-cmdline.py unix | ./regexp-assemble.pl
#
# Refer to rules 932100, 932110 and 932150 for documentation.
#

import fileinput, string, sys

# Convert a single line to regexp format, and insert anti-cmdline
# evasions between characters.
def regexp_str(str, evasion):
    """Convert one word-list entry into a regexp fragment.

    Args:
        str: The entry text. The parameter name shadows the builtin
            ``str`` inside this function; it is kept so existing callers
            keep working.
        evasion: Regexp fragment inserted between every pair of
            consecutive characters of the entry.

    Returns:
        For an entry starting with ``'``: everything after that quote,
        unchanged. Otherwise: the ``regexp_char`` conversion of each
        character, joined with ``evasion``. An empty entry yields an
        empty string.
    """
    # By convention, if the line starts with ' char, copy the rest
    # verbatim. startswith() is used instead of str[0] so that an empty
    # entry does not raise IndexError.
    if str.startswith("'"):
        return str[1:]

    # join() puts the evasion between characters only, never before the
    # first or after the last one.
    return evasion.join(regexp_char(char, evasion) for char in str)

# Ensure that some special characters are escaped
def regexp_char(char, evasion):
    """Return the regexp fragment for one character of a word-list entry.

    Args:
        char: The text to convert; ``regexp_str`` passes one character
            at a time.
        evasion: Not used by this function; accepted so the signature
            stays the same for existing callers.

    Returns:
        ``char`` with ``.`` and ``-`` backslash-escaped, ``+`` and ``@``
        expanded to the Unix and Windows argument-separator patterns, and
        a space turned into ``\\s+``. Any other text is returned
        unchanged.
    """
    # All replacement patterns are raw strings: sequences such as '\.'
    # or '\s' in a plain literal are invalid escapes that newer Python
    # versions warn about.
    char = char.replace('.', r'\.')
    char = char.replace('-', r'\-')
    char = char.replace('+', r'''(?:\s|<|>).*''')
        # Unix: "cat foo", "cat<foo", "cat>foo"
    char = char.replace('@', r'''(?:[\s,;]|\.|/|<|>).*''')
        # Windows: "more foo", "more,foo", "more;foo", "more.com", "more/e",
        # "more<foo", "more>foo"
    char = char.replace(' ', r'\s+')
        # Ensure multiple spaces are matched
    return char

# Filler pattern, per platform, that goes between the characters of a
# command name so that inserted quoting/escape characters do not prevent
# a match. This emulates the relevant parts of t:cmdLine.
evasions = dict(
    # Zero or more backslashes, single quotes or double quotes.
    unix=r'''[\\\\'\"]*''',
    # Zero or more double quotes or carets.
    windows=r'''[\"\^]*''',
)

# Parse arguments: the first argument selects the platform and must be
# one of the keys of `evasions`.
if len(sys.argv) <= 1 or sys.argv[1] not in evasions:
    # The usage text goes to stderr: stdout is meant to be piped into
    # regexp-assemble.pl and should only ever carry generated regexps.
    sys.stderr.write(sys.argv[0] + ' unix|windows [infile]\n')
    sys.exit(1)

evasion = evasions[sys.argv[1]]
# Remove the platform argument so that fileinput, which reads its file
# names from sys.argv[1:], only sees the optional input file(s).
del sys.argv[1]

# Process lines from input file, or if not specified, standard input
for line in fileinput.input():
    # Strip the newline and trailing spaces, then cut off any '#' comment.
    # NOTE(review): because the strip happens before the split, spaces
    # between an entry and an inline '#' comment are kept and end up in
    # the generated regexp - confirm whether that is intended.
    line = line.rstrip('\n ')
    line = line.split('#')[0]
    # Blank lines and comment-only lines produce no output.
    if line:
        print(regexp_str(line, evasion))