1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
|
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
# Author: Mauro Soria
import re
from urllib.parse import unquote
from lib.core.logger import logger
from lib.core.settings import (
REFLECTED_PATH_MARKER,
TEST_PATH_LENGTH,
WILDCARD_TEST_POINT_MARKER,
)
from lib.parse.url import clean_path
from lib.utils.diff import generate_matching_regex, DynamicContentParser
from lib.utils.random import rand_string
class Scanner:
    """
    Learn what a wildcard response looks like for a path pattern and use
    that knowledge to classify later responses.

    On construction, random paths are built from ``path`` (every
    WILDCARD_TEST_POINT_MARKER is replaced by a random string) and requested
    through ``requester``. The recorded responses are then used by check()
    to tell a wildcard response from a real one.
    """

    def __init__(self, requester, **kwargs):
        """
        :param requester: object exposing ``request(path)``; the returned
            response must provide ``status``, ``redirect``, ``content`` and
            ``body`` attributes
        :param kwargs:
            - path: path template that may contain WILDCARD_TEST_POINT_MARKER
            - tested: mapping of category -> mapping of already set up
              scanners, used to skip redundant requests
            - context: human readable label, only used in debug logs
        """
        self.path = kwargs.get("path", "")
        # get_duplicate() indexes this by category and calls .values() on the
        # entries, so the empty default has to be a mapping
        self.tested = kwargs.get("tested", {})
        self.context = kwargs.get("context", "all cases")
        self.requester = requester
        self.response = None
        self.content_parser = None
        self.wildcard_redirect_regex = None
        self.setup()

    def setup(self):
        """
        Generate wildcard response information containers, this will be
        used to compare with other path responses
        """
        first_path = self.path.replace(
            WILDCARD_TEST_POINT_MARKER,
            rand_string(TEST_PATH_LENGTH),
        )
        first_response = self.requester.request(first_path)
        self.response = first_response

        duplicate = self.get_duplicate(first_response)
        # Another test was performed before and has the same response as this
        if duplicate:
            self.content_parser = duplicate.content_parser
            self.wildcard_redirect_regex = duplicate.wildcard_redirect_regex
            logger.debug(f'Skipped the second test for "{self.context}"')
            return

        second_path = self.path.replace(
            WILDCARD_TEST_POINT_MARKER,
            rand_string(TEST_PATH_LENGTH, omit=first_path),
        )
        second_response = self.requester.request(second_path)

        # A redirect pattern can only be derived when both probes redirected
        if first_response.redirect and second_response.redirect:
            self.wildcard_redirect_regex = self.generate_redirect_regex(
                clean_path(first_response.redirect),
                first_path,
                clean_path(second_response.redirect),
                second_path,
            )
            logger.debug(f'Pattern (regex) to detect wildcard redirects for "{self.context}": {self.wildcard_redirect_regex}')

        self.content_parser = DynamicContentParser(
            first_response.content, second_response.content
        )

    def get_duplicate(self, response):
        """
        Return the first already tested scanner whose wildcard response
        equals ``response``, or None if there is none
        """
        for category in self.tested:
            for tester in self.tested[category].values():
                if response == tester.response:
                    return tester

        return None

    def is_wildcard(self, response):
        """Check if response is similar to wildcard response"""
        # Compare 2 binary responses (Response.content is empty if the body is binary)
        if not self.response.content and not response.content:
            return self.response.body == response.body

        return self.content_parser.compare_to(response.content)

    def check(self, path, response):
        """
        Perform analyzing to see if the response is wildcard or not

        :param path: path that was requested
        :param response: response received for ``path``
        :return: True if the response is NOT a wildcard response,
            False if it looks like the wildcard response
        """
        if self.response.status != response.status:
            return True

        # See the docstring of generate_redirect_regex() to understand the workflow of this.
        if self.wildcard_redirect_regex and response.redirect:
            # - unquote(): Sometimes, some path characters get encoded or decoded in the response redirect
            # but it's still a wildcard redirect, so unquote everything to prevent false positives
            # - clean_path(): Get rid of queries and DOM in URL because of weird behaviours could happen
            # with them, so messy that I give up on finding a way to test them
            path = unquote(clean_path(path))
            redirect = unquote(clean_path(response.redirect))
            regex_to_compare = self.wildcard_redirect_regex.replace(
                REFLECTED_PATH_MARKER, re.escape(path)
            )
            is_wildcard_redirect = re.match(regex_to_compare, redirect, re.IGNORECASE)

            # If redirection doesn't match the rule, mark as found
            if not is_wildcard_redirect:
                logger.debug(f'"{redirect}" doesn\'t match the regular expression "{regex_to_compare}", passing')
                return True

        return not self.is_wildcard(response)

    @staticmethod
    def generate_redirect_regex(first_loc, first_path, second_loc, second_path):
        """
        From 2 redirects of wildcard responses, generate a regexp that matches
        every wildcard redirect.

        How it works:
        1. Replace path in 2 redirect URLs (if it gets reflected in) with a mark
           (e.g. /path1 -> /foo/path1 and /path2 -> /foo/path2 will become /foo/[mark] for both)
        2. Compare 2 redirects and generate a regex that matches both
           (e.g. /foo/[mark]?a=1 and /foo/[mark]?a=2 will have the regex: ^/foo/[mark]?a=(.*)$)
        3. Next time if it redirects, replace mark in regex with the path and check if it matches
           (e.g. /path3 -> /foo/path3?a=5, the regex becomes ^/foo/path3?a=(.*)$, which matches)

        :param first_loc: redirect location of the first wildcard response
        :param first_path: path requested for the first wildcard response
        :param second_loc: redirect location of the second wildcard response
        :param second_path: path requested for the second wildcard response
        :return: the regex produced by generate_matching_regex()
        """
        # Always decode the locations: check() matches the regex against an
        # unquoted redirect, so the regex must be built from unquoted text
        # even when there is no path to substitute
        first_loc = unquote(first_loc)
        second_loc = unquote(second_loc)

        # Skip the substitution for empty paths: str.replace("", mark) would
        # insert the mark between every character
        if first_path:
            first_loc = first_loc.replace(first_path, REFLECTED_PATH_MARKER)
        if second_path:
            second_loc = second_loc.replace(second_path, REFLECTED_PATH_MARKER)

        return generate_matching_regex(first_loc, second_loc)
|