File: html.py

package info (click to toggle)
w3af 1.0-rc3svn3489-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd, squeeze, wheezy
  • size: 59,908 kB
  • ctags: 16,916
  • sloc: python: 136,990; xml: 63,472; sh: 153; ruby: 94; makefile: 40; asm: 35; jsp: 32; perl: 18; php: 5
file content (117 lines) | stat: -rw-r--r-- 3,632 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
'''
html.py

Copyright 2006 Andres Riancho

This file is part of w3af, w3af.sourceforge.net .

w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

'''

from plugins.grep.passwordProfilingPlugins.basePpPlugin import basePpPlugin

import sgmllib

class html(basePpPlugin):
    '''
    This plugin creates a map of possible passwords by reading html responses.
      
    @author: Andres Riancho ( andres.riancho@gmail.com )
    '''

    # Words found inside <title> are considered better password material than
    # words found elsewhere, so each title occurrence counts this many times.
    TITLE_WEIGHT = 5

    def __init__(self):
        basePpPlugin.__init__(self)
        
    def getWords(self, response):
        '''
        Get words from the response body, this is a modified "strings" that
        filters out HTML tags.
        
        @parameter response: The response object to analyze. It must provide
            is_text_or_html() and getBody(). The body is in most common cases
            an html, but could be almost anything.
        @return: A map of strings:repetitions, where every occurrence inside
            the title counts TITLE_WEIGHT times. None is returned when the
            response is not text/html or when the body could not be parsed.
        '''
        if not response.is_text_or_html():
            return None

        sp = simpleParser()
        try:
            sp.parse( response.getBody() )
        except Exception:
            # If this plugin couldnt parse the document, return None. This will
            # indicate passwordProfiling.py to continue to the next pp plugin.
            # (Exception, not a bare except, so that KeyboardInterrupt and
            # SystemExit are not swallowed.)
            return None

        data = sp.getData()

        # Join both maps. Title text is never part of the body data, so a word
        # that only shows up in the title has to be added to the result, not
        # just used to increment a pre-existing entry.
        for word, repetitions in sp.getTitles().items():
            data[ word ] = data.get( word, 0 ) + repetitions * self.TITLE_WEIGHT

        return data
    
class simpleParser(sgmllib.SGMLParser):
    "A simple parser class."

    def parse(self, s):
        "Parse the given string 's'."
        self.feed(s)
        self.close()

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self._data = []
        self._titles = []
        self._inTitle = False

    def handle_data(self, data):
        "Handle the textual 'data'."
        if self._inTitle:
            self._titles.append( data )
        else:
            self._data.append(data)
        
    def start_title( self, data):
        "Handle titles."
        self._inTitle = True
        
    def end_title( self ):
        "Handle titles."
        self._inTitle = False

    def _parseStrings( self, stringList ):
        res = {}
        for d in stringList:
            d = d.replace('>', ' ')
            d = d.replace('<', ' ')
            splitted = d.split(' ')
            for chunk in splitted:
                if chunk.isalnum() and len(chunk) >= 4:
                    if chunk in res.keys():
                        res[ chunk ] += 1
                    else:
                        res[ chunk ] = 1
        return res
        
    def getData(self):
        "Return a map of string:repetitions"
        return self._parseStrings( self._data )
        
    def getTitles( self ):
        "Return a map of string:repetitions"
        return self._parseStrings( self._titles )