File: excel.py

package info (click to toggle)
python-shodan 1.28.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 400 kB
  • sloc: python: 2,674; makefile: 150
file content (132 lines) | stat: -rw-r--r-- 3,748 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132

from .base import Converter
from ...helpers import iterate_files, get_ip

from collections import defaultdict
from xlsxwriter import Workbook


class ExcelConverter(Converter):

    fields = [
        'port',
        'timestamp',
        'data',
        'hostnames',
        'org',
        'isp',
        'location.country_name',
        'location.country_code',
        'location.city',
        'os',
        'asn',
        'transport',
        'product',
        'version',

        'http.server',
        'http.title',
    ]

    field_names = {
        'org': 'Organization',
        'isp': 'ISP',
        'location.country_code': 'Country ISO Code',
        'location.country_name': 'Country',
        'location.city': 'City',
        'os': 'OS',
        'asn': 'ASN',

        'http.server': 'Web Server',
        'http.title': 'Website Title',
    }

    def process(self, files):
        # Get the filename from the already-open file handle
        filename = self.fout.name

        # Close the existing file as the XlsxWriter library handles that for us
        self.fout.close()

        # Create the new workbook
        workbook = Workbook(filename)

        # Define some common styles/ formats
        bold = workbook.add_format({
            'bold': 1,
        })

        # Create the main worksheet where all the raw data is shown
        main_sheet = workbook.add_worksheet('Raw Data')

        # Write the header
        main_sheet.write(0, 0, 'IP', bold)  # The IP field can be either ip_str or ipv6 so we treat it differently
        main_sheet.set_column(0, 0, 20)

        row = 0
        col = 1
        for field in self.fields:
            name = self.field_names.get(field, field.capitalize())
            main_sheet.write(row, col, name, bold)
            col += 1
        row += 1

        total = 0
        ports = defaultdict(int)
        for banner in iterate_files(files):
            try:
                # Build the list that contains all the relevant values
                data = []
                for field in self.fields:
                    value = self.banner_field(banner, field)
                    data.append(value)

                # Write those values to the main workbook
                # Starting off w/ the special "IP" property
                main_sheet.write_string(row, 0, get_ip(banner))
                col = 1

                for value in data:
                    main_sheet.write(row, col, value)
                    col += 1
                row += 1
            except Exception:
                pass

            # Aggregate summary information
            total += 1
            ports[banner['port']] += 1

        summary_sheet = workbook.add_worksheet('Summary')
        summary_sheet.write(0, 0, 'Total', bold)
        summary_sheet.write(0, 1, total)

        # Ports Distribution
        summary_sheet.write(0, 3, 'Ports Distribution', bold)
        row = 1
        col = 3
        for key, value in sorted(ports.items(), reverse=True, key=lambda kv: (kv[1], kv[0])):
            summary_sheet.write(row, col, key)
            summary_sheet.write(row, col + 1, value)
            row += 1
        
        workbook.close()

    def banner_field(self, banner, flat_field):
        # The provided field is a collapsed form of the actual field
        fields = flat_field.split('.')

        try:
            current_obj = banner
            for field in fields:
                current_obj = current_obj[field]

            # Convert a list into a concatenated string
            if isinstance(current_obj, list):
                current_obj = ','.join([str(i) for i in current_obj])

            return current_obj
        except Exception:
            pass

        return ''