File: darkstat_export

package info (click to toggle)
darkstat 3.0.722-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 868 kB
  • sloc: ansic: 7,585; sh: 660; javascript: 213; makefile: 174; php: 15
file content (257 lines) | stat: -rw-r--r-- 9,457 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#!/bin/sh

# Copyright 2013 MediaMobil Communication GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# This script converts a binary .db file into a .csv file.
# The .db file was generated by darkstat with the --export option.
# The .csv file shall be read by any spreadsheet application.
# Name of this script without its directory part, used as message prefix.
SCRIPTNAME=${0##*/}

# require_tool TOOL DESCRIPTION
# Exit with status 1 unless TOOL can be found in PATH.
#   $1 - command name to look up
#   $2 - human readable description used in the error message
# The lookup uses the POSIX "command -v".  "type -P" is a bash extension;
# shells that lack the option treat -P as one more name to look up, so the
# output of "type -P tool" is not a reliable emptiness test under /bin/sh.
require_tool() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "${SCRIPTNAME}: missing $2, at least not found in PATH" >&2
    echo "${SCRIPTNAME}: every POSIX compliant OS has one; add the location to PATH" >&2
    exit 1
  fi
}
require_tool awk "AWK interpreter"
require_tool od "od file dump tool"
# Exactly one argument is required: the darkstat export file to convert.
if test $# -ne 1; then
  echo "${SCRIPTNAME}: missing parameter; need file name of .db file" >&2
  exit 1
fi
DBFILENAME=$1
if test -r "${DBFILENAME}"; then
  echo "${SCRIPTNAME}: Found file ${DBFILENAME}"
else
  echo "${SCRIPTNAME}: file ${DBFILENAME} does not exist" >&2
  exit 1
fi
# Derive the output name by cutting the file name at its first dot and
# appending .csv.  Only the last path component is cut, so a dot in a
# directory name (./x.db, /var/lib/darkstat.d/x.db) leaves the directory
# intact and the .csv file lands next to the .db file.
DBBASE=${DBFILENAME##*/}
DBDIR=${DBFILENAME%"${DBBASE}"}
CSVSTEM=${DBBASE%%.*}
if test -z "${CSVSTEM}"; then
  # The name starts with a dot; keep it whole instead of producing ".csv".
  CSVSTEM=${DBBASE}
fi
CSVFILENAME=${DBDIR}${CSVSTEM}.csv
echo "${SCRIPTNAME}: Writing output into ${CSVFILENAME}"

# The spec of the .db export format exists for different versions:
#   https://github.com/emikulic/darkstat/blob/master/export-format.txt
#   http://git.msquadrat.de/darkstat.git/blob_plain/master:/export-format.txt
#   http://phil.lavin.me.uk/downloads/parse.phps
# Only file format version 1 is supported by us.
# Obviously, darkstat itself distinguishes 3 different host format versions.
# Only host format version 2 is supported by us.
# The darkstat database file is converted from binary format
# to ASCII by the standard Unix command od.

# Some things don't work correctly yet.
# Probably because there is no DNS server configured in our embedded device
# that produces .db files within OpenWRT.
#   - host name contains nonsense at constant length 5
#   - "last seen" timing information contains always 0
#   - we read the graphics section of the file but ignore it

# Let the od tool convert each binary byte into several textual formats.
# The AWK script reads all variants and later picks the format it needs.
od  -Ad -v -tx1 -tu1 -ta -w1 < "${DBFILENAME}" |
awk '
  # od prints three lines per input byte: "<offset> <hex>", then the
  # unsigned decimal value, then the character name.  After the last byte
  # it prints one more line that holds only the end offset.  The counter
  # "seen" records how many single-field lines followed the current offset
  # line, so that the trailing end offset is ignored instead of being
  # stored as the character name of the last byte.
  NF==2            { addr = 0 + $1; hex[addr] = $2; seen = 0; next }
  NF==1 && seen==0 { dec[addr]=$1;                  seen = 1; next }
  NF==1 && seen==1 { ascii[addr]=$1;                seen = 2; next }
  # Now all variants of the bytes are available in certain arrays.
  # The array indices cover the range 0 .. addr.

  # Join count consecutive elements of array, beginning at index address,
  # into one string and return it.  The result is empty when count is
  # less than 1.  Names after the tab are locals.
  function read_bytes(array, address, count,	joined, left) {
    joined = ""
    left = count
    while (left-- > 0)
      joined = joined array[address++]
    return joined
  }
  # Interpret count bytes of the global dec[] array, beginning at index
  # address, as one unsigned number with the most significant byte first
  # and return it.  Returns 0 when count is less than 1.
  # Names after the tabs are locals.
  function read_number(address, count,		value, pos) {
    value = 0
    for (pos = address; pos < address + count; pos++)
      value = 256 * value + dec[pos]
    return value
  }
  # Concatenate count entries of the global ascii[] array (the character
  # names printed by od), beginning at index address, and return the
  # resulting string.  Names after the tabs are locals.
  function read_text(address, count,		text, last) {
    text = ""
    last = address + count - 1
    for (; address <= last; address++)
      text = text ascii[address]
    return text
  }
  # Report a problem and optionally stop the program.
  #   reason     message to print; nothing is printed when it is empty
  #   terminate  when non-zero, dump every unparsed byte from offset ai up
  #              to offset addr (offset, hex value, character name) and
  #              exit
  #   retval     exit status used when terminate is non-zero
  # The message and the dump are written to standard error, because the
  # standard output of this program is redirected into the .csv file.
  # The name after the tab is a local, so the dump loop leaves any global
  # variable named i untouched.
  function quit(reason, terminate, retval,	i) {
    if (length(reason) > 0)
      print reason | "cat 1>&2"
    if (terminate != 0) {
      # Any remaining bytes in the file shall be dumped.
      for (i=ai; i<=addr; i++)
        print i, hex[i], ascii[i] | "cat 1>&2"
      # Flush the diagnostics before leaving.
      close("cat 1>&2")
      exit(retval)
    }
  }
  # Parse the IP protocol list of one host at offset ai: a marker byte
  # "P", a one byte entry count, then per entry a one byte protocol
  # number followed by two eight byte counters (in, out).
  # Appends every protocol number to IPprotos, adds the counters to
  # ip_proto_in and ip_proto_out, and leaves ai behind the section.
  # A wrong marker byte ends the program through quit().
  function readIPsection() {
    ip_protos_data = read_bytes(ascii, ai, 1)
    if (ip_protos_data != "P")
      quit("expected ip_protos_data P, found " ip_protos_data, 1, 1)
    ip_proto_count = read_number(ai + 1, 1)
    ai += 2
    pi = 0
    while (pi < ip_proto_count) {
      ip_proto_type = read_number(ai, 1)
      IPprotos = IPprotos " " ip_proto_type
      ip_proto_in  += read_number(ai + 1, 8)
      ip_proto_out += read_number(ai + 9, 8)
      # 1 byte protocol number + 8 bytes in + 8 bytes out
      ai += 17
      pi++
    }
  }
  # Parse the TCP port list of one host at offset ai: a marker byte "T",
  # a two byte entry count, then per entry a two byte port number and
  # three eight byte counters (syn, in, out).
  # Appends every port number to TCPports, adds the in/out counters to
  # the per-host totals tcp_proto_in and tcp_proto_out, and leaves ai
  # behind the section.  A wrong marker byte ends the program through
  # quit().
  # The counters of one port are kept in the locals port_in and port_out
  # (names after the tab), so that the ssh and rdp columns receive only
  # the traffic of port 22 and port 3389 and not the running total of all
  # ports read so far.
  function readTCPsection(	port_in, port_out) {
    tcp_protos_data=read_bytes(ascii, ai, 1)
    if (tcp_protos_data != "T")
      quit("expected tcp_protos_data T, found " tcp_protos_data, 1, 1)
    ai += 1
    tcp_proto_count=read_number(ai, 2)
    ai += 2
    for (ti=0; ti<tcp_proto_count; ti++) {
      tcp_proto_port=read_number(ai, 2)
      ai += 2
      TCPports = TCPports " " tcp_proto_port
      # The syn counter is read but not exported.
      tcp_proto_syn=read_number(ai, 8)
      ai += 8
      port_in=read_number(ai, 8)
      ai += 8
      port_out=read_number(ai, 8)
      ai += 8
      tcp_proto_in  += port_in
      tcp_proto_out += port_out
      if (tcp_proto_port == 22) {
        ssh_in  += port_in
        ssh_out += port_out
      }
      if (tcp_proto_port == 3389) {
        rdp_in  += port_in
        rdp_out += port_out
      }
    }
  }
  # Parse the UDP port list of one host at offset ai: a marker byte "U",
  # a two byte entry count, then per entry a two byte port number and
  # two eight byte counters (in, out).
  # Appends every port number to UDPports, adds the counters to the
  # per-host totals udp_proto_in and udp_proto_out, and leaves ai behind
  # the section.  A wrong marker byte ends the program through quit().
  # The counters of one port are kept in the locals port_in and port_out
  # (names after the tab), so that the ssh and rdp columns receive only
  # the traffic of port 22 and port 3389 and not the running total of all
  # ports read so far.
  function readUDPsection(	port_in, port_out) {
    udp_protos_data=read_bytes(ascii, ai, 1)
    if (udp_protos_data != "U")
      quit("expected udp_protos_data U, found " udp_protos_data, 1, 1)
    ai += 1
    udp_proto_count=read_number(ai, 2)
    ai += 2
    for (ui=0; ui<udp_proto_count; ui++) {
      udp_proto_port=read_number(ai, 2)
      ai += 2
      UDPports = UDPports " " udp_proto_port
      port_in=read_number(ai, 8)
      ai += 8
      port_out=read_number(ai, 8)
      ai += 8
      udp_proto_in  += port_in
      udp_proto_out += port_out
      if (udp_proto_port == 22) {
        ssh_in  += port_in
        ssh_out += port_out
      }
      if (udp_proto_port == 3389) {
        rdp_in  += port_in
        rdp_out += port_out
      }
    }
  }
  # Step over one graph of the graphs section at offset ai: one byte
  # n_bars, one byte i_bars, then n_bars pairs of eight byte counters
  # (in, out).  The values are read but not exported; only ai advances.
  # The interval argument is a label for the reader of this script and is
  # not used by the code.
  function readGraphsection(interval) {
    n_bars = read_number(ai, 1)
    i_bars = read_number(ai + 1, 1)
    ai += 2
    bi = 0
    while (bi < n_bars) {
      graph_bytes_in  = read_number(ai, 8)
      graph_bytes_out = read_number(ai + 8, 8)
      # 8 bytes in + 8 bytes out
      ai += 16
      bi++
    }
  }

  # Runs after all od output has been stored in hex[], dec[] and ascii[].
  # Checks the file header and the hosts section header, writes one CSV
  # row per host, then walks through the graphs section and finally
  # verifies that every byte of the file has been consumed.
  # ai is always the offset of the next byte to be parsed.
  END {
    file_header=read_bytes(hex, 0, 4)
    if (file_header != "da314159")
      quit("input data is not an exported darkstat .db file, wrong header: " file_header, 1, 1)
    section_header=read_bytes(hex, 4, 3)
    if (section_header != "da4853")
      quit("section header da4853 expected: " section_header, 1, 1)
    db_version=read_bytes(hex, 7, 1)
    if (db_version != "01")
      quit("file format supported only in version 01", 1, 1)
    host_count=read_number(8, 4)
    ai=12
    # Print a header into the .csv file.
    printf("IP address;MAC address;host in bytes;host out bytes;IP protos;IP in bytes;IP out bytes;TCP port count;TCP in bytes;TCP out bytes;UDP port count;UDP in bytes;UDP out bytes;ssh in bytes;ssh out bytes;rdp in bytes;rdp out bytes;TCP ports;UDP ports\n")
    for (hi=1; hi<=host_count; hi++) {
      # Make sure all variables to be printed are initially empty.
      ip_address=mac_address=""
      host_bytes_in=host_bytes_out=ip_proto_in=ip_proto_out=tcp_proto_in=tcp_proto_out=udp_proto_in=udp_proto_out=ssh_in=ssh_out=rdp_in=rdp_out=0
      IPprotos=TCPports=UDPports=""
      tcp_proto_count=udp_proto_count=0
      # Three header bytes followed by one byte with the host format version.
      host_header=read_bytes(hex, ai, 3)
      host_version=read_bytes(hex, ai+3, 1)
      ai += 4
      if (host_version == "02") {
        ip_address=read_number(ai+0,1) "." read_number(ai+1,1) "." read_number(ai+2,1) "." read_number(ai+3,1)
        ai += 4
        if ((host_version+0) > 1) {
          last_seen=read_number(ai, 4)
          # This value is always 0 in our files.
          ai += 4
        }
        mac_address=hex[ai+0] ":" hex[ai+1] ":" hex[ai+2] ":" hex[ai+3] ":" hex[ai+4] ":" hex[ai+5]
        ai += 6
        # Weird stuff: the host name should be read.
        # But there are only 5 bytes of nonsense.
        # The first byte should be the length counter, but it is not.
        # The last byte is in fact a 0 byte.
        # Probably caused by the missing DNS server.
        # ignore 5 bytes with nonsense
        nonsense=read_text(ai, 5)
        ai += 5
        host_bytes_in=read_number(ai, 8)
        ai += 8
        host_bytes_out=read_number(ai, 8)
        ai += 8
        readIPsection()
        readTCPsection()
        readUDPsection()
      } else {
        # quit() exits here, so no row is printed for this host.
        quit("host format supported only in version 02: " host_version, 1, 1)
        #address_familiy=read_bytes(hex, ai, 1)
        #print "address familiy = " address_familiy
      }
      printf("\"%s\";\"%s\";%d;%d;%s;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%s;%s\n",
              ip_address, mac_address, host_bytes_in, host_bytes_out,
              IPprotos, ip_proto_in, ip_proto_out,
              tcp_proto_count, tcp_proto_in, tcp_proto_out,
              udp_proto_count, udp_proto_in, udp_proto_out,
              ssh_in, ssh_out, rdp_in, rdp_out,
              TCPports, UDPports)
    }
    section_header=read_bytes(hex, ai, 3)
    if (section_header != "da4752")
      quit("section header da4752 expected: " section_header, 1, 1)
    ai += 3
    db_version=read_bytes(hex, ai, 1)
    if (db_version != "01")
      quit("file format supported only in version 01", 1, 1)
    ai += 1
    last_time=read_number(ai, 8)
    ai += 8
    readGraphsection("60 seconds")
    readGraphsection("60 minutes")
    readGraphsection("24 hours")
    readGraphsection("31 days")
    # The complete file has been parsed, no bytes should be left over.
    # ai is the offset of the first unread byte and addr the offset of the
    # last byte of the file, so a clean parse ends with ai == addr + 1.
    # On a mismatch the unread bytes are dumped and the exit status is 1;
    # otherwise the END rule simply finishes, which gives exit status 0.
    mismatch = (ai != addr + 1) ? 1 : 0
    quit("", mismatch, mismatch)
  }
  ' > "${CSVFILENAME}"