File: avro_file_decoder.py

package info (click to toggle)
pmacct 1.7.8-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 11,904 kB
  • sloc: ansic: 110,430; sh: 4,794; cpp: 4,375; python: 3,632; makefile: 525
file content (80 lines) | stat: -rwxr-xr-x 2,192 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python
#
# If missing 'avro' read how to download it at: 
# https://avro.apache.org/docs/1.8.1/gettingstartedpython.html
#
# 'avro' package is deemed slow. For production scenarios, do consider
# implementing this script using the 'fastavro' Python package that is
# documented here: https://fastavro.readthedocs.io/en/latest/

import sys, os, getopt, io
from avro.datafile import DataFileReader
from avro.io import DatumReader
import avro.schema

def usage(tool):
	print ""
	print "Usage: %s [Args]" % tool
	print ""

	print "Mandatory Args:"
	print "  -i, --input-file".ljust(25) + "Input file in Avro format"
	print "  -s, --schema".ljust(25) + "Schema to decode input file (if not included)"
	print ""
	print "Optional Args:"
	print "  -h, --help".ljust(25) + "Print this help"

def main():
	try:
		opts, args = getopt.getopt(sys.argv[1:], "hi:s:", ["help", "input-file=",
						"schema="])
	except getopt.GetoptError as err:
		# print help information and exit:
		print str(err) # will print something like "option -a not recognized"
		usage(sys.argv[0])
		sys.exit(2)

	avro_file = None
	avro_schema_file = None

	required_cl = 0

	for o, a in opts:
		if o in ("-h", "--help"):
			usage(sys.argv[0])
			sys.exit()
		elif o in ("-i", "--input-file"):
			required_cl += 1
            		avro_file = a
		elif o in ("-s", "--schema"):
			avro_schema_file = a
		else:
			assert False, "unhandled option"

	if (required_cl < 1): 
		print "ERROR: Missing required argument"
		usage(sys.argv[0])
		sys.exit(1)

	if not avro_schema_file:
		reader = DataFileReader(open(avro_file, "r"), DatumReader())
		for datum in reader:
			print datum
		reader.close()
	else:
		reader_schema = open(avro_schema_file, "r")
		avro_schema = reader_schema.read()
		reader_schema.close()
		parsed_avro_schema = avro.schema.parse(avro_schema)

		with open(avro_file, "rb") as reader_data:
			inputio = io.BytesIO(reader_data.read())
			decoder = avro.io.BinaryDecoder(inputio)
			reader = avro.io.DatumReader(parsed_avro_schema)
			while inputio.tell() < len(inputio.getvalue()):
				avro_datum = reader.read(decoder)
				print avro_datum
		reader_data.close()

if __name__ == "__main__":
    main()