File: shlox.py

package info (click to toggle)
maptransfer 0.3-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd, squeeze, wheezy
  • size: 216 kB
  • ctags: 93
  • sloc: python: 817; makefile: 10
file content (72 lines) | stat: -rw-r--r-- 1,504 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# -*- coding: utf-8 -*-

""" Re-Implementation of Python's shlex.split(), because shlex can't cope
    with the input being Unicode.

    Copyright (C) 2009, Michael "Svedrin" Ziegler <diese-addy@funzt-halt.net>
"""

def shlox( line, escape='\\', comment='#', sep=(' ', '\t', '\r', '\n' ) ):
	""" Split ``line`` into words using shell-like quoting rules.

	    This is a generator: words are yielded one by one while the line is
	    being scanned.

	    Rules:
	      * Outside of quotes, words are separated by any character in ``sep``.
	      * Outside of quotes, ``escape`` makes the following character a
	        literal part of the current word.
	      * Inside single quotes every character is taken literally.
	      * Inside double quotes, ``escape`` followed by the escape character,
	        the comment character, a quote or a separator yields just that
	        character; followed by anything else, the escape character is
	        kept in the word together with the character.
	      * A ``comment`` character at the start of a word ends parsing;
	        inside a word it is an ordinary character.
	      * An empty pair of quotes produces an empty word.

	    Parameters:
	      line    -- the string (or iterable of characters) to split.
	      escape  -- the escape character.
	      comment -- the character that starts a comment.
	      sep     -- container of separator characters (tuple, list or string).

	    Raises ValueError if the line ends inside a quoted string or directly
	    after an escape character. Because this is a generator, the error is
	    raised during iteration, after all preceding words have been yielded.
	"""
	ST_NORMAL, ST_ESCAPE, ST_SINGLE_QUOTED, ST_DOUBLE_QUOTED, ST_DOUBLE_ESCAPE = range(5)

	# Characters that lose their escape prefix inside double quotes. Built once
	# instead of per character; tuple() lets sep be any iterable of characters.
	dq_escapable = ( escape, comment, '"', "'" ) + tuple( sep )

	state = ST_NORMAL

	word  = ''
	# True while no word is in progress. Tracked separately from ``word`` so
	# that empty quoted strings ('' or "") still produce an empty word.
	empty = True

	for char in line:
		if   state == ST_NORMAL:
			if   char == escape:
				state = ST_ESCAPE
			elif char == '"':
				empty = False
				state = ST_DOUBLE_QUOTED
			elif char == "'":
				empty = False
				state = ST_SINGLE_QUOTED
			elif char == comment:
				if empty:
					# Comment at a word boundary: ignore the rest of the line.
					# A plain return ends the generator; raising StopIteration
					# here would turn into RuntimeError under PEP 479.
					return
				else:
					word += char
			elif char in sep:
				if not empty:
					yield word
					empty = True
					word  = ''
			else:
				empty = False
				word += char

		elif state == ST_ESCAPE:
			# An escaped character always belongs to a word, even if it is
			# the first character of that word or a separator.
			empty = False
			word += char
			state = ST_NORMAL

		elif state == ST_SINGLE_QUOTED:
			if   char == "'":
				state = ST_NORMAL
			else:
				word += char

		elif state == ST_DOUBLE_QUOTED:
			if   char == escape:
				state = ST_DOUBLE_ESCAPE
			elif char == '"':
				state = ST_NORMAL
			else:
				word += char

		elif state == ST_DOUBLE_ESCAPE:
			if   char in dq_escapable:
				word += char
			else:
				# Not an escapable character: keep the escape character itself.
				word += escape + char
			state = ST_DOUBLE_QUOTED

	if state != ST_NORMAL:
		raise ValueError( "Unclosed quote or \\ at end of line." )

	elif not empty:
		yield word