File: lav_sort.py

package info (click to toggle)
lastz 1.04.52-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 17,604 kB
  • sloc: ansic: 39,808; python: 6,073; makefile: 843; sh: 53
file content (161 lines) | stat: -rwxr-xr-x 4,013 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python
"""
Sort the a-stanzas in a lav file, according to the user's choice of key
-----------------------------------------------------------------------

:Author: Bob Harris (rsharris@bx.psu.edu)
"""

import sys

# Key names accepted by the --key option.  "pos1"/"pos2" are handled exactly
# like "beg1"/"beg2" when the key value is extracted (see get_key_value).
validKeys = ["score","pos1","pos2","beg1","beg2","end1","end2"]

def usage(s=None):
	"""Terminate the program with the command-line synopsis.

	s:  optional text describing what was wrong with the command line;  when
	    given, it is reported on its own line ahead of the synopsis.

	This never returns;  it always raises SystemExit (via sys.exit) with the
	text as the exit argument.
	"""
	message = """
lav_sort --key=[-]<score|beg1|beg2|end1|end2> < lav_file > lav_file
"""

	# identity test, so that only a genuinely absent message selects the bare
	# synopsis (an equality test could be fooled by an overloaded __eq__)
	if (s is None): sys.exit (message)
	else:           sys.exit ("%s\n%s" % (s,message))


def main():
	"""Copy a lav file from stdin to stdout, sorting each run of a-stanzas.

	The single command-line argument must be --key=[-|+]<name>, where <name>
	is one of validKeys;  a leading "-" asks for descending order.  Stanzas
	other than a-stanzas are copied through in place;  every maximal run of
	consecutive a-stanzas is written out in key order.
	"""

	# parse the command line

	argCount = len(sys.argv) - 1
	if (argCount < 1):
		usage("you must specify a key")
	elif (argCount > 1):
		usage("wrong number of arguments")

	arg = sys.argv[1]
	if (not arg.startswith("--key=")):
		usage("unrecognized argument: \"%s\"" % arg)

	# everything after the first "=" is the (possibly signed) key name
	keyName    = arg.partition("=")[2]
	keyReverse = False
	if (keyName.startswith("-")):
		(keyName,keyReverse) = (keyName[1:],True)
	if (keyName.startswith("+")):
		(keyName,keyReverse) = (keyName[1:],False)
	if (keyName not in validKeys):
		usage("unrecognized key: \"%s\"" % keyName)

	# write a collected run of a-stanzas in key order;  the (key,stanza) pairs
	# are compared as whole tuples, and a descending request is satisfied by
	# reversing the ascending result

	def write_sorted(pending):
		ordered = sorted(pending)
		if (keyReverse): ordered.reverse()
		for (_,lines) in ordered:
			print ("\n".join(lines))

	# process the stanzas

	pending = []
	for (kind,stanza) in read_stanzas(sys.stdin):
		if (kind == "a"):
			pending.append((get_key_value(keyName,stanza),stanza))
			continue
		# any other stanza ends the current run (writing an empty run is a
		# no-op)
		write_sorted(pending)
		pending = []
		print ("\n".join(stanza))

	write_sorted(pending)

# read_stanzas--
#	Collect the lines that belong to the next stanza.  A stanza has the form
#	shown below.  It consists of several lines bracketed by a pair of curlies,
#	and has a type indicated by a single letter.
#
#		x {
#		  ...
#		}
#
#	In this routine we generalize the stanza concept to include lines not
#	strictly within a pair of curlies.  First, lines beginning with a "#:" are
#	considered to be single line stanzas whose type is the text following the
#	"#:" (e.g. the "#:lav" and "#:eof" lines).  Second, any other lines outside
#	a pair of curlies (blank or not) are appended to whatever stanza preceded
#	them.  This allows for lav+text and other debugging output from lastz to be
#	carried around with the appropriate stanza.

def read_stanzas(f):
	"""Generate (kind,lines) pairs for the stanzas found in f.

	f:  any iterable of text lines (e.g. an open file);  trailing whitespace
	    is stripped from every line.

	Yields:
	  - (text after "#:", [line]) for each line that begins with "#:"
	  - (letter, lines) for each curly-bracketed stanza, where letter is the
	    first character of its three-character "x {" opening line, and lines
	    holds the opening line, the body, the closing "}" and any loose lines
	    that follow up to the start of the next stanza
	  - (None, lines) for loose lines that are not preceded by a curly stanza

	A collected stanza is only handed out when the next stanza begins, so the
	input has to end with a "#:" line (such as "#:eof").  Otherwise
	AssertionError("premature end of file") is raised once the input is
	exhausted.
	"""
	kind    = None
	stanza  = []
	inCurly = False
	for line in f:
		line = line.rstrip()

		# inside curlies every line belongs to the stanza, up to the "}"
		if (inCurly):
			stanza.append(line)
			if (line == "}"): inCurly = False
			continue

		# a "#:" line closes whatever was being collected and is a stanza of
		# its own
		if (line.startswith("#:")):
			if (stanza):
				yield (kind,stanza)
				stanza = []
			yield (line[2:],[line])
			kind = None
			continue

		# an "x {" line closes whatever was being collected and opens a new
		# stanza of type x
		if (len(line) == 3) and (line.endswith(" {")):
			if (stanza):
				yield (kind,stanza)
				stanza = []
			kind    = line[0]
			inCurly = True

		# the opening line itself, or a loose line that rides along with the
		# stanza before it
		stanza.append(line)

	# raised explicitly (rather than by an assert statement) so truncated
	# input is still reported when python runs with -O, instead of the last
	# stanza being dropped silently
	if (stanza):
		raise AssertionError("premature end of file")

# get_key_value--
#	Extract the specified key value from an a-stanza.  A typical a-stanza looks
#	like this one:
#
#		a {
#		  s 14400
#		  b 425 4438
#		  e 697 4714
#		  l 425 4438 448 4461 96
#		  l 449 4464 579 4594 83
#		  l 581 4595 604 4618 96
#		  l 605 4627 609 4631 100
#		  l 617 4632 648 4663 91
#		  l 649 4666 697 4714 90
#		}

def get_key_value(keyName,aStanza):
	"""Return the value of the sort key keyName taken from an a-stanza.

	keyName:  "score", "pos1", "beg1", "pos2", "beg2", "end1" or "end2"
	          ("pos1"/"pos2" are read exactly like "beg1"/"beg2")
	aStanza:  the stanza's lines, with the "a {" line at index 0, the "s"
	          (score) line at index 1, the "b" (begin) line at index 2 and
	          the "e" (end) line at index 3

	Returns an int for the position keys.  For "score" the result is an int
	when the text parses as one, else a float when it parses as one, else the
	unconverted text.

	Raises AssertionError when the needed line is absent or carries the wrong
	tag, or when keyName is not one of the names above;  ValueError when a
	position is not an integer;  IndexError when the needed line has too few
	fields.
	"""
	if (keyName == "score"):
		score = _tagged_fields(aStanza,1,"s")[1]
		# only a failed conversion selects the next fallback;  anything else
		# (e.g. KeyboardInterrupt) is allowed to propagate
		for convert in (int,float):
			try:
				return convert(score)
			except ValueError:
				pass
		return score

	# key name -> (index of stanza line, tag of that line, field on that line)
	whereIs = {
		"pos1": (2,"b",1),  "beg1": (2,"b",1),
		"pos2": (2,"b",2),  "beg2": (2,"b",2),
		"end1": (3,"e",1),
		"end2": (3,"e",2),
		}

	if (keyName in whereIs):
		(lineIx,tag,fieldIx) = whereIs[keyName]
		return int(_tagged_fields(aStanza,lineIx,tag)[fieldIx])

	# raised explicitly so an unsupported key is still reported under -O
	raise AssertionError("unsupported key: \"%s\"" % keyName)


# split line lineIx of an a-stanza into fields, after checking that the line
# exists and begins with the expected (two-space indented) tag
def _tagged_fields(aStanza,lineIx,tag):
	if (len(aStanza) <= lineIx) or (not aStanza[lineIx].startswith("  " + tag)):
		raise AssertionError("line %d of the a-stanza is not a \"%s\" line"
		                     % (lineIx+1,tag))
	return aStanza[lineIx].split()


# run the sorter only when this file is executed as a script
if __name__ == "__main__": main()