File: sort.py

package info (click to toggle)
ispell-lt 1.3.2-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,980 kB
  • sloc: perl: 3,675; python: 854; makefile: 337; sh: 155; awk: 56; xml: 6
file content (157 lines) | stat: -rw-r--r-- 3,086 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python3
# -*- coding: iso-8859-13 -*-
#
# Autorius: Laimonas Vbra, 2010
#
"""
sort.py -- sorts the lines/words of a file or of STDIN (by the locale).
Can strip comments, remove duplicate lines and sort "smartly", i.e. take
a certain file structure into account (for now it can sort commented-out
words, ignoring the comment marker).

Usage: 
	./sort.py [options] file > sorted
	cat file | sort.py [options] > sorted

Options:
	see usage()
"""

import os, sys
import fileinput
import getopt
from locale import setlocale, getdefaultlocale, LC_COLLATE, strxfrm


# The sets module is deprecated: since v2.6 the built-in set/frozenset
# types replace it, and importing the deprecated module emits a warning.
if sys.version_info < (2, 6):
    from sets import Set


def _set(arg=''):
    if sys.version_info < (2, 6):
        return Set(arg)  
    else:
        return set(arg)



def usage():
    """Print the command-line help text to standard output.

    Written as a single-argument ``print(...)`` call so that it works both
    as the Python 3 function and as the Python 2 statement.  The synopsis
    lists the options that the option parser really accepts
    (-h, -c, -s, -u).
    """
    print(
        "\n"
        "Usage: \n"
        "\tsort.py [-h,--help] [-c,--clean] [-s,--smart] [-u,--unique] file|STDIN\n"
        "\n"
        "Options:\n"
        "\t-h, --help      Display this help message;\n"
        "        -c, --clean     Clean/strip all comments (#);\n"
        "        -s, --smart     Smart sort (inc. commented words);\n"
        "        -u, --unique    Remove duplicate lines.\n"
    )



# Parse the command line.  An unknown option prints the help text and
# terminates with exit status 2.
try:
    opts, rargs = getopt.getopt(
        sys.argv[1:], "hcsu", ["help", "clean", "smart", "unique"])
except getopt.GetoptError:
    usage()
    sys.exit(2)


# Processing switches read by sort(); 0 = off, 1 = on.
unique_lines = 0
strip_comments = 0
smart_sort = 0


# NOTE: the loop body is indented with spaces only; mixing tabs and spaces
# here is rejected by Python 3.
for opt, arg in opts:
    if opt in ("-h", "--help"):
        usage()
        sys.exit(2)

    if opt in ("-c", "--clean"):
        strip_comments = 1

    if opt in ("-s", "--smart"):
        smart_sort = 1

    if opt in ("-u", "--unique"):
        unique_lines = 1



# Locale detection/selection is problematic on Windows, so the Windows
# locale name is used there instead of the detected default.
locale = getdefaultlocale()
if os.name == "nt":
    locale = "Lithuanian"

try:
    setlocale(LC_COLLATE, locale)
except Exception:
    # Best effort: report the problem and carry on with the collation
    # locale that is already active.  Catching Exception (not a bare
    # except) lets KeyboardInterrupt and SystemExit propagate.
    sys.stderr.write("Could not set locale\n")


def _tsmart(s):
	""" Smart (custom) transfrom; strxfrm() """
	# Ignoruojame komentaro simbol ir rikiuojame pagal od u jo
	if s.startswith("#"): s = s[1:]
	return strxfrm(s)
		


def sort(lines):
    """Filter, de-duplicate and sort *lines*, printing the result to stdout.

    Every line is first stripped of surrounding whitespace.  The module-level
    switches ``smart_sort``, ``strip_comments`` and ``unique_lines`` select
    the processing:

    * ``strip_comments`` -- on a line that starts with a word, everything
      from the first "#" on is removed; a line that holds only a comment is
      dropped (except a commented-out word in smart mode, see below).
    * ``smart_sort`` -- a line of the form "#word" is treated as a
      commented-out dictionary word: it is kept and ordered by the text
      after the "#".
    * ``unique_lines`` -- only the first occurrence of a line is kept.

    When ``smart_sort`` or ``strip_comments`` is active, blank lines and a
    lone "#" are dropped as well; otherwise every line is kept.

    lines -- iterable of strings.
    Returns None; the sorted lines are printed one per line.
    """
    words = []
    seen = _set()

    for line in lines:
        line = line.strip()

        if smart_sort or strip_comments:
            parts = line.split("#")
            word = parts[0].strip()
            # Text between the first and the second "#", if any.
            comment = parts[1] if len(parts) >= 2 else None

            if word:
                if strip_comments:
                    line = word
            elif not (smart_sort and comment):
                # Blank line, lone "#", or a comment-only line outside
                # smart mode.
                continue
            elif strip_comments and comment.startswith((" ", "\t")):
                # XXX assumption: whitespace right after "#" marks a real
                # (block) comment rather than a commented-out dictionary
                # word.
                continue

        if unique_lines:
            if line in seen:
                continue
            seen.add(line)
        words.append(line)

    if smart_sort:
        words.sort(key=_tsmart)
    else:
        words.sort(key=strxfrm)

    # Single-argument print(...) works on both Python 2 and Python 3.
    for line in words:
        print(line)



if __name__ == "__main__":
    # Read the files left on the command line after option parsing (STDIN
    # when there are none) and print their sorted lines.
    input_lines = fileinput.input(rargs)
    sort(input_lines)