1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
|
#!/usr/bin/env python3
# -*- coding: iso-8859-13 -*-
#
# Autorius: Laimonas Vbra, 2010
#
"""
sort.py -- sorts (according to the locale) the lines/words of a file or STDIN.
Can strip comments, remove duplicate lines and sort "smartly", i.e. take
a certain file structure into account (for now it can sort commented-out
words while ignoring the comment marker).
Usage:
./sort.py [options] file > sorted
cat file | sort.py [options] > sorted
Options:
see usage()
"""
import os, sys
import fileinput
import getopt
from locale import setlocale, getdefaultlocale, LC_COLLATE, strxfrm
# The `sets` module is obsolete: from v2.6 on the built-in set/frozenset
# types replace it, and importing the old module triggers a warning.
if sys.version_info < (2, 6):
    from sets import Set


def _set(arg=''):
    """Build a set from *arg* with the set type this interpreter offers.

    On Python 2.6 and later the built-in ``set`` is used; on older
    interpreters the legacy ``sets.Set`` class is used instead.
    """
    if sys.version_info >= (2, 6):
        return set(arg)
    return Set(arg)
def usage():
    """Print the command-line help text to standard output.

    Returns:
        None.
    """
    # A parenthesised single argument works both as a Python 2 print
    # statement and as a Python 3 print() call; the bare statement form
    # used before is a SyntaxError under the python3 shebang.
    # The synopsis now lists the switches that getopt actually accepts
    # (-c/--clean and -s/--smart) instead of a non-existent "--strip".
    print("""
Usage:
sort.py [-h,--help] [-c,--clean] [-s,--smart] [-u,--unique] file|STDIN
Options:
-h, --help Display this help message;
-c, --clean Clean/strip all comments (#);
-s, --smart Smart sort (inc. commented words);
-u, --unique Remove duplicate lines.
""")
# Parse the command line: `opts` receives the recognised switches and
# `rargs` the remaining arguments (the input file names, if any).
try:
    opts, rargs = getopt.getopt(
        sys.argv[1:],
        "hcsu",
        ["help", "clean", "smart", "unique"])
except getopt.GetoptError:
    # Unknown switch: show the help text and exit with status 2.
    usage()
    sys.exit(2)

# Flags consulted by sort(); each stays 0 unless its switch was given.
unique_lines = strip_comments = smart_sort = 0

for opt, arg in opts:
    # A switch matches exactly one of the groups below.
    if opt in ("-h", "--help"):
        usage()
        sys.exit(2)
    elif opt in ("-c", "--clean"):
        strip_comments = 1
    elif opt in ("-s", "--smart"):
        smart_sort = 1
    elif opt in ("-u", "--unique"):
        unique_lines = 1
# Windows has trouble detecting/setting the locale, so a fixed locale
# name is used there instead of the detected default.
locale = getdefaultlocale()
# Compare by value: identity (`is`) on a string literal only works by
# accident of interning and raises a SyntaxWarning on modern Python.
if os.name == "nt":
    locale = "Lithuanian"
try:
    setlocale(LC_COLLATE, locale)
except Exception:
    # Best effort: keep running with whatever collation is already
    # active, but do not swallow SystemExit/KeyboardInterrupt.
    sys.stderr.write("Could not set locale\n")
def _tsmart(s):
    """Smart (custom) sort key built on strxfrm().

    A single leading comment character ("#") is ignored, so the string
    is ordered by the word that follows it.
    """
    bare = s[1:] if s.startswith("#") else s
    return strxfrm(bare)
def sort(lines):
    """Sort *lines* by locale collation and print them to standard output.

    Every line is stripped of surrounding whitespace first.  Further
    processing depends on the module-level flags:

    * ``smart_sort`` or ``strip_comments``: blank lines are dropped, and
      lines consisting only of a comment are dropped unless smart sort
      is on.
    * ``smart_sort``: comment-only lines are kept and ordered by the text
      after the leading "#" (see ``_tsmart``).
    * ``strip_comments``: a trailing comment is removed from lines that
      carry a word; with smart sort, comment-only lines whose "#" is
      followed by a space or tab are dropped as well.
    * ``unique_lines``: only the first occurrence of each line is kept.

    Args:
        lines: iterable of strings, one per input line.

    Returns:
        None; the sorted lines are printed one per line.
    """
    words = []
    seen = _set()
    for line in lines:
        line = line.strip()
        if smart_sort or strip_comments:
            parts = line.split("#")
            word = parts[0].strip()
            # Text between the first and the second "#", if any.
            comment = parts[1] if len(parts) >= 2 else None
            if not word:
                # NOTE(review): the nesting here was reconstructed from
                # flattened source -- confirm that blank lines and (without
                # smart sort) comment-only lines are meant to be dropped.
                if not (smart_sort and comment):
                    continue
                # XXX assumption: whitespace right after "#" marks a block
                # (or ordinary) comment rather than a commented-out
                # dictionary word, so it is removed when cleaning.
                if strip_comments and comment.startswith((" ", "\t")):
                    continue
            elif strip_comments:
                line = word
        if unique_lines:
            if line in seen:
                continue
            seen.add(line)
        words.append(line)

    words.sort(key=_tsmart if smart_sort else strxfrm)

    for line in words:
        # Call form: valid on Python 2 and 3; the bare print statement
        # is a SyntaxError under the python3 shebang.
        print(line)
if __name__ == "__main__":
    # Feed sort() the lines of the files left over after option parsing;
    # fileinput falls back to STDIN when that list is empty.
    input_lines = fileinput.input(rargs)
    sort(input_lines)
|