1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
|
# -*- coding: utf-8 -*-
import codecs
import sys
def sort_items(input_filename, output_filename):
    """Rewrite a tab-separated key/value file with its entries sorted by key.

    Every input line is expected to look like ``key<TAB>value``.  Blank
    lines are ignored.  A line that does not split into exactly two
    tab-separated fields is echoed to stdout and skipped.  When a key occurs
    more than once, the value from the last occurrence is kept.

    Args:
        input_filename: Path of the UTF-8 encoded input file.
        output_filename: Path of the output file.  It is overwritten and
            written as UTF-8, one ``key<TAB>value`` entry per line, each
            terminated by a single line feed.
    """
    entries = {}
    with codecs.open(input_filename, "r", encoding="utf-8") as input_file:
        for line in input_file:
            # Iterating a file never yields an empty string, so blank lines
            # have to be detected by their content (covers "\n" and "\r\n").
            if not line.strip("\r\n"):
                continue
            try:
                key, value = line.split("\t")
            except ValueError:
                # Report the malformed line and move on; falling through
                # would reuse (or fail to find) the previous key/value.
                print(line)
                continue
            # rstrip copes with an empty value field, where indexing
            # value[-1] would raise IndexError.
            entries[key] = value.rstrip("\r\n")

    with open(output_filename, "wb") as output_file:
        for key in sorted(entries):
            record = key + "\t" + entries[key] + "\n"
            output_file.write(record.encode("utf-8"))
def reverse_items(input_filename, output_filename):
    """Invert a key -> values file into a value -> keys file.

    Every non-blank input line is expected to look like
    ``key<TAB>v1 v2 ...`` with the values separated by single spaces.  For
    each value the keys it appeared under are collected in input order.
    The output has one line per value, ``value<TAB>k1 k2 ...``, sorted by
    value.  Blank lines are ignored.

    The value field is split on single spaces, so an empty field or two
    consecutive spaces yield an empty-string value, which is recorded like
    any other.

    Args:
        input_filename: Path of the UTF-8 encoded input file.
        output_filename: Path of the output file.  It is overwritten and
            written as UTF-8 with line-feed line endings.

    Raises:
        ValueError: If a non-blank line does not consist of exactly two
            tab-separated fields.
    """
    keys_by_value = {}
    with codecs.open(input_filename, "r", encoding="utf-8") as input_file:
        for line in input_file:
            # Iterating a file never yields an empty string, so blank lines
            # have to be detected by their content (covers "\n" and "\r\n").
            if not line.strip("\r\n"):
                continue
            key, value = line.split("\t")
            # rstrip copes with an empty value field, where indexing
            # value[-1] would raise IndexError.
            for token in value.rstrip("\r\n").split(" "):
                keys_by_value.setdefault(token, []).append(key)

    with open(output_filename, "wb") as output_file:
        for token in sorted(keys_by_value):
            record = token + "\t" + " ".join(keys_by_value[token]) + "\n"
            output_file.write(record.encode("utf-8"))
def find_target_items(input_filename, keyword):
    """Print every entry that has a value containing ``keyword``.

    Every non-blank input line is expected to look like
    ``key<TAB>v1 v2 ...`` with the values separated by single spaces.  A
    line is written to stdout unchanged, including its original line
    ending, when ``keyword`` is a substring of at least one of its values.
    Each matching line is written once, however many of its values match.
    Blank lines are ignored.

    Args:
        input_filename: Path of the UTF-8 encoded input file.
        keyword: Substring to look for inside the individual values.

    Raises:
        ValueError: If a non-blank line does not consist of exactly two
            tab-separated fields.
    """
    with codecs.open(input_filename, "r", encoding="utf-8") as input_file:
        for line in input_file:
            # Iterating a file never yields an empty string, so blank lines
            # have to be detected by their content (covers "\n" and "\r\n").
            if not line.strip("\r\n"):
                continue
            _key, value = line.split("\t")
            # rstrip copes with an empty value field, where indexing
            # value[-1] would raise IndexError.
            tokens = value.rstrip("\r\n").split(" ")
            # any() stops at the first hit, so a line with several matching
            # values is not emitted more than once.
            if any(keyword in token for token in tokens):
                sys.stdout.write(line)
|