File: zim2trac.py

package info (click to toggle)
zim 0.76.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 10,952 kB
  • sloc: python: 68,612; xml: 1,270; javascript: 512; sh: 101; makefile: 47
file content (173 lines) | stat: -rw-r--r-- 5,155 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173

# Copyright 2009 Pablo Angulo

'''Script to export zim wiki pages to trac / mediawiki

To use it, call

	python trac2zim.py notebook output_folder prefix

where prefix is a string you put before each wiki page name. It will
fill output_folder with plain text files ready to be loaded with trac-admin:

	trac-admin /path/to/project wiki load output_folder

zim links like [[:Software:note taking:zim|zim]] are flattened to wiki
entries like [Software_note_taking_zim zim].
'''

import re
import sys
import os

#buscaCabeceras=re.compile('(={1:5})([^=]*)(={1:5})')
def flatten(linkName):
	'''Changes a zim link, possibly with categories, to a trac link

	it also removes accents and other spanish special characters
	'''
	#remove final ':' character and
	name = linkName[:-1] if linkName[-1] == ':' else linkName
	return removeSpecialChars(name.replace(':', '_').replace(' ', '_'))

def removeSpecialChars(s):
	'''certain trac installation reported problems with special chars

	other trac systems loaded all files without problem

	the problem is only for file names and wiki pages names, not for content
	'''
	return s.replace('á', 'a').replace('é', 'e').replace('í', 'i').replace('ó', 'o').replace('ú', 'u').replace('ñ', 'n').replace('Á', 'A').replace('É', 'E').replace('Í', 'I').replace('Ó', 'O').replace('Ú', 'U').replace('Ñ', 'ñ')

cabecera = re.compile("(={1,6})([^=/]+?)(={1,6})")
inlineVerbatim = re.compile("''([^']+?)''")
#~ multilineVerbatim=re.compile("\n[\t](.+?)\n")
negrita = re.compile(r'\*\*([^*]+?)\*\*')
italic = re.compile(r'//([^/\n\]]+?)//')
bracketedURL = re.compile(r'\[\[(http://[^|]+)\|([^|]+?)\]\]')
#TODO: separar links relativos y absolutos
simpleRelLink = re.compile(r'\[\[([^:][^|]+?)\]\]')
namedRelLink = re.compile(r'\[\[([^:][^|]+?)\|([^|]+?)\]\]')
simpleAbsLink = re.compile(r'\[\[:([^|]+?)\]\]')
namedAbsLink = re.compile(r'\[\[:([^|]+?)\|([^|]+?)\]\]')
images = re.compile(r'([^{]){{/(.+?)\}\}')
def translate(nota, prefix1, prefix2):
	'''Takes a note in zim format and returns a note in trac format
	'''
	#duplicate all line breaks
	nota = nota.replace('\n', '\n\n')
	# Headings
	mm = cabecera.search(nota)
	lista = []
	lastIndex = 0
	while mm:
		lista.append(nota[lastIndex:mm.start()])
		gg = mm.groups()
		iguales = len(gg[0])
		lista.append("=" * (7 - iguales) + gg[1] + "=" * (7 - iguales))
		lastIndex = mm.end()
		mm = cabecera.search(nota, lastIndex)
	lista.append(nota[lastIndex:])
	nota = ''.join(lista)

	#inlineVerbatim
	nota = inlineVerbatim.sub("{{{\\1}}}", nota)
	#multiline verbatim
	#TODO
	#bold
	nota = negrita.sub("'''\\1'''", nota)
	#italic
	nota = italic.sub("''\\1''", nota)

	#bracketedURL
	nota = bracketedURL.sub("[\\1 \\2]", nota)
	#~ #simple links
	#~ nota=simpleLink.sub("[wiki:\\1]",nota)
	#~ #named links
	#~ nota=namedLink.sub("[wiki:\\1 \\2]",nota)
	#simple relative links
	mm = simpleRelLink.search(nota)
	lista = []
	lastIndex = 0
	while mm:
		lista.append(nota[lastIndex:mm.start()])
		gg0 = mm.groups()[0]
		lista.append("[wiki:" + prefix1 + prefix2 + flatten(gg0) + " " + gg0 + "]")
		lastIndex = mm.end()
		mm = simpleRelLink.search(nota, lastIndex)
	lista.append(nota[lastIndex:])
	nota = ''.join(lista)

	mm = simpleAbsLink.search(nota)
	lista = []
	lastIndex = 0
	while mm:
		lista.append(nota[lastIndex:mm.start()])
		gg0 = mm.groups()[0]
		lista.append("[wiki:" + prefix1 + flatten(gg0) + " " + gg0 + "]")
		lastIndex = mm.end()
		mm = simpleAbsLink.search(nota, lastIndex)
	lista.append(nota[lastIndex:])
	nota = ''.join(lista)

	#named relativelinks
	mm = namedRelLink.search(nota)
	lista = []
	lastIndex = 0
	while mm:
		lista.append(nota[lastIndex:mm.start()])
		gg = mm.groups()
		lista.append("[wiki:" + prefix1 + prefix2 + flatten(gg[0]) + " " + gg[1] + "]")
		lastIndex = mm.end()
		mm = namedRelLink.search(nota, lastIndex)
	lista.append(nota[lastIndex:])
	nota = ''.join(lista)

	#named absolute links
	mm = namedAbsLink.search(nota)
	lista = []
	lastIndex = 0
	while mm:
		lista.append(nota[lastIndex:mm.start()])
		gg = mm.groups()
		lista.append("[wiki:" + prefix1 + flatten(gg[0]) + " " + gg[1] + "]")
		lastIndex = mm.end()
		mm = namedAbsLink.search(nota, lastIndex)
	lista.append(nota[lastIndex:])
	nota = ''.join(lista)

	#lists
	nota = nota.replace('\n* ', '\n * ')

	#images
	nota = images.sub("\\1[[Image(\\2)]]", nota)

	return nota

def processPath(pathin, pathout, prefix1, prefix2=''):
	for archivo in os.listdir(pathin):
		fullPath = os.path.join(pathin, archivo)
		if archivo[-3:] == 'txt':
			fichero = open(fullPath, mode='r')
			nota = fichero.read()
			fichero.close()

			nota_out = translate(nota, prefix1, prefix2)

			#~ nameout= prefix+"_"+archivo[:-4] if prefix else archivo[:-4]

			fichero = open(os.path.join(pathout, prefix1 + prefix2 + removeSpecialChars(archivo[:-4])), mode='w')
			fichero.write(nota_out)
			fichero.close()
		elif os.path.isdir(fullPath):
			print(pathin, archivo, fullPath)

			processPath(fullPath, pathout, prefix1, prefix2 + removeSpecialChars(archivo) + "_")

if __name__ == '__main__':

	pathin = sys.argv[1]
	pathout = sys.argv[2]
	prefix = sys.argv[3]

	processPath(pathin, pathout, prefix)