# -*- coding: iso-8859-1 -*-
"""
MoinMoin - convert content in 1.5.8 wiki markup to 1.6.0 style
by using a modified 1.5.8 parser as translator.
Assuming we have this "renames" map:
-------------------------------------------------------
'PAGE', 'some_page' -> 'some page'
'FILE', 'with%20blank.txt' -> 'with blank.txt'
Markup transformations needed:
-------------------------------------------------------
["some_page"] -> [[some page]] # renamed
[:some_page:some text] -> [[some page|some text]]
[:page:text] -> [[page|text]]
(with a page not being renamed)
attachment:with%20blank.txt -> [[attachment:with blank.txt]]
attachment:some_page/with%20blank.txt -> [[attachment:some page/with blank.txt]]
The attachment processing should also urllib.unquote the filename (or at
least replace %20 by space) and put it into "quotes" if it contains spaces.
@copyright: 2007 MoinMoin:JohannesBerg,
2007 MoinMoin:ThomasWaldmann
@license: GNU GPL, see COPYING for details.
"""
import re
from MoinMoin import i18n
i18n.wikiLanguages = lambda: {}
from MoinMoin import config, wikiutil, macro
from MoinMoin.action import AttachFile
from MoinMoin.Page import Page
from text_moin158_wiki import Parser
def convert_wiki(request, pagename, intext, renames):
    """ Convert page content written in 1.5.8 wiki markup.

    The text is handed to a Converter and the output the converter writes
    to the request is collected via request.redirectedOutput.

    @param request: request object (must offer redirectedOutput)
    @param pagename: name of the page the content belongs to
    @param intext: page content in the old markup
    @param renames: rename map that is passed on to the Converter
    @return: converted page content
    """
    eol = '\r\n'
    # the converter works on complete lines, so temporarily terminate the last one
    had_eol = intext.endswith(eol)
    if not had_eol:
        intext += eol
    converter = Converter(request, pagename, intext, renames)
    converted = request.redirectedOutput(converter.convert, request)
    # take away the line end again that was only added for the conversion
    if not had_eol and converted.endswith(eol):
        converted = converted[:-len(eol)]
    return converted
STONEAGE_IMAGELINK = False # True for ImageLink(target,image), False for ImageLink(image,target)

# copied from moin 1.6.0 macro/ImageLink.py (to be safe in case we remove ImageLink some day)
# ... and slightly modified/refactored for our needs here.
# hint: using parse_quoted_separated from wikiutil does NOT work here, because we do not have
#       quoted urls when they contain a '=' char in the 1.5 data input.
def explore_args(args):
    """ Split an ImageLink argument string into positional and keyword parameters.

    Arguments are separated by commas. An argument of the form key=value is
    a keyword parameter if key (ignoring case and surrounding whitespace) is
    one of 'width', 'height' or 'alt'. Any other argument containing '=' is
    only kept (as positional parameter) if it contains '://' and no keyword
    parameter was seen before it; otherwise it is dropped.

    @param args: raw macro argument string, may be None or empty
    @return: tuple (pp, kw) - pp is the list of positional parameters
             (the first two are swapped if STONEAGE_IMAGELINK is set),
             kw is a dict mapping the lowercased keyword to its value
    """
    kw_allowed = ('width', 'height', 'alt')
    kw = {} # keyword args
    pp = [] # positional parameters
    if args:
        args = [arg.strip() for arg in args.split(',')]
    else:
        args = []
    for arg in args:
        if '=' not in arg:
            pp.append(arg)
            continue
        key, value = arg.split('=', 1)
        # Accept "width = 100" as well as "WIDTH=100". The key is compared
        # as it is: coercing arbitrary text with str() fails for non-ASCII
        # input on Python 2.
        key = key.strip().lower()
        if key in kw_allowed:
            # key is known to be plain ASCII here, so str() is safe
            kw[str(key)] = value.strip()
        elif not kw and '://' in arg:
            # avoid that urls with "=" are interpreted as keyword,
            # assuming that this is the image
            pp.append(arg)
    if STONEAGE_IMAGELINK and len(pp) >= 2:
        pp[0], pp[1] = pp[1], pp[0]
    return pp, kw
class Converter(Parser):
def __init__(self, request, pagename, raw, renames):
    """ Set up a converter for one page.

    @param request: request object, its cfg is used to get the macro names
    @param pagename: name of the page that gets converted
    @param raw: page content in the old markup
    @param renames: rename map, see _replace for the keys that get looked up
    """
    self.request = request
    self.pagename = pagename
    self.raw = raw
    self.renames = renames
    self._ = None
    self.in_pre = 0
    # fill the macro names (plus ImageLink) into the rules template
    macro_names = ['ImageLink', ] + macro.getNames(self.request.cfg)
    self.formatting_rules = self.formatting_rules % {'macronames': u'|'.join(macro_names)}
# no change
def return_word(self, word):
    """ Return the matched markup unchanged.

    @param word: the text that matched a markup rule
    @return: word, as it was given
    """
    return word

# All markup types listed here are bound to return_word, i.e. the matched
# text is passed through as it is. replace() finds the handler for a match
# group by building the name '_<type>_repl'.
_emph_repl = return_word
_emph_ibb_repl = return_word
_emph_ibi_repl = return_word
_emph_ib_or_bi_repl = return_word
_u_repl = return_word
_strike_repl = return_word
_sup_repl = return_word
_sub_repl = return_word
_small_repl = return_word
_big_repl = return_word
_tt_repl = return_word
_tt_bt_repl = return_word
_remark_repl = return_word
_table_repl = return_word
_tableZ_repl = return_word
_rule_repl = return_word
_smiley_repl = return_word
_smileyA_repl = return_word
_ent_repl = return_word
_ent_numeric_repl = return_word
_ent_symbolic_repl = return_word
_heading_repl = return_word
_email_repl = return_word
_notword_repl = return_word
_indent_repl = return_word
_li_none_repl = return_word
_li_repl = return_word
_ol_repl = return_word
_dl_repl = return_word
_comment_repl = return_word
# translate pagenames using pagename translation map
def _replace(self, key):
    """ Translate an item name using the renames map.

    The page name in key is made absolute (relative to the current page)
    before the lookup. If the map has no entry, the lookup is repeated with
    the blanks in the absolute page name replaced by underscores (the same
    magic 1.5 applied to page names). If that fails too, the name is not
    renamed.

    @param key: either a 2-tuple ('PAGE', pagename)
                or a 3-tuple ('FILE', pagename, filename)
    @return: for 'PAGE' the new page name (made relative again if the given
             name was relative), for 'FILE' the new file name
    """
    here = self.pagename
    kind, page, fname = (key + (None, ))[:3]
    abs_page = wikiutil.AbsPageName(here, page)

    def lookup(*tail):
        # first try the name as it is, then the 1.5 style magic name
        for candidate in (abs_page, abs_page.replace(u' ', u'_')):
            found = self.renames.get((kind, candidate) + tail)
            if found is not None:
                return found
        return None

    if kind == 'PAGE':
        result = lookup()
        if result is None:
            # not in the map under either name - that means we do not rename it!
            result = page
        elif result != page and abs_page != page:
            # we have to fix the (absolute) new name to be a relative name (as it was before)
            result = wikiutil.RelPageName(here, result)
    elif kind == 'FILE':
        result = lookup(fname)
        if result is None:
            # not in the map under either name - that means we do not rename it!
            result = fname
    return result
def _replace_target(self, target):
    """ Translate the page name of a link target, keeping an anchor.

    @param target: page name, optionally followed by '#' and an anchor
                   (the last '#' separates the anchor)
    @return: target with the page name part translated by _replace
    """
    if '#' not in target:
        return self._replace(('PAGE', target))
    page, anchor = target.rsplit('#', 1)
    page = self._replace(('PAGE', page))
    return '%s#%s' % (page, anchor)
# markup conversion
def _macro_repl(self, word):
# we use [[...]] for links now, macros will be <<...>>
macro_rule = ur"""
\[\[
(?P<macro_name>\w+)
(\((?P<macro_args>.*?)\))?
\]\]
"""
word = unicode(word) # XXX why is word not unicode before???
m = re.match(macro_rule, word, re.X|re.U)
macro_name = m.group('macro_name')
macro_args = m.group('macro_args')
if macro_name == 'ImageLink':
fixed, kw = explore_args(macro_args)
#print "macro_args=%r" % macro_args
#print "fixed=%r, kw=%r" % (fixed, kw)
image, target = (fixed + ['', ''])[:2]
if image is None:
image = ''
if target is None:
target = ''
if '://' not in image:
# if it is not a URL, it is meant as attachment
image = u'attachment:%s' % image
if not target:
target = image
elif target.startswith('inline:'):
target = 'attachment:' + target[7:] # we don't support inline:
elif target.startswith('wiki:'):
target = target[5:] # drop wiki:
image_attrs = []
alt = kw.get('alt') or ''
width = kw.get('width')
if width is not None:
image_attrs.append(u"width=%s" % width)
height = kw.get('height')
if height is not None:
image_attrs.append(u"height=%s" % height)
image_attrs = u", ".join(image_attrs)
if image_attrs:
image_attrs = u'|' + image_attrs
if alt or image_attrs:
alt = u'|' + alt
result = u'[[%s|{{%s%s%s}}]]' % (target, image, alt, image_attrs)
else:
if macro_args:
macro_args = u"(%s)" % macro_args
else:
macro_args = u''
result = u"<<%s%s>>" % (macro_name, macro_args)
# XXX later check whether some to be renamed pagename is used as macro param
return result
def _word_repl(self, word, text=None):
"""Handle WikiNames."""
if not text:
return word
else: # internal use:
return '[[%s|%s]]' % (word, text)
def _wikiname_bracket_repl(self, word):
"""Handle special-char wikinames."""
pagename = word[2:-2]
if pagename:
pagename = self._replace(('PAGE', pagename))
return '[[%s]]' % pagename
else:
return word
def _interwiki_repl(self, word):
    """Handle InterWiki links.

    If wikiutil.resolve_wiki flags the wiki name as bad, the word is kept.
    Links to this wiki ('Self' or the configured interwikiname) become
    local page links (with the page name translated), links to other wikis
    only get brackets if the unquoted page name contains a blank.
    """
    wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word)
    if wikitag_bad:
        return word

    wikiname, pagename = word.split(':', 1)
    pagename = wikiutil.url_unquote(pagename) # maybe someone has used %20 for blanks in pagename
    camelcase = wikiutil.isStrictWikiname(pagename)

    if wikiname not in ('Self', self.request.cfg.interwikiname):
        if ' ' in pagename: # we could get a ' ' by urlunquoting
            return '[[%s:%s]]' % (wikiname, pagename)
        return '%s:%s' % (wikiname, pagename)

    # link into this wiki - optimize special case
    pagename = self._replace(('PAGE', pagename))
    if camelcase:
        return '%s' % pagename
    return '[[%s]]' % pagename
def interwiki(self, url_and_text):
    """ Convert a wiki: link (with or without a link text).

    @param url_and_text: sequence with either one item (the url) or two
                         items (url and link text); the url starts with a
                         scheme followed by ':'
    @return: the converted markup: a page link for local targets, else an
             interwiki link, an interwiki image embedding ({{...}}) or a
             bare wikiname:pagename
    """
    if len(url_and_text) == 1:
        url = url_and_text[0]
        text = ''
    else:
        url, text = url_and_text
        text = '|' + text

    # cut off the scheme, what remains is wikiname and pagename
    scheme, url = url.split(':', 1)
    wikiname, pagename = wikiutil.split_wiki(url)
    if (url.startswith(wikiutil.CHILD_PREFIX) or # fancy link to subpage [wiki:/SubPage text]
        Page(self.request, url).exists()): # fancy link to local page [wiki:LocalPage text]
        pagename = wikiutil.url_unquote(url)
        pagename = self._replace_target(pagename)
        return '[[%s%s]]' % (pagename, text)

    if wikiname in ('Self', self.request.cfg.interwikiname, ''): # [wiki:Self:LocalPage text] or [:LocalPage:text]
        pagename = wikiutil.url_unquote(pagename)
        pagename = self._replace_target(pagename)
        return '[[%s%s]]' % (pagename, text)

    wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, url)
    if wikitag_bad: # likely we got some /InterWiki as wikitail, we don't want that!
        pagename = wikiutil.url_unquote(pagename)
        pagename = self._replace_target(pagename)
        wikitail = pagename
    else: # good
        wikitail = wikiutil.url_unquote(wikitail)

    # a picture gets embedded, everything else gets linked
    if wikiutil.isPicture(wikitail):
        return '{{%s:%s%s}}' % (wikitag, wikitail, text)
    else:
        # brackets are only needed if there is a blank or a link text
        if ' ' not in wikitail and not text:
            return '%s:%s' % (wikitag, wikitail)
        else:
            return '[[%s:%s%s]]' % (wikitag, wikitail, text)
def attachment(self, url_and_text):
    """ This gets called on attachment URLs.

    @param url_and_text: sequence with either one item (the url) or two
                         items (url and link text); the url starts with
                         the scheme followed by ':'
    @return: {{drawing:...}} for the drawing scheme, {{attachment:...}} for
             pictures and for the inline scheme, else [[attachment:...]]
    """
    if len(url_and_text) == 1:
        url, label = url_and_text[0], ''
    else:
        url, label = url_and_text
        label = '|' + label

    scheme, fname = url.split(":", 1)
    pagename, fname = AttachFile.absoluteName(fname, self.pagename)
    same_page = pagename == self.pagename

    # look the file name up as it is and once more after url-unquoting it
    fname = self._replace(('FILE', pagename, fname))
    fname = wikiutil.url_unquote(fname)
    fname = self._replace(('FILE', pagename, fname))
    pagename = self._replace(('PAGE', pagename))

    # attachments of the current page are referenced without page name
    if same_page:
        name = fname
    else:
        name = "%s/%s" % (pagename, fname)

    if scheme == 'drawing':
        return "{{drawing:%s%s}}" % (name, label)
    # images are always inlined (just like for other URLs),
    # the inline scheme embeds the attachment, too
    if wikiutil.isPicture(name) or scheme == 'inline':
        return '{{attachment:%s%s}}' % (name, label)
    # 'attachment'
    return '[[attachment:%s%s]]' % (name, label)
def _url_repl(self, word):
    """Handle literal URLs including inline images.

    wiki: URLs and attachment URLs are given to interwiki() / attachment(),
    a URL that wikiutil.isPicture accepts gets the new {{...}} markup for
    inline images, any other URL is kept as it is.
    """
    prefix = word.split(":", 1)[0]
    if prefix == 'wiki':
        converted = self.interwiki([word])
    elif prefix in self.attachment_schemas:
        converted = '%s' % self.attachment([word])
    elif wikiutil.isPicture(word): # magic will go away in 1.6!
        converted = '{{%s}}' % word # new markup for inline images
    else:
        converted = word
    return converted
def _url_bracket_repl(self, word):
    """Handle bracketed URLs.

    @param word: the matched markup; its first and last character (the
                 brackets) are cut off before it gets processed
    @return: the converted markup, always a [[...]] link
    """
    word = word[1:-1] # strip brackets

    # Local extended link? (starts with ':', page and text are separated by ':')
    if word[0] == ':':
        words = word[1:].split(':', 1)
        link, text = (words + ['', ''])[:2]
        # a text that just repeats the link is not needed
        if link.strip() == text.strip():
            text = ''
        link = self._replace_target(link)
        if text:
            text = '|' + text
        return '[[%s%s]]' % (link, text)

    # Traditional split on space
    words = word.split(None, 1)
    if words[0][0] == '#':
        # anchor link - no rename lookup is done for it
        link, text = (words + ['', ''])[:2]
        if link.strip() == text.strip():
            text = ''
        if text:
            text = '|' + text
        return '[[%s%s]]' % (link, text)

    scheme = words[0].split(":", 1)[0]
    if scheme == "wiki":
        return self.interwiki(words)
    if scheme in self.attachment_schemas:
        m = self.attachment(words)
        if m.startswith('{{') and m.endswith('}}'):
            # with url_bracket markup, 1.5.8 parser does not embed, but link!
            m = '[[%s]]' % m[2:-2]
        return m

    # any other URL, optionally followed by a description
    target, desc = (words + ['', ''])[:2]
    if wikiutil.isPicture(desc) and re.match(self.url_rule, desc):
        # the description is a picture URL: use the embedded picture as link text
        return '[[%s|{{%s}}]]' % (target, desc)
    else:
        if desc:
            desc = '|' + desc
        return '[[%s%s]]' % (target, desc)
def _pre_repl(self, word):
w = word.strip()
if w == '{{{' and not self.in_pre:
self.in_pre = True
elif w == '}}}' and self.in_pre:
self.in_pre = False
return word
def _processor_repl(self, word):
    """ Switch in_pre on and return the matched markup unchanged.

    @param word: the matched markup
    @return: word, unchanged
    """
    # convert() only scans for the closing }}} as long as in_pre is set
    self.in_pre = True
    return word
def scan(self, scan_re, line):
""" Scans one line - append text before match, invoke replace() with match, and add text after match. """
result = []
lastpos = 0
for match in scan_re.finditer(line):
# Add text before the match
if lastpos < match.start():
result.append(line[lastpos:match.start()])
# Replace match with markup
result.append(self.replace(match))
lastpos = match.end()
# Add remainder of the line
result.append(line[lastpos:])
return u''.join(result)
def replace(self, match):
""" Replace match using type name """
result = []
for _type, hit in match.groupdict().items():
if hit is not None and not _type in ["hmarker", ]:
# Get replace method and replace hit
replace = getattr(self, '_' + _type + '_repl')
# print _type, hit
result.append(replace(hit))
return ''.join(result)
else:
# We should never get here
import pprint
raise Exception("Can't handle match %r\n%s\n%s" % (
match,
pprint.pformat(match.groupdict()),
pprint.pformat(match.groups()),
))
return ""
def convert(self, request):
""" For each line, scan through looking for magic
strings, outputting verbatim any intervening text.
"""
self.request = request
# prepare regex patterns
rules = self.formatting_rules.replace('\n', '|')
if self.request.cfg.bang_meta:
rules = ur'(?P<notword>!%(word_rule)s)|%(rules)s' % {
'word_rule': self.word_rule,
'rules': rules,
}
pre_rules = r'''(?P<pre>\}\}\})'''
pre_scan_re = re.compile(pre_rules, re.UNICODE)
scan_re = re.compile(rules, re.UNICODE)
eol_re = re.compile(r'\r?\n', re.UNICODE)
rawtext = self.raw
# remove last item because it's guaranteed to be empty
self.lines = eol_re.split(rawtext)[:-1]
self.in_processing_instructions = True
# Main loop
for line in self.lines:
# ignore processing instructions
if self.in_processing_instructions:
found = False
for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
"#pragma", "#form", "#acl", "#language"):
if line.lower().startswith(pi):
self.request.write(line + '\r\n')
found = True
break
if not found:
self.in_processing_instructions = False
else:
continue # do not parse this line
if not line.strip():
self.request.write(line + '\r\n')
else:
# Scan line, format and write
scanning_re = self.in_pre and pre_scan_re or scan_re
formatted_line = self.scan(scanning_re, line)
self.request.write(formatted_line + '\r\n')
|