1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
|
# encoding: utf-8
#
# Takes a wiki document and extracts the options of the specified template, e.g.
#
# {{Infobox person
# |name = Casanova
# |image = Casanova_self_portrait.jpg
# |caption = A self portrait of Casanova
# |website =
# }}
#
# and returns the template data as JSON:
#
# {"name":"Casanova","caption":"A self portrait of Casanova","website":"","image":"Casanova_self_portrait.jpg"}
#
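# This script takes two optional arguments: a wiki file name and a template name.
# If the file is missing, a built-in sample document is parsed instead; the
# template name defaults to "Infobox person".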
# ex: ./template_extractor test.wiki "Infobox person"
#
require 'wikicloth'
require 'json'
# Parser subclass that records every template inclusion seen while the
# document is rendered, so the option values can be looked up by template
# name afterwards.
class TemplateExtractor < WikiCloth::Parser
  # Sets up the (initially empty) template log, hands +args+ to the
  # WikiCloth parser and renders the document once; the recorded templates
  # are filled in by the include_resource hook below during that render.
  def initialize(args = {})
    @templates = []
    super(args)
    to_html
  end

  # Returns the option data recorded for templates called +name+.
  #
  # When exactly one template matched, its data hash is returned directly;
  # otherwise an array of data hashes (empty when nothing matched).
  def extract(name)
    matches = @templates.select { |entry| entry[:name] == name }
                        .map { |entry| entry[:data] }
    return matches.first if matches.length == 1

    matches
  end

  # Link hook: yields the link text, or the URL when the text is blank.
  link_for do |url, text|
    if text.blank?
      url
    else
      text
    end
  end

  # Resource hook: collapses the option list into a name => value hash and
  # logs it under the resource name. The block evaluates to an empty string
  # (presumably so the template adds nothing to the rendered output --
  # confirm against WikiCloth).
  include_resource do |resource, options|
    settings = options.each_with_object({}) do |option, collected|
      collected[option[:name]] = option[:value]
    end
    @templates << { :name => resource, :data => settings }
    ""
  end
end
# Command-line driver.
#
# ARGV[0] - path of a wiki file to parse (optional). When it is absent or the
#           file does not exist, the built-in sample document below is used.
# ARGV[1] - name of the template to extract (optional, "Infobox person" by
#           default).
#
# Prints the extracted template data as JSON on standard output.
#
# File.exist? is used because File.exists? was deprecated and then removed in
# Ruby 3.2, where calling it raises NoMethodError.
if ARGV[0] && File.exist?(ARGV[0])
  wiki_data = File.read(ARGV[0])
else
  wiki_data = <<END_OF_DOC
{{Infobox person
|name = Casanova
|image = Casanova_self_portrait.jpg
|caption = A self portrait of Casanova
|website =
}}
END_OF_DOC
end
@wiki = TemplateExtractor.new(:data => wiki_data)
puts @wiki.extract(ARGV[1] || "Infobox person").to_json
|