#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
from optparse import OptionParser


# Save file (to debug output)
def save(contents, file_name):
    """Write ``contents`` to ``file_name``, replacing any existing file.

    Arguments:
    contents  -- text to write
    file_name -- path of the file to create or overwrite
    """
    # "with" closes the handle even when write() raises.
    with open(file_name, "w") as out_file:
        out_file.write(contents)


# Get handbook from the web
def obtainHTML(url, verbose, language_code='en'):
    """Download the handbook HTML and keep a copy in the ``sources`` directory.

    Arguments:
    url           -- address of the printable handbook export
    verbose       -- print progress information when true
    language_code -- used to name the saved copy,
                     ``sources/MuseScore-<language_code>.html``

    Returns the downloaded HTML exactly as read from the server.
    """
    if verbose:
        print('Obtain HTML from musescore.org')
    import urllib2
    sock = urllib2.urlopen(url)
    try:
        html_source = sock.read()
    finally:
        # Release the connection even if the read fails part-way.
        sock.close()

    if verbose:
        print('Save HTML sources to the sources directory')
    import os
    if not os.path.isdir('sources'):
        os.mkdir('sources')
    file_name = 'MuseScore-' + language_code + '.html'
    with open('sources/' + file_name, "w") as out_file:
        out_file.write(html_source)
    return html_source


# NOTE(review): in this copy of the file the start of insertH1Anchors (which followed
# here on the same physical line) has lost its HTML string literals and the code
# between them, so it is not valid Python and cannot be restored from what is visible.
# The surviving text is kept verbatim (only wrapped) so that nothing is lost:
#
#   # Give level 1 headings an anchor tag
#   # with a name attribute based on the heading text
#   def insertH1Anchors(html_source, anchors, verbose):
#   if verbose: print 'Insert anchors for level one headings'
#   import urllib2
#   split = html_source.split('')+1:split[i].index('')].decode("utf-8").lower().encode("utf-8").replace(" ","-")
#   name = name.replace("'","") #remove HTML encoding for French apostrophe
#   name = name.replace(",","").replace("(","").replace(")","") #remove punctuation
#   name = name.replace("-a-","-") #drop unnessary words
#   name = urllib2.quote(name).lower() #percent encode name to match URLs
#   name = name.replace('%c3%89','%c3%a9') #work-around for text encoding bug
#   name = name.replace('%c5%81','%c5%82') #manually convert to lower case (Python doesn't seem know the lowercase equivalent of this charater
#   name = name.replace('%c3%9a','%c3%ba') #manually convert Ú to lower case ú (Hungarian handbook)
#   name = name.replace('%c3%96','%c3%b6') #manually convert Ö to lower case ö (Hungarian handbook)
#   name = name.replace('%c3%9c','%c3%bc') #manually convert Ü to lower case ü (Hungarian handbook)
#   name = name.replace('li%c3%b1as','li%c3%b1') #workaround incorrect url on website (Galacian handbook)
#   split[i-1] = split[i-1] + ''
#   anchors.append(name)
#   #print name
#   html_source = '','',1)
return html_source # Give h1 tags a chapter heading def chapterHeading(html_source, verbose, language_code): if verbose: print "Add chapter headings" chapter = 'Chapter [number]' #Default English if language_code == 'nl': chapter = 'Hoofdstuk [number]' elif language_code == 'bg': chapter = 'глава [number]' elif language_code == 'ca': chapter = 'Capítol [number]' elif language_code == 'ca': chapter = 'Kapitola [number]' elif language_code == 'da': chapter = 'Kapitel [number]' elif language_code == 'de': chapter = 'Kapitel [number]' elif language_code == 'el': chapter = 'Κεφάλαιο [number]' elif language_code == 'es': chapter = 'Capítulo [number]' elif language_code == 'fi': chapter = 'Luku [number]' elif language_code == 'fr': chapter = 'Chapitre [number]' elif language_code == 'gl': chapter = 'Capítulo [number]' elif language_code == 'hu': chapter = '[number] Fejezet' elif language_code == 'it': chapter = 'Capitolo [number]' elif language_code == 'ja': chapter = '章[number]' elif language_code == 'nb': chapter = 'Kapittel [number]' elif language_code == 'pl': chapter = 'Rozdział [number]' elif language_code == 'pt-br': chapter = 'Capítulo [number]' elif language_code == 'ro': chapter = 'Capitolul [number]' elif language_code == 'ru': chapter = 'Глава [number]' elif language_code == 'zh-hans': chapter = '第 [number] 章' html_source = html_source.replace('

','') #remove empty header counter = 1 i = html_source.find('

') while i > -1: i = html_source.find('

',i+60) html_source = html_source[:i] + html_source[i:].replace('

','' + chapter.replace('[number]',str(counter)) + '

',1) counter = counter + 1 return html_source # Give level 3 headings an anchor tag # with a name attribute based on h3 id attribute def insertH3Anchors(html_source, anchors, verbose): if verbose: print 'Insert anchors for level three headings' split = html_source.split('

' anchors.append(id) # list of anchors throughout document html_source = '

# NOTE(review): the original physical line starts with the tail of fixLinks, whose
# beginning is missing from this copy of the file. The surviving text is kept verbatim
# (only wrapped) so that nothing is lost; it is not runnable as it stands:
#
#   -1 and language_code != 'en': #check for website bug that sometimes links to English URL instead of local language URL
#   if internal_href.find('/node/1257') < 0: # check it is not a link to a bug report
#   print " * WARNING: English language link: ", internal_href
#   elif internal_href.find('freelinking') > -1: #if url contains the "freelinking" text it means there is no matching page in the handbook
#   print " * WARNING: page does not exist: ", internal_href
#   elif url_language:
#   if internal_href[url_language.start()+1:url_language.end()-1] != language_code: #check whether url language code and handbook language code match
#   print " * WARNING: Language does not match handbook ", internal_href
#   elif internal_href[0:7] != 'mailto:' and internal_href[0:4] != 'http':
#   print " * WARNING: no anchor tag corresponding to ", internal_href
#   html_source = 'href="'.join(split)
#   return html_source


# Remove base tag which interferes with internal links
def removeBaseTag(html_source, language_code='en'):
    """Return ``html_source`` with its first ``base`` tag removed.

    Arguments:
    html_source   -- handbook HTML
    language_code -- accepted for symmetry with the other steps; not used here

    The document is parsed with BeautifulSoup and serialised again with ``str()``.
    """
    from BeautifulSoup import BeautifulSoup
    BeautifulSoup.NESTABLE_TAGS.update({'kbd': []})  # add 'kbd' to list of nestable tags
    html_soup = BeautifulSoup(html_source)

    # Ask the parser for the tags instead of testing for the substring "base":
    # the word can appear in ordinary text without any <base> tag being present,
    # in which case indexing the empty result list raised IndexError.
    base_tags = html_soup('base')
    if base_tags:
        base_tags[0].extract()  # remove base tag from document

    # NOTE(review): the file's indentation is lost, so it is not certain whether the
    # original re-serialised only when a tag was removed; always doing so is assumed.
    html_source = str(html_soup)
    return html_source


# NOTE(review): the original physical line ends with the start of addCustomStyles.
# Its string literals are damaged in this copy of the file (comment markers and HTML
# stripped), so it is kept verbatim (only wrapped) rather than rewritten:
#
#   # Link pdfstyle.css and remove css from website
#   def addCustomStyles(html_source, verbose, language_code='en'):
#   # Allow for language-specific fonts
#   def externalFonts(full_css, language_code='en'):
#   import re
#   external_fonts = 'default'
#   if (language_code == 'ja'):
#   external_fonts = '''/* Normal / @font-face { font-family: "Sazanami Gothic"; src: url(font/sazanami-20040629/sazanami-gothic.ttf); } / Normal / @font-face { font-family: "Sazanami Mincho"; src: url(font/sazanami-20040629/sazanami-mincho.ttf); } '''
#   full_css = re.sub('DejaVu Sans','Sazanami Gothic',full_css)
#   full_css = re.sub('DejaVu Serif','Sazanami Mincho',full_css)
#   elif (language_code == 'zh-hans'):
#   external_fonts = '''/ Normal /
@font-face { font-family: "Zenhei"; src: url(font/zh-hans/wqy-zenhei.ttf); } / Normal / @font-face { font-family: "Ukai"; src: url(font/zh-hans/ukai00.ttf); } ''' full_css = re.sub('DejaVu Sans','Zenhei',full_css) full_css = re.sub('DejaVu Serif','Ukai',full_css) if (external_fonts != 'default'): pattern = re.compile(r'/\ Begin External Fonts \/./\* End External Fonts \/',re.DOTALL) full_css = re.sub(pattern, external_fonts, full_css) return full_css if verbose: print 'Add custom styles' css_file = open("pdfstyle.css","r") sock = css_file.read() css_file.close() sock = externalFonts(sock,language_code) if language_code == 'ja' or language_code == 'zh-hans': sock += 'body {-pdf-word-wrap:"CJK"}' html_source = html_source.replace('','\n') if verbose: print 'Remove unwanted styles' from BeautifulSoup import BeautifulSoup BeautifulSoup.NESTABLE_TAGS.update({'kbd':[]}) # add 'kbd' to list of nestable tags html_soup = BeautifulSoup(html_source) for i in reversed( range(0, len(html_soup('style')) ) ): ##if html_soup('h1')[i].parent.parent.parent.name == 'div': if html_soup('style')[i].parent.name != 'head': if verbose: print ' ' + str(i) + " " + html_soup('style')[i].name html_soup('style')[i].extract() # remove style from document for i in reversed( range(0, len(html_soup('link')) ) ): try: if verbose: print ' * external stylesheet: %s' % html_soup('link')[i].get("href") except: if verbose: print ' * external stylesheet' html_soup('link')[i].extract() html_source = str(html_soup) return html_source # Add page number tag for PDF def addPageNumbers(html_source, verbose): if verbose: print 'Add page numbers' html_source = html_source.replace('', '\n
\n\n
# NOTE(review): the original physical line begins with the last tokens of
# addPageNumbers -- "') return html_source" -- whose string literals are damaged in
# this copy of the file; that function is not reproduced here.


# Get images from web
def downloadImages(html_source, verbose, download_images='all'):
    """Download the images referenced by ``src="..."`` attributes into ``sources/``.

    Arguments:
    html_source     -- handbook HTML to scan
    verbose         -- print progress information when true
    download_images -- 'missing' skips files that already exist in ``sources/``;
                       any other value downloads every image again

    Returns None; the HTML itself is not modified. URLs without a ``files/``
    component are counted and reported but not downloaded, because no local
    file name can be derived from them.
    """
    if verbose:
        print('Obtain necessary images from musescore.org')
    import urllib
    import os

    if not os.path.isdir('sources'):
        os.mkdir('sources')

    broken_image = html_source.find('NOT FOUND:')  # indicates a broken image on the website
    if broken_image > -1:
        # NOTE(review): the terminator searched for is an empty string in this copy of
        # the file (it looks like a stripped HTML literal), so the name is always empty.
        broken_image_name = html_source[broken_image+11:html_source.find('', broken_image)]
        print(' * WARNING: At least one broken image (' + broken_image_name + ')')

    unusual_urls = 0
    # The scan starts at index 1, as the original did.
    position = html_source.find('src="', 1)
    while position > -1:
        start = position + 5
        url = 'http://musescore.org' + html_source[start:html_source.index('"', start)]
        # Locate the next attribute now so every "continue" below still advances.
        position = html_source.find('src="', start)

        if 'files/' not in url:
            unusual_urls = unusual_urls + 1
            if verbose:
                print("WARNING: Unusual image url: %s" % url)
            # Previously the download went ahead under the file name left over from
            # the preceding image (or an empty name), clobbering that file.
            continue
        if 'files/js/' in url:  # don't download javascript files
            continue

        file_name = url[url.index('files/')+6:]
        target = 'sources/' + file_name
        if download_images == 'missing' and os.path.isfile(target):
            continue  # file already exists on the local computer

        if verbose:
            print(' * %s %s' % (file_name, url))
        target_dir = os.path.dirname(target)
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)  # file names may contain sub-directories
        sock = urllib.urlopen(url)
        try:
            data = sock.read()
        finally:
            sock.close()
        with open(target, "wb") as out_file:
            out_file.write(data)

    if unusual_urls > 0:
        # reports a bug with the website and language-dependent images
        print("WARNING: %d unusual image urls found" % unusual_urls)


# Fix img src attribute now that I removed the base tag
def fixImgSrc(html_source, verbose):
    """Point image ``src`` attributes at the local ``sources/`` directory.

    Arguments:
    html_source -- handbook HTML
    verbose     -- print progress information when true

    Returns the rewritten HTML.
    """
    if verbose:
        print('Fix image src attributes')
    html_source = html_source.replace('src="/sites/musescore.org/files/', 'src="sources/')
    # Work-around for temporary bug
    html_source = html_source.replace(
        'http://musescore.org/sites/all/modules/filefield/icons/protocons/16x16/mimetypes/image-x-generic.png',
        'sources/image-x-generic.png')
    return html_source


# Change first page
def addCoverPage(html_source, verbose):
    """Replace the website's introduction text with the PDF cover text.

    Arguments:
    html_source -- handbook HTML
    verbose     -- print progress information when true

    Returns the HTML with the English cover text substituted where the
    introduction text is found, unchanged otherwise.
    """
    if verbose:
        print('Add cover page')
    # Replace cover text for English version
    # NOTE(review): both literals are transcribed exactly as they appear in this copy
    # of the file; they look as if their HTML markup was stripped -- confirm upstream.
    html_source = html_source.replace(
        '\n\nHandbook\n\n'
        'This handbook is for MuseScore version 0.9.2 and above. In order to help improving or translating the handbook, leave a post in the MuseScore documentation forum and apply to become a handbook contributor.'
        '\n\n',
        '\n\nMuseScore Handbook\n\n'
        'MuseScore 0.9.5\n\n'
        'English handbook written by Werner Schweer and David Bolton. Contributions by Thomas Bonte, Toby Smithe, and others.'
        '\n\n')
    return html_source


# Change/fix last page
def addLastPage(html_source, verbose, handbook_url, language_code='en'):
    """Restore the "source" link text to the handbook URL.

    Arguments:
    html_source   -- handbook HTML
    verbose       -- print progress information when true
    handbook_url  -- address inserted in place of the rewritten link
    language_code -- accepted for symmetry with the other steps; not used here

    Returns the HTML with every `` #...`` run (up to the end of its line)
    replaced by a space followed by ``handbook_url``.
    """
    if verbose:
        print('Add last page')
    import re
    # Replace Source link (that got changed with the link fixes).
    # A callable replacement inserts the URL literally; as a template string any
    # backslash in it would be interpreted as an escape or group reference.
    # NOTE(review): the pattern looks like it lost HTML markup in this copy -- confirm.
    html_source = re.sub(' #.*', lambda match: ' ' + handbook_url + '', html_source)
    return html_source


# Save modified HTML file
# which is ready for converting to PDF
def saveHTML(html_source, language_code='en'):
    """Write the processed HTML to ``MuseScore-<language_code>.html``."""
    file_name = 'MuseScore-' + language_code + '.html'
    print('Save changes to HTML: %s' % file_name)
    with open(file_name, "w") as out_file:
        out_file.write(html_source)


# Add spaces between characters so Japanese wraps
def insertSpaces(html_source):
    """Insert a space between the characters of each text run between tags.

    A text run is the text between a ``>`` and the next ``<``; runs shorter than
    two characters, or starting with a newline or ``~``, are left alone. Each
    character of a run is first prefixed with a ``~`` marker; markers next to
    the tag delimiters are then dropped and the rest become normal spaces.
    At most 99999 runs are processed.

    Returns the modified HTML.
    """
    import re
    space = "~"  # marker character; turned into a normal space at the end
    text_pattern = re.compile('>[^' + space + '<\n]([^<]+)<')
    every_char = re.compile("(.)")

    def _space_out(match):
        # Prefix every character of the run (newlines excepted) with the marker.
        return every_char.sub(space + "\\1", match.group(0))

    try:
        # One left-to-right pass; a marked run can never match again because
        # every ">" inside it is now followed by the marker or a newline.
        h = text_pattern.sub(_space_out, html_source, count=99999)
    except Exception:
        print("fail")
        raise

    h = h.replace(space + '>', '>')
    h = h.replace('>' + space, '>')
    h = h.replace(space + '<', '<')
    h = h.replace(space, ' ')
    return h


# Generate and save PDF file
def generatePDF(html_source, verbose, language_code='en', pdf_parameter='openpdf'):
    """Convert the HTML to ``MuseScore-<language_code>.pdf`` with the Pisa library.

    Arguments:
    html_source   -- handbook HTML ready for conversion
    verbose       -- turn on Pisa's logging when true
    language_code -- used to name the PDF file
    pdf_parameter -- 'openpdf' opens the finished PDF in a viewer

    Returns None. When Pisa is not installed a hint is printed and nothing is
    generated.
    """
    file_name = 'MuseScore-' + language_code + '.pdf'
    print('Create PDF handbook: %s' % file_name)

    try:
        import ho.pisa as pisa
    except ImportError:
        print("\nPisa library required from creating PDFs. See README.txt for information\n")
        return
    if verbose:
        pisa.showLogging()

    # insertSpaces() was once applied to Japanese text at this point; that call
    # is disabled.
    # Close (and so flush) the PDF before a viewer is pointed at it.
    with open(file_name, "wb") as pdf_file:
        pdf = pisa.CreatePDF(
            html_source,
            pdf_file,
            None,
            None,
            0,
            False,
            None,
            False,
            None,
            None,
            False)

    if not pdf.err and pdf_parameter == 'openpdf':
        pisa.startViewer(file_name)


# Per language: node id of the printable export, and the path of the online
# handbook (None where the original set no handbook address).
_HANDBOOK_PAGES = {
    'en': ('51', 'handbook'),
    'bg': ('5246', None),
    'ca': ('3414', 'manual'),
    'cs': ('11825', 'příručka'),
    'da': ('1947', 'håndbog'),
    'de': ('98', 'handbuch'),
    'el': ('3533', 'εγχειρίδιο'),
    'es': ('137', 'manual'),
    'fi': ('1057', 'käsikirja'),
    'fr': ('115', 'manuel'),
    'gl': ('534', 'manual-galego'),
    'hu': ('1935', 'kézikönyv'),
    'it': ('772', 'manuale'),
    'ja': ('2696', 'ハンドブック'),
    'nb': ('2122', 'håndbok'),
    'nl': ('375', 'handboek'),
    'pl': ('2495', 'podręcznik'),
    'pt-br': ('1248', 'manual-pt-br'),
    'ro': ('3081', 'manual'),
    'ru': ('2352', 'cправочник'),
    'zh-hans': ('5541', '用户手册'),
}

# Languages whose PDF file name uses a different code than the website.
_PDF_LANGUAGE_CODES = {
    'pt-br': 'pt_BR',
    'zh-hans': 'zh_CN',
}


# Create handbook based on language parameter
def createHandbook(language_code, download_images='missing', pdf='openpdf', verbose=False, heading_switch=True, offline=False):
    """Build the HTML (and optionally PDF) handbook for one language.

    Arguments:
    language_code   -- website language code; matched case-insensitively
    download_images -- 'local' skips downloading images; any other value is
                       passed on to downloadImages()
    pdf             -- 'nopdf' skips PDF generation; any other value is passed
                       on to generatePDF()
    verbose         -- print progress information when true
    heading_switch  -- when true, run the heading-level switching steps
    offline         -- when true, read ``sources/MuseScore-<code>.html``
                       instead of downloading it, and download no images

    For a language code that is not in the table the export URL and the
    handbook address are both left empty, as before.
    """
    language_code = language_code.lower()
    language_code_pdf = _PDF_LANGUAGE_CODES.get(language_code, language_code)

    url = ''
    internal = ''
    if language_code in _HANDBOOK_PAGES:
        node_id, handbook_path = _HANDBOOK_PAGES[language_code]
        url = 'http://musescore.org/' + language_code + '/print/book/export/html/' + node_id
        if handbook_path is not None:
            internal = 'http://musescore.org/' + language_code + '/' + handbook_path

    print("Create handbook for %s" % language_code)

    if not offline:
        html = obtainHTML(url, verbose, language_code)
    else:
        file_name = 'MuseScore-' + language_code + '.html'
        with open('sources/' + file_name, "r") as html_file:
            html = html_file.read()

    anchors = []  # list of anchor names throughout document
    html, anchors = insertH1Anchors(html, anchors, verbose)
    # NOTE(review): the file's indentation is lost; it is assumed that only the two
    # heading-level steps depend on heading_switch -- confirm against upstream.
    if heading_switch:
        html = markAsH2(html, verbose)
        html = changeToH2(html)
    html = chapterHeading(html, verbose, language_code)
    html, anchors = insertH3Anchors(html, anchors, verbose)
    html = fixLinks(html, anchors, verbose, internal, language_code)
    html = removeBaseTag(html, language_code)
    html = addCustomStyles(html, verbose, language_code)
    html = addPageNumbers(html, verbose)
    if download_images != 'local' and not offline:
        downloadImages(html, verbose, download_images)
    html = fixImgSrc(html, verbose)
    html = addCoverPage(html, verbose)
    html = addLastPage(html, verbose, internal, language_code)

    saveHTML(html, language_code)
    if pdf != 'nopdf':
        generatePDF(html, verbose, language_code_pdf, pdf)
    print('')


def main():
    """Parse the command line and build the requested handbook(s)."""
    language_choices = ['all','en','bg','cs','ca','da','de','el','es','fi','fr','gl','hu','it','ja','nb','nl','pl','pt-BR','ro','ru', 'zh-hans']

    parser = OptionParser()
    parser.add_option("-l", "--lang", dest="language_code",
                      help="Specify language code for which to build manual",
                      choices=language_choices, default="all")
    parser.add_option("-o", "--offline", dest="offline",
                      help="Specify for offline mode",
                      action="store_true", default=False)
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                      help="Verbose output", default=False)
    parser.add_option("-t", "--type", dest="pdf",
                      help="PDF type",
                      choices=('default','pdf','openpdf','nopdf'), default='pdf')
    parser.add_option("-n", "--no-heading", dest="heading_switch", action="store_false",
                      help="Heading level switching off", default=True)
    (opts, args) = parser.parse_args()

    language_code = opts.language_code
    pdf = opts.pdf
    heading_switch = opts.heading_switch
    verbose = opts.verbose
    offline = opts.offline
    download_images = 'missing'

    # Check for PDF library dependency
    if pdf != "nopdf":
        try:
            import ho.pisa
        except ImportError:
            print("ImportError: No module named ho.pisa")
            print("\nPisa library required from creating PDFs. See README.txt for information\n")
            return

    if language_code == 'all':
        # Create Handbooks for all languages
        print('Creating handbooks for all languages...')
        if pdf == 'default':
            pdf = 'pdf'  # do not open a viewer for every language
        for language in language_choices:
            if language != "all":
                createHandbook(language, download_images, pdf, verbose, heading_switch, offline)
    else:
        # Create Handbook for specific language
        if pdf == 'default':
            pdf = 'openpdf'
        createHandbook(language_code, download_images, pdf, verbose, heading_switch, offline)

    print("Done")


if __name__ == '__main__':
    main()