#!/usr/bin/python3
# -*- coding: utf-8 -*-
# $Id: htmlhelp-qthelp.py $

"""
A python script to create a .qhp file out of a given htmlhelp
folder. Many things about that folder are assumed. Please
see the code and inlined comments.
"""

__copyright__ = \
"""
Copyright (C) 2006-2024 Oracle and/or its affiliates.

This file is part of VirtualBox base platform packages, as
available from https://www.virtualbox.org.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation, in version 3 of the
License.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, see <https://www.gnu.org/licenses>.

SPDX-License-Identifier: GPL-3.0-only
"""

import getopt
import logging
import os.path
import re
import sys

if sys.version_info[0] >= 3:
    from html.parser import HTMLParser
else:
    from HTMLParser import HTMLParser

# number of opened and not yet closed section tags of toc section
open_section_tags = 0

# html file names collected by create_html_list(); read by
# create_keywords_section()
html_files = []

# NOTE(review): every markup literal and tag pattern in this file (the four
# patterns below and the <keywords>/<keyword>/<files>/<file>/<section>
# strings further down) was missing from the text under review. They have
# been restored from the Qt help project and HTML Help contents formats;
# please confirm them against the upstream file.
_PARAM_NAME_RE = re.compile(r'^\s*<param name="Name" value=', re.IGNORECASE)
_PARAM_LOCAL_RE = re.compile(r'^\s*<param name="Local" value=', re.IGNORECASE)
_OBJECT_CLOSE_RE = re.compile(r'^\s*</OBJECT', re.IGNORECASE)
_UL_OPEN_RE = re.compile(r'^\s*<UL', re.IGNORECASE)
_UL_CLOSE_RE = re.compile(r'^\s*</UL', re.IGNORECASE)


class _AnchorNameParser(HTMLParser):
    """Collect the value of the name attribute of every <a> start tag."""

    def __init__(self):
        HTMLParser.__init__(self)
        self.a_tag = []

    def handle_starttag(self, tag, attrs):
        if tag != 'a':
            return
        # A valueless attribute (<a name>) is reported with value None and
        # cannot be turned into a keyword, so it is skipped.
        self.a_tag.extend(value for key, value in attrs
                          if key == 'name' and value)


def create_keywords_section(folder):
    """
    use html_parser stuff to collect <a name ...> tags

    Every file name in the module level html_files list is opened relative
    to folder and each named anchor found becomes one <keyword> element.
    Returns the list of lines of the <keywords> section.
    """
    keywords_section_lines = ['<keywords>']
    for html_file_name in html_files:
        # dita-ot creates htmlhelp output for en-us language in iso-8859-1
        # encoding, not utf-8
        full_html_path = os.path.join(folder, html_file_name)
        with open(full_html_path, encoding='iso-8859-1') as html_file:
            file_content = html_file.read()
        parser = _AnchorNameParser()
        parser.feed(file_content)
        for anchor in parser.a_tag:
            keywords_section_lines.append(
                '<keyword name="%s" ref="%s#%s"/>'
                % (anchor, html_file_name, anchor))
    keywords_section_lines.append('</keywords>')
    return keywords_section_lines


def create_image_list(folder):
    """
    list the files under topics/images folder and create a part of the
    qhelp project file with <file> tags

    Exits the script with status 1 when the image folder does not exist.
    """
    image_folder = os.path.join(folder, 'topics', 'images')
    if not os.path.isdir(image_folder):
        logging.error('Image subfolder "topics/images" is not found under "%s"!', folder)
        sys.exit(1)
    # os.listdir() returns entries in arbitrary order; sort them so that the
    # generated project file is reproducible.
    return ['<file>topics/images/%s</file>' % image_name
            for image_name in sorted(os.listdir(image_folder))]


def create_html_list(folder, list_file):
    """
    open files list and read the list of html files from there

    Only lines after the [FILES] marker that contain '.html' are used. Each
    of them is returned wrapped in a <file> tag and its bare name is
    appended to the module level html_files list as a side effect.
    Returns an empty list (after logging an error) when list_file is missing.
    """
    html_file_lines = []
    full_path = os.path.join(folder, list_file)
    if not os.path.isfile(full_path):
        logging.error('Could not find the file "%s" in "%s"', list_file, folder)
        return html_file_lines
    # first search for the [FILES] marker then collect .html lines
    marker_found = False
    with open(full_path, encoding='utf-8') as file_list:
        for line in file_list:
            if '[FILES]' in line:
                marker_found = True
                continue
            if not marker_found or '.html' not in line:
                continue
            # strip all surrounding whitespace, not only the line feed, so
            # that the name can be opened as a file later on
            html_name = line.strip()
            html_file_lines.append('<file>' + html_name + '</file>')
            html_files.append(html_name)
    return html_file_lines


def create_files_section(folder, list_file):
    """
    build the <files> section: image files first, then the html files
    listed in list_file
    """
    files_section_lines = ['<files>']
    files_section_lines += create_image_list(folder)
    files_section_lines += create_html_list(folder, list_file)
    files_section_lines.append('</files>')
    return files_section_lines


def parse_param_tag(line):
    """
    return the text between value=" and the next double quote in line,
    or an empty string when either of them is missing
    """
    label = 'value="'
    start = line.find(label)
    if start == -1:
        return ''
    start += len(label)
    end = line.find('"', start)
    if end == -1:
        return ''
    return line[start:end]


def parse_object_tag(lines, index):
    """
    look at next two lines. they are supposed to look like the following
         <param name="Name" value="Oracle VirtualBox">
         <param name="Local" value="index.html">
    parse out value fields and return
    <section title="Oracle VirtualBox" ref="index.html">

    An empty string is returned (and a warning logged) when the two lines
    are missing, are not param tags or carry an empty value. On success
    the module level open_section_tags counter is incremented.
    """
    global open_section_tags
    result = ''
    # both lines[index + 1] and lines[index + 2] are read below
    if index + 2 >= len(lines):
        logging.warning('Not enough tags after this one "%s"', lines[index])
        return result
    # NOTE(review): everything from here down to the 'else' branch was
    # missing from the text under review and is reconstructed from the
    # docstring and from parse_param_tag(); confirm against upstream.
    if not _PARAM_NAME_RE.match(lines[index + 1]) \
       or not _PARAM_LOCAL_RE.match(lines[index + 2]):
        logging.warning('Skipping the line "%s" since next two tags are supposed to be param tags',
                        lines[index])
        return result
    title = parse_param_tag(lines[index + 1])
    ref = parse_param_tag(lines[index + 2])
    if title and ref:
        open_section_tags += 1
        result = '<section title="' + title + '" ref="' + ref + '">'
    else:
        logging.warning('Title or ref part is empty for the tag "%s"', lines[index])
    return result


def parse_non_object_tag(lines, index):
    """
    parse any string other than starting with <OBJECT
    decide if an open <section> should be closed

    Returns '</section>' (and decrements open_section_tags) when the line
    ends a toc entry, an empty string otherwise.
    """
    # NOTE(review): only fragments of this function body were present in the
    # text under review. The logic below follows the HTML Help contents
    # layout (an entry without children ends at </OBJECT, an entry with
    # children ends at the </UL closing its child list); confirm against
    # upstream.
    global open_section_tags
    if index >= len(lines) or open_section_tags <= 0:
        return ''
    line = lines[index]
    if _OBJECT_CLOSE_RE.match(line):
        # keep the section open when a child list follows immediately
        if index + 1 < len(lines) and _UL_OPEN_RE.match(lines[index + 1]):
            return ''
        open_section_tags -= 1
        return '</section>'
    if _UL_CLOSE_RE.match(line):
        open_section_tags -= 1
        return '</section>'
    return ''


def parse_line(lines, index):
    result = ''
    # if the line starts with