File: build_id_to_file_mapping.py

package info (click to toggle)
virtualbox 7.1.12-dfsg-2
  • links: PTS, VCS
  • area: contrib
  • in suites: sid
  • size: 565,672 kB
  • sloc: ansic: 2,330,854; cpp: 2,193,228; asm: 230,777; python: 223,895; xml: 86,771; sh: 25,541; makefile: 8,158; perl: 5,697; java: 5,337; cs: 4,872; pascal: 1,782; javascript: 1,692; objc: 1,131; lex: 931; php: 906; sed: 899; yacc: 707
file content (150 lines) | stat: -rwxr-xr-x 5,081 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# -*- coding: utf-8 -*-
# $Id: build_id_to_file_mapping.py $

"""
Scans the given files (globbed) for topic IDs and records the ID-to-filename
mapping in the output file.

This is used by add_file_to_id_only_references.py after converting man_V*.xml
refentry files to dita to correct links.
"""

__copyright__ = \
"""
Copyright (C) 2023-2024 Oracle and/or its affiliates.

This file is part of VirtualBox base platform packages, as
available from https://www.virtualbox.org.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation, in version 3 of the
License.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, see <https://www.gnu.org/licenses>.

SPDX-License-Identifier: GPL-3.0-only
"""
__version__ = "$Revision: 164827 $"


# Standard python imports.
import glob;
import os;
import re;
import sys;


# Matches a <topic ...> start tag and captures its quoted id attribute value
# (group 1, including the surrounding single or double quotes).
g_oReDita = re.compile(r'<topic[^><]*\sid=("[^">]+"|\'[^\'>]+\')');

def scanDitaFileForIds(dIdToFile, sContent, sFile):
    """
    Collects the referenceable topic IDs found in the content of a .dita-file.

    dIdToFile is the ID -> filename dictionary to update (modified in place),
    sContent is the text of the file, and sFile is the filename recorded for
    every ID found.  An ID that is already in the dictionary is overwritten.
    """
    # Drop the opening and closing quote character from each captured value.
    asTopicIds = [oHit.group(1)[1:-1] for oHit in g_oReDita.finditer(sContent)];
    dIdToFile.update((sTopicId, sFile) for sTopicId in asTopicIds if sTopicId);

# Matches a <refentry>, <refsect1>..<refsect5> or <cmdsynopsis> start tag and
# captures the element name (group 1) and its quoted id value (group 2).
g_oReRefentry = re.compile(r'<(refentry\b|refsect[12345]\b|cmdsynopsis\b)[^><]*\sid=("[^">]+"|\'[^\'>]+\')');

def scanDocbookRefentryForIds(dIdToFile, sContent, sFile):
    """
    Collects the referenceable IDs found in the content of a Docbook refentry file.

    dIdToFile is the ID -> filename dictionary to update (modified in place)
    and sContent is the text of the file.  Every ID found is mapped to
    '<id>.dita'.

    Note! sFile is currently not used by this function.
    """
    for oHit in g_oReRefentry.finditer(sContent):
        # Group 2 still carries its quotes; slice them off.
        sRefId = oHit.group(2)[1:-1];
        if not sRefId:
            continue;
        dIdToFile[sRefId] = '%s.dita' % (sRefId,);

def isDocbook(sContent):
    """
    Check if the file is a Docbook one.

    The content is considered Docbook when it contains both a '<refentry '
    start tag and a '<refentryinfo>' tag.

    Returns True if both markers are present, False otherwise.
    """
    # Use 'in' rather than the raw str.find() result: find() returns -1 (which
    # is truthy) on a miss and 0 (which is falsy) for a hit at offset zero.
    return '<refentry ' in sContent and '<refentryinfo>' in sContent;

def error(sMessage):
    """ Prints the given error message to stderr and returns exit code 1. """
    sLine = 'build_id_to_file_mapping.py: error: %s' % sMessage;
    print(sLine, file = sys.stderr);
    return 1;

def syntax(sMessage):
    """ Prints the given syntax error message to stderr and returns exit code 2. """
    sLine = 'build_id_to_file_mapping.py: syntax error: %s' % sMessage;
    print(sLine, file = sys.stderr);
    return 2;

def usage():
    """ Prints the command line usage summary to stdout and returns exit code 0. """
    sUsage = 'usage: build_id_to_file_mapping.py --output <map.db> file1.dita docbook2.xml wild*card.* [...]';
    print(sUsage);
    return 0;

def main(asArgs):
    """
    C-like main function.

    Parses the command line in asArgs (asArgs[0] is the program name and is
    skipped), scans every input file for IDs and writes the sorted 'id=file'
    lines to the --output file, or to stdout when no output file was given.

    Input arguments are glob patterns.  An argument starting with '@' names a
    UTF-8 encoded response file holding a whitespace separated list of input
    files.  Everything following a '--' argument is treated as input.

    Returns the process exit code: 0 on success (also for --help and
    --version), 1 for file errors and 2 for command line syntax errors.
    """
    #
    # Process arguments.
    #
    dIdToFile  = {};
    sOutput    = None;
    fEndOfArgs = False;
    iArg       = 1;
    while iArg < len(asArgs):
        sArg = asArgs[iArg];
        # startswith() instead of sArg[0] so an empty argument cannot raise IndexError.
        if sArg.startswith('-') and not fEndOfArgs:
            # Options.
            if sArg == '--':
                fEndOfArgs = True;
            elif sArg in ('--help', '-h', '-?'):
                return usage();
            elif sArg in ('--version', '-V' ):
                # Print the revision number without the '$Revision: ' / ' $' keyword
                # decoration and stop, like --help does.
                print(__version__[__version__.find(':') + 2:-2]);
                return 0;
            elif sArg in ('--output', '-o'):
                iArg += 1;
                if iArg >= len(asArgs):
                    return syntax('Expected filename following "--output"!');
                sOutput = asArgs[iArg];
            else:
                return syntax('Unknown option: %s' % (sArg,));
        else:
            # Input files.
            if sArg.startswith('@'):
                # Response file: report read failures like any other input file
                # instead of letting the exception escape as a traceback.
                try:
                    with open(sArg[1:], 'r', encoding = 'utf-8') as oFile:
                        asFiles = oFile.read().split();
                except Exception as oXcpt: # pylint: disable=broad-exception-caught
                    return error('Failed to open and read "%s": %s' % (sArg[1:], oXcpt,));
            else:
                asFiles = glob.glob(sArg);
            if not asFiles:
                return error('File not found: %s' % (sArg,));
            for sFile in asFiles:
                try:
                    with open(sFile, 'r', encoding = 'utf-8') as oFile:
                        sContent = oFile.read();
                except Exception as oXcpt: # pylint: disable=broad-exception-caught
                    return error('Failed to open and read "%s": %s' % (sFile, oXcpt,));
                if isDocbook(sContent):
                    scanDocbookRefentryForIds(dIdToFile, sContent, os.path.splitext(os.path.basename(sFile))[0] + '.dita');
                else:
                    scanDitaFileForIds(dIdToFile, sContent, os.path.basename(sFile));
        iArg += 1;

    # Dump the dictionary.
    asDict = sorted(['%s=%s' % (sKey, sValue) for sKey, sValue in dIdToFile.items()]);
    if sOutput is not None:
        try:
            with open(sOutput, 'w', encoding = 'utf-8') as oFile:
                oFile.write('\n'.join(asDict));
        except Exception as oXcpt: # pylint: disable=broad-exception-caught
            # Report the output file here; sFile is the last input file and is
            # not even defined when no input was processed.
            return error('Failed to open and write "%s": %s' % (sOutput, oXcpt,));
    else:
        sys.stdout.write('\n'.join(asDict));
    return 0;

if __name__ == "__main__":
    # Run main() on the full argument vector and exit with its return value.
    iExitCode = main(sys.argv);
    sys.exit(iExitCode);