File: ncrystal_ncmat2cpp

package info (click to toggle)
ncrystal 3.4.1%2Bds1-1.1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 6,392 kB
  • sloc: cpp: 37,662; python: 5,801; sh: 90; ansic: 42; makefile: 2
file content (337 lines) | stat: -rwxr-xr-x 14,672 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
#!/usr/bin/env python3

################################################################################
##                                                                            ##
##  This file is part of NCrystal (see https://mctools.github.io/ncrystal/)   ##
##                                                                            ##
##  Copyright 2015-2022 NCrystal developers                                   ##
##                                                                            ##
##  Licensed under the Apache License, Version 2.0 (the "License");           ##
##  you may not use this file except in compliance with the License.          ##
##  You may obtain a copy of the License at                                   ##
##                                                                            ##
##      http://www.apache.org/licenses/LICENSE-2.0                            ##
##                                                                            ##
##  Unless required by applicable law or agreed to in writing, software       ##
##  distributed under the License is distributed on an "AS IS" BASIS,         ##
##  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  ##
##  See the License for the specific language governing permissions and       ##
##  limitations under the License.                                            ##
##                                                                            ##
################################################################################

"""

Script which can be used to embed the content of .ncmat files (or actually any
ASCII/UTF8 encoded text files) directly into a C++ library. It does so by
reading the .ncmat files and creating C++ code which keeps the contents of the
files in static strings, and registers those strings with NCrystal, using the
original filename as key. Naturally, those files must be compiled along with the
rest of the C++ library, and the enclosing function must be invoked.

"""

################################################################################################
####### Common code for all NCrystal cmdline scripts needing to import NCrystal modules ########
import sys
# Refuse to run on interpreters older than the minimum supported version. Only
# the first three fields (major, minor, micro) of sys.version_info are compared.
pyversion = sys.version_info[0:3]
_minpyversion=(3,6,0)
if pyversion < _minpyversion:
    # The two 3-tuples are concatenated to fill the six %i placeholders
    # (detected version first, then the required minimum).
    raise SystemExit('Unsupported python version %i.%i.%i detected (needs %i.%i.%i or later).'%(pyversion+_minpyversion))
import os
import pathlib

def maybeThisIsConda():
    """Tell whether the script appears to run inside a conda environment.

    Returns the (non-empty) value of the CONDA_PREFIX environment variable if
    it is set. Otherwise returns a bool telling whether a 'conda-meta' entry
    exists directly under sys.base_prefix.
    """
    conda_prefix = os.environ.get('CONDA_PREFIX')
    if conda_prefix:
        return conda_prefix
    conda_meta = os.path.join(sys.base_prefix, 'conda-meta')
    return os.path.exists(conda_meta)

def fixSysPathAndImportNCrystal( *, allowfail = False ):
    """Import and return the NCrystal Python module.

    Before importing, the first 30 lines of a file named 'ncrystal-config'
    located next to this script (if such a file exists) are searched for a
    line starting with '#CMAKE_RELPATH_TO_PYMOD:'. If the rest of that line is
    a non-empty path, and that path (taken relative to the directory of this
    script) contains NCrystal/__init__.py, the path is inserted at the front
    of sys.path.

    If the import fails, None is returned when allowfail is true, otherwise
    SystemExit is raised with an explanatory message.
    """
    script_dir = pathlib.Path( __file__ ).parent
    marker = '#CMAKE_RELPATH_TO_PYMOD:'

    def find_pymod_dir():
        #Returns the module directory announced by ncrystal-config, or None.
        cfgfile = script_dir / 'ncrystal-config'
        if not cfgfile.exists():
            return None
        with cfgfile.open('rt') as stream:
            for lineno, text in enumerate(stream):
                if lineno == 30:
                    #The marker is only searched for in the first 30 lines.
                    break
                if text.startswith(marker):
                    relpath = text[len(marker):].strip()
                    if not relpath:
                        return None
                    return script_dir / relpath
        return None

    moddir = find_pymod_dir()
    use_moddir = moddir and ( moddir / 'NCrystal' / '__init__.py' ).exists()
    if use_moddir:
        sys.path.insert( 0, str( moddir.absolute().resolve() ) )

    try:
        import NCrystal
    except ImportError:
        if allowfail:
            return None
        hint = ''
        if maybeThisIsConda():
            hint = ' (if using conda it might help to close your terminal and activate your environment again)'
        elif not use_moddir:
            hint = ' (perhaps your PYTHONPATH is misconfigured)'
        raise SystemExit('ERROR: Could not import the NCrystal Python module' + hint)
    return NCrystal
################################################################################################
import argparse

def parseArgs():
    """Parse and validate the command line arguments (taken from sys.argv).

    Returns the argparse namespace, post-processed as follows:

      * files   : sorted list of resolved, absolute pathlib.Path objects for the
                  input files (the raw FILE attribute is reset to None).
      * include : flat list of all values given to --include (possibly empty).
      * width   : silently clamped to at most 999999.

    Invalid input (bad function name, missing input file, input path which is
    a directory, the same file given under two spellings, non-unique file
    names, too small --width) is reported through parser.error, which prints a
    usage message and raises SystemExit.
    """
    descr="""

Script which can be used to embed the content of .ncmat files (or actually any
ASCII/UTF8 encoded text files) directly into a C++ library. It does so by
reading the .ncmat files and creating C++ code which keeps the contents of the
files in static strings, and registers those strings with NCrystal, using the
original filename as key. Naturally, those files must be compiled along with the
rest of the C++ library, and the enclosing function must be invoked.

"""
    parser = argparse.ArgumentParser(description=descr)
    parser.add_argument('FILE', type=str, nargs='+',
                        help="""One or more NCMAT (or other text) files. They will be registered with a key equal to their
                        filename (without preceding directory name), which must
                        therefore be unique in the list""")
    parser.add_argument('--full','-f', action='store_true',
                        help="""Unless this option is provided, all comments and excess whitespace will be
                        stripped from the file data.""")
    parser.add_argument('--validate','-v', action='store_true',
                        help="""If specified input files will be validated by confirming that they can be
                        loaded with NCrystal. For this to work, the NCrystal python module must be
                        available.""")
    parser.add_argument("--name",'-n',default='registerNCMATData',type=str,
                        help="""Name of C++ function to create which must be called in order to register the
                        data with NCrystal. If desired, it can contain namespace(s), e.g. a value of
                        "MyNameSpace::myFunction" will create a function "void myFunction()" in the
                        namespace "MyNameSpace".""")
    parser.add_argument("--regfctname",default='NCrystal::registerInMemoryStaticFileData(const std::string&,const char*)',type=str,
                        help="""Name of C++ function used to register string objects with NCrystal.""")
    parser.add_argument("--include",nargs='+',type=str,action='append',
                        help="""One or more extra include statements for the top of the file. The file
                                NCrystal/NCDefs.hh will always be included by default (prevent this by
                                adding a special entry 'no-ncrystal-includes' to the list).""")
    parser.add_argument("--width",'-w',type=int,default=80,
                        help="""Wrap C++ code at this column width. Ignored when running with --full.""")
    parser.add_argument('--outfile','-o',type=argparse.FileType('w'),default=sys.stdout,
                        help="Name of output file (default: stdout)")

    args=parser.parse_args()
    if not args.name or ' ' in args.name:
        parser.error('Invalid C++ function name provided to --name')

    filepaths = set()
    bns=set()
    for f in set(args.FILE):
        p=pathlib.Path(f)
        if not p.exists():
            parser.error('File not found: %s'%f)
        if p.is_dir():
            #A directory can never be embedded: reject it here with a proper
            #usage error rather than crashing later when trying to read it.
            parser.error('Not a file (it is a directory): %s'%f)
        p=p.resolve().absolute()
        if p in filepaths:
            #Same file reached through two different spellings of its path.
            parser.error('The same file is specified more than once: %s'%p)
        if p.name in bns:
            #File names are used as registration keys, so they must be unique.
            parser.error('Filenames without directory part is not unique: %s'%f)
        filepaths.add(p)
        bns.add(p.name)
    args.files = sorted(filepaths)
    args.FILE=None

    wmin=30
    wmax=999999
    if args.width>wmax:
        args.width=wmax
    if args.width < wmin:
        parser.error('Out of range value of --width (must be at least %i)'%wmin)

    #flatten args.include, so we get single list with 3 elements from:
    #        --inc 'foobla.hh' --inc '<vector>' Bla/Bla.hh
    args.include = [item for sublist in (args.include or []) for item in sublist]

    return args

def files2cppcode(infiles,outfile,
                  cppfunctionname='registerData',
                  compact=True,
                  compactwidth=140,
                  validatefct=None,
                  extra_includes=None,
                  regfctname='NCrystal::registerInMemoryStaticFileData(const std::string&,const char*)' ):
    """Generate C++ code embedding the content of text files and write it out.

    The generated code defines a function "void <cppfunctionname>()" which, for
    each input file, calls the registration function with the file name
    (without directory part) as key and the embedded file data as value.

    Parameters:
      infiles         : iterable of paths (str or pathlib.Path) of UTF8 encoded
                        input files. File names must be unique.
      outfile         : object with a write method, or a path to which the
                        generated code is written (UTF8 encoded).
      cppfunctionname : name of generated function, optionally prefixed with
                        namespaces ("NS::fct").
      compact         : if true, comments (everything from a '#') and excess
                        whitespace are stripped and the data is packed into
                        chunks of roughly compactwidth columns. Entries of the
                        form NCRYSTALMATCFG[...] in comments are preserved.
                        Otherwise each input line is embedded as-is on its own
                        line of code.
      compactwidth    : target column width of chunks in compact mode.
      validatefct     : optional callable invoked with the path of each input
                        file before it is processed.
      extra_includes  : optional list of extra includes. Entries starting with
                        '#include' are emitted verbatim, entries starting with
                        '<' become '#include <...>' and all others become
                        '#include "..."'. NCrystal/NCDefs.hh is included first
                        unless the special entry 'no-ncrystal-includes' is
                        present. The list is not modified.
      regfctname      : name (optionally with namespaces and argument
                        signature) of the C++ registration function.

    Files whose embedded data would exceed roughly 60000 characters are
    emitted as a null terminated std::array of bytes rather than as string
    literals.

    Status messages are printed to stdout, unless outfile is sys.stdout in
    which case they are printed to stderr so they can not get mixed into the
    generated code.

    Raises:
      AssertionError     : duplicate file names, empty input file, or malformed
                           NCRYSTALMATCFG entry.
      UnicodeEncodeError : non-ASCII data outside comments in compact mode.
      UnicodeDecodeError : input file is not UTF8 encoded.
      SystemExit         : data can not be encoded as UTF8.
    """

    status_to_stderr = outfile is sys.stdout
    def status(text):
        #file=None makes print use the current sys.stdout
        print(text, file=( sys.stderr if status_to_stderr else None ))

    #Work on a copy so the list of the caller is never modified (None simply
    #means that there are no extra includes):
    includes = list(extra_includes or [])
    if 'no-ncrystal-includes' in includes:
        includes = [ e for e in includes if e!='no-ncrystal-includes' ]
    else:
        includes.insert(0,'NCrystal/NCDefs.hh')

    out=['// Code automatically generated by ncrystal_ncmat2cpp','']
    for inc in includes:
        if inc.startswith('#include'):
            out.append(inc)
        elif inc.startswith('<'):
            out.append('#include %s'%inc)
        else:
            out.append('#include "%s"'%inc)
    out.append('')

    def fwddeclare(fctname):
        #Returns lines with a forward declaration of a (void) function, wrapped
        #in any namespaces found in the name.
        if '(' not in fctname:
            fctname+='()'
        qualname,argspec = fctname.split('(',1)
        *namespaces,justname = qualname.split('::')
        decl = ''.join('namespace %s { '%ns for ns in namespaces)
        decl += 'void %s(%s;'%(justname,argspec)
        decl += ' }'*len(namespaces)
        return [decl,'']

    out += fwddeclare(regfctname)
    if '::' in cppfunctionname:
        out += fwddeclare(cppfunctionname)

    #Characters needing an escape sequence inside a C++ string literal. The
    #backslash itself must be included, otherwise backslashes in the input
    #would be interpreted as the start of an escape sequence by the compiler.
    c_escapes = { '\\':r'\\', '"':r'\"', '\n':r'\n' }

    def c_escape(text):
        return ''.join(c_escapes.get(ch,ch) for ch in text)

    def fmtline(line):
        #Returns the (unescaped) data to embed for an input line, including a
        #trailing newline. In compact mode, lines without content give ''.
        if not compact:
            line.encode('utf8')#Just a check
            return line+'\n'
        ncmatcfg=None
        if 'NCRYSTALMATCFG[' in line:
            #special case: preserve NCRYSTALMATCFG
            rest=line.split('NCRYSTALMATCFG[',1)[1]
            assert not 'NCRYSTALMATCFG' in rest, "multiple NCRYSTALMATCFG entries in a single line"
            assert ']' in rest, "NCRYSTALMATCFG[ entry without closing ] bracket"
            ncmatcfg=rest.split(']',1)[0].strip()
            ncmatcfg.encode('utf8')#Just a check
        line=' '.join(line.split('#',1)[0].split())
        line.encode('ascii')#Just a check
        if ncmatcfg:
            line += '#NCRYSTALMATCFG[%s]'%ncmatcfg
        return ( line+'\n' ) if line else ''

    def as_c_str(strdata):
        #Wraps already escaped data in a C++ string literal.
        try:
            strdata.encode('ascii')
            return '"%s"'%strdata
        except UnicodeEncodeError:
            pass
        try:
            strdata.encode('utf8')
            return 'u8"%s"'%strdata
        except UnicodeEncodeError:
            raise SystemExit('Invalid encoding encountered in input (must be ASCII or UTF8)')

    def iter_chunks(rawtext,width):
        #Splits rawtext into (raw,escaped) pieces. A piece is completed as soon
        #as its escaped form has reached the given width. Since text is added
        #one whole character at a time, an escape sequence is never split.
        raw,esc,esclen = [],[],0
        for ch in rawtext:
            e = c_escapes.get(ch,ch)
            raw.append(ch)
            esc.append(e)
            esclen += len(e)
            if esclen >= width:
                yield ''.join(raw),''.join(esc)
                raw,esc,esclen = [],[],0
        if raw:
            yield ''.join(raw),''.join(esc)

    out+=['void %s()'%cppfunctionname,'{']
    prefix='  '
    seen = set()
    large_files = False

    for p in [pathlib.Path(f) for f in infiles]:
        status("ncmat2cpp : Processing %s"%p.name)
        fn=p.name
        assert not fn in seen, "ERROR: Multiple files in input named: %s"%fn
        seen.add(fn)
        if validatefct:
            status("Trying to validate: %s"%fn)
            validatefct(p)
            status('  -> OK')

        #Input is documented to be ASCII/UTF8, so do not rely on the locale:
        lines = p.read_text(encoding='utf8').splitlines()
        assert lines,"file was empty: %s"%fn
        rawlines = [fmtline(line) for line in lines]

        #string literals have a limit of 65K in the standard. For such large
        #files we must embed contents in const std::array<std::uint8_t, N>
        #arrays.
        is_large = ( sum(len(as_c_str(c_escape(r))) for r in rawlines) > 60000 )
        if is_large:
            large_files=True

        if compact:
            #Subtract the space taken up by indentation and delimiters:
            width = max(10,compactwidth-len('    ,' if is_large else '    ""'))
            if is_large:
                #Rather approximate, each character takes 2-3 characters in the file:
                width//=2
            pieces = list(iter_chunks(''.join(rawlines),width))
        else:
            pieces = [ (r,c_escape(r)) for r in rawlines ]

        out+= [prefix+"{"]
        out+= [prefix+"  // File %s%s%s"%(fn,
                                          (' (compact form without comments)' if compact else ''),
                                          (' (too large for string literals)' if is_large else ''))]
        if is_large:
            bytelines = []
            nbytes = 0
            for raw,_ in pieces:
                #Encode the actual (unescaped) data
                data = raw.encode('utf8')
                nbytes += len(data)
                bytelines.append(prefix+'    %s,'%','.join(str(b) for b in data))
            #The data is handed over as a const char*, so it must be null
            #terminated (hence also the +1 in the array size):
            out += [ prefix+'  static const std::array<std::uint8_t, %i> rawdata {'%(nbytes+1)]
            out += bytelines
            out += [ prefix+'    0};']
            out += [ prefix+'  const char * textdata = (const char*)(&rawdata[0]);']
        else:
            out += [ prefix+'  const char * textdata =']
            if pieces:
                out += [ prefix+'    %s'%as_c_str(esc) for _,esc in pieces ]
            else:
                #Nothing left to embed (e.g. file with only comments): an empty
                #literal is needed for the generated code to be valid.
                out += [ prefix+'    ""' ]
            out[-1]+=';'
        out+= [prefix+'  ::%s("%s",textdata);'%(regfctname.split('(')[0],c_escape(fn))]
        out+= [prefix+"}"]
    out+= ['}','']

    if large_files:
        out.insert(1,'#include <cstdint>')
        out.insert(1,'#include <array>')

    out = '\n'.join(out)
    if hasattr(outfile,'write'):
        outfile.write(out)
        if hasattr(outfile,'name'):
            status('Wrote: %s'%outfile.name)
    else:
        with pathlib.Path(outfile).open('wt',encoding='utf8') as fh:
            fh.write(out)
        status('Wrote: %s'%outfile)

def main():
    """Command line entry point.

    Parses the command line, sets up a validation function based on the
    NCrystal Python module if validation was requested (raising SystemExit if
    the module can not be imported), and finally generates the C++ code.
    """
    opts = parseArgs()

    validator = None
    if opts.validate:
        ncrystal = fixSysPathAndImportNCrystal( allowfail = True )
        if not ncrystal:
            raise SystemExit("ERROR: Could not import the NCrystal Python module (this is required"
                             " when running with --validate). If it is installed, make sure your"
                             " PYTHONPATH is setup correctly.")
        def validator( filename ):
            #A file is considered valid if NCrystal is able to load it.
            return ncrystal.createInfo('%s;inelas=sterile'%filename)

    files2cppcode( opts.files,
                   outfile = opts.outfile,
                   cppfunctionname = opts.name,
                   compact = ( not opts.full ),
                   compactwidth = opts.width,
                   validatefct = validator,
                   extra_includes = opts.include,
                   regfctname = opts.regfctname )

# Only run the command line interface when this file is executed as a script.
if __name__=='__main__':
    main()