File: jsExtractFunctions.py

package info (click to toggle)
minexpert2 9.6.0-1
links: PTS, VCS
area: main
in suites: trixie
size: 65,916 kB
sloc: cpp: 27,271; javascript: 22,140; xml: 8,066; python: 214; makefile: 91; sh: 86
file content (332 lines) | stat: -rw-r--r-- 11,804 bytes
parent folder | download | duplicates (3)
import os
import sys
import argparse
import re
import shutil
import subprocess
import shlex
import copy

def checkFileJSMarkup(fileName):
    """Look for the JS doc tag of a source file and its optional comment.

    The file is scanned for a tag line of the form

        /*/js/ Class: MassSpectrum

    and, once such a line was seen, for a <comment>...</comment> element
    that is either contained in a single line or spread over several
    lines. Scanning stops as soon as a closing </comment> is met. If
    several tag lines are met before that point, the last one wins.

    Parameters:
        fileName: path of the file to scan.

    Returns:
        A tuple ([tagName, className], comment). The two names are empty
        strings when no tag line was found, and comment is an empty string
        when no comment was found. In the comment, "<" and ">" are replaced
        with "&lt;" and "&gt;". The same "empty" tuple is returned when the
        file does not exist, so that callers can always unpack the result.
    """

    if not os.path.exists(fileName):
        print("File " + fileName + " was not found\n")
        return (["", ""], "")

    # The patterns are raw strings (sequences like \s are not valid string
    # escapes) and are compiled once, outside of the line loop.

    # The tag line: /*/js/ <TagName>: <ClassName>
    tagRegexp = re.compile(r"^\s*/\*/js/\s*([A-Z][a-z]+):\s*([A-Za-z]+)$")

    # A comment that fits in a single line: * <comment>text</comment>
    oneLineCommentRegexp = re.compile(
        r"^\s*\*\s*<comment>\s*(.*)</comment>\s*$")

    # The first line of a multiline comment. The spaces before and after
    # <comment> are captured to keep the text aligned with the next lines.
    openCommentRegexp = re.compile(r"^\s*\**(\s*)<comment>(\s*.*\n)")

    # The last line of a multiline comment, with or without leading text.
    closeCommentRegexp = re.compile(r"^\s*\**(\s*.*)</comment>\s*$")

    # A line inside of a multiline comment: drop the leading "*" characters
    # but keep the spaces that follow them.
    innerLineRegexp = re.compile(r"^\s*\**(\s*.*$\n)")

    # Same as above for the very last line of a file that does not end with
    # a newline character, which the pattern above cannot match.
    innerLastLineRegexp = re.compile(r"^\s*\**(\s*.*)")

    foundDocTag = False
    openCommentTag = False

    tagName = ""
    className = ""
    commentLines = []

    with open(fileName) as fileHandle:
        for line in fileHandle:

            match = tagRegexp.match(line)
            if match:
                tagName = match.group(1)
                className = match.group(2)

                # Let us know that we found the JS doc tag; the next lines
                # may hold a comment.
                foundDocTag = True
                continue

            if not foundDocTag:
                # Comments are only looked for after the tag line.
                continue

            match = oneLineCommentRegexp.match(line)
            if match:
                # The comment was contained in a single line, we are done.
                commentLines.append(match.group(1))
                break

            match = openCommentRegexp.match(line)
            if match:
                # Beginning of a multiline comment: reconstitute the spaces
                # around <comment> and go look for the </comment> tag.
                commentLines.append(match.group(1) + match.group(2))
                openCommentTag = True
                continue

            match = closeCommentRegexp.match(line)
            if match:
                # The comment is finally closed, we are done.
                commentLines.append(match.group(1))
                break

            if openCommentTag:
                # A line inside of <comment></comment> that has neither the
                # opening nor the closing tag: store it as is, minus the
                # leading "*" characters.
                match = (innerLineRegexp.match(line)
                         or innerLastLineRegexp.match(line))
                commentLines.append(match.group(1))

    theComment = "".join(commentLines)
    theComment = theComment.replace("<", "&lt;").replace(">", "&gt;")

    return ([tagName, className], theComment)


def extractJSMarkup(fileName):
    """Extract all the JS doc stanzas of a file.

    A stanza starts at a line of the form

        /*/js/ Class: MassSpectrum

    and ends at a line that only contains "*/" (with optional spaces
    around it). All the lines in between are the body of the stanza.

    Parameters:
        fileName: path of the file to scan.

    Returns:
        A list of stanzas, each stanza being the list of its body lines
        (opening and closing lines excluded) with "<" and ">" replaced with
        "&lt;" and "&gt;". A stanza that is still open at the end of the
        file is reported on standard output and not returned. An empty list
        is returned when the file does not exist.
    """

    print("extractJSMarkup -- fileName: " + fileName)

    if not os.path.exists(fileName):
        print("File " + fileName + " was not found\n")
        return []

    print("\nRunning extractJSMarkup on file:" + fileName + "\n")

    # The patterns are raw strings (sequences like \s are not valid string
    # escapes) and are compiled once, outside of the line loop.

    # Opening line of a stanza: <spaces>/*/js/ <TagName>: <ClassName>
    stanzaStartRegexp = re.compile(
        r"^\s*/\*/js/\s*([A-Z][a-z]+):\s*([A-Za-z]+)$")

    # Closing line of a stanza: <spaces>*/<spaces>
    stanzaEndRegexp = re.compile(r"^\s*\*/\s*$")

    stanzas = []

    # The lines of the stanza that is currently being read.
    lines = []
    startTagFound = False

    with open(fileName) as fileHandle:
        line = fileHandle.readline()
        print("In the with loop -- line: " + line)

        while line:
            print("In the while loop -- line: " + line)

            if not startTagFound:
                # Outside of a stanza: normal code lines are skipped, only
                # an opening line is of interest.
                if stanzaStartRegexp.match(line):
                    startTagFound = True

            elif stanzaEndRegexp.match(line):
                # Closing the stanza: store the lines that were collected
                # and start a new round of stanza searching.
                print("Closing a stanza that has " + str(len(lines))
                      + " lines\n")

                # for debugging purposes:
                print("".join(lines))
                print("\n\n")

                stanzas.append(lines)
                lines = []
                startTagFound = False

            else:
                # Inside of a stanza: store the line.
                line = line.replace("<", "&lt;").replace(">", "&gt;")
                lines.append(line)
                print("Appending new doc line " + line)

            line = fileHandle.readline()

    # We are now at the end of the file, so make some checks

    if startTagFound:
        print("Error, a JS doc stanza was not closed properly\n")

    # Finally return the stanzas that we could read in the file.
    return stanzas


def listAllJsDocFiles():
    """Print the JS doc tag of all the documented C++ files.

    The directories listed in dirList are walked recursively (they are
    relative to the current working directory). For each .hpp or .cpp file
    in which checkFileJSMarkup() finds a JS doc tag, three things are
    printed: the path of the file, "tagName: className" and the comment.
    """

    # Mind the comma after each item: two adjacent string literals are
    # silently concatenated into a single (wrong) path.
    dirList = ["src/nongui", "src/nongui/js", "src/gui"]

    # os.walk() is recursive, so src/nongui/js is also reached from
    # src/nongui: remember the visited files to list each of them once.
    seenPaths = set()

    for curDir in dirList:
        for root, _dirs, files in os.walk(curDir):
            for baseName in files:

                if not baseName.endswith(('.hpp', '.cpp')):
                    continue

                filePath = os.path.join(root, baseName)

                normalizedPath = os.path.normpath(filePath)
                if normalizedPath in seenPaths:
                    continue
                seenPaths.add(normalizedPath)

                if not os.path.exists(filePath):
                    # Typically a dangling symbolic link: there is nothing
                    # to read, go on to the next file.
                    print("File " + filePath + " was not found\n")
                    continue

                # names is a list of two strings: tagName and className
                names, comment = checkFileJSMarkup(filePath)

                if names[0]:
                    print(filePath)
                    print(": ".join(names))
                    print(comment)


def processFile(fileName, stanzaListList, classList, tagList, commentList):
    """Collect the JS doc of a file into the per-class lists.

    The four lists are parallel lists that are modified in place: the item
    at a given index of tagList, commentList and stanzaListList belongs to
    the class name found at the same index of classList.

    Parameters:
        fileName: path of the file to process.
        stanzaListList: for each class, the list of its stanzas (each
            stanza being a list of lines, see extractJSMarkup()).
        classList: the class names encountered so far.
        tagList: for each class, the tag name found with it.
        commentList: for each class, its comment ("" if there is none).

    Nothing is returned. The lists are left untouched if the file does not
    exist, has no JS doc tag or has no stanza.
    """

    if not os.path.exists(fileName):
        print("File " + fileName + " was not found\n")
        return

    # names is a list of two strings: tagName and className
    names, comment = checkFileJSMarkup(fileName)

    tagName = names[0]
    className = names[1]

    if not tagName:
        print("File " + fileName + " does not contain JS doc\n")
        return

    stanzaList = extractJSMarkup(fileName)

    if not stanzaList:
        return

    if className not in classList:
        # The class is encountered for the first time: create its entry in
        # each of the lists.
        tagList.append(tagName)
        classList.append(className)

        # The comment might be empty
        commentList.append(comment or "")

        # Store a copy, so that the entry is only owned by stanzaListList.
        stanzaListList.append(list(stanzaList))
    else:
        # The class was found already (in another file, typically): add the
        # new stanzas to the ones it already has.
        index = classList.index(className)
        stanzaListList[index].extend(stanzaList)