File: languageinfer.cpp

package info (click to toggle)
source-highlight 3.1.7-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 10,332 kB
  • ctags: 5,233
  • sloc: sh: 11,270; cpp: 10,206; ansic: 9,515; makefile: 1,865; lex: 1,200; yacc: 1,021; php: 213; perl: 211; awk: 98; erlang: 94; lisp: 90; java: 75; ruby: 69; python: 61; asm: 43; ml: 38; ada: 36; haskell: 27; xml: 23; cs: 11; sql: 8; tcl: 6; sed: 4
file content (128 lines) | stat: -rw-r--r-- 3,488 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
//
// C++ Implementation: languageinfer
//
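// Description: infers the language of an input from its first two lines
// (Emacs "-*- lang -*-" marker, "#!" interpreter line, "<?lang" prefix
// or "<!DOCTYPE" declaration)
//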
//
// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2006
//
// Copyright: See COPYING file that comes with this distribution
//
//

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "languageinfer.h"
#include "fileutil.h"

#include <boost/regex.hpp>

using namespace std;

namespace srchilite {

// Constructs a LanguageInfer; this definition performs no initialization.
LanguageInfer::LanguageInfer() {}

// Destroys a LanguageInfer; this definition has nothing to release.
LanguageInfer::~LanguageInfer() {}

// Infers the language of the file with the given name.
//
// The file is opened with open_file_istream_or_error() and the resulting
// stream is handed to infer(istream &); the stream is always deleted before
// this function is left, also when the inference throws.
//
// filename: the name of the file to inspect
// returns: whatever infer(istream &) returns for the opened stream
const string LanguageInfer::infer(const string &filename) {
    // NOTE(review): presumably open_file_istream_or_error() reports a failure
    // to open the file itself instead of returning a null pointer - confirm
    // against fileutil.
    istream *stream = open_file_istream_or_error(filename);

    string result;

    try {
        result = infer(*stream);
    } catch (...) {
        // we own the stream: do not leak it if the inference fails
        // (e.g., the regular expression search can throw)
        delete stream;
        throw;
    }

    delete stream;

    return result;
}

const string LanguageInfer::infer(istream &stream) {
    // the regular expression for finding the language specification in a script file
    // this such as #! /bin/bash
    static boost::regex
            langRegEx(
                    "#[[:blank:]]*![[:blank:]]*(?:[\\./]*)(?:[[:alnum:]]+[\\./]+)*([[:alnum:]]+)");

    // the regular expression for finding the language specification in a script file
    // this such as #! /usr/bin/env perl
    static boost::regex
            langEnvRegEx(
                    "#[[:blank:]]*![[:blank:]]*(?:[\\./]*)(?:[[:alnum:]]+[\\./]+)*(?:env)[[:blank:]]+([[:alnum:]]+)");

    // the regular expression for finding the language specification in a script file
    // according to Emacs convention: # -*- language -*-
    static boost::regex
            langRegExEmacs("-\\*-[[:blank:]]*([[:alnum:]]+).*-\\*-");

    // the Emacs specification has the precedence in order to correctly infer
    // that scripts of the shape
    // #!/bin/sh
    // #  -*- tcl -*-
    // are Tcl scripts and not shell scripts

    // the regular expression for scripts starting with <?...
    // such as xml and php
    static boost::regex langXMLLikeScripts("<\\?([[:alnum:]]+)");

    // the regular expression for <!DOCTYPE
    static boost::regex langDocType("<![Dd][Oo][Cc][Tt][Yy][Pp][Ee]");

    string firstLine;
    string secondLine;

    // read only the first line of the input
    read_line(&stream, firstLine);
    // and the second line
    read_line(&stream, secondLine);

    boost::match_results<std::string::const_iterator> what;
    boost::match_results<std::string::const_iterator> whatEnv;
    boost::match_results<std::string::const_iterator> whatEmacs;

    // first try the emacs specification
    boost::regex_search(secondLine, whatEmacs, langRegExEmacs,
            boost::match_default);

    if (whatEmacs[1].matched)
        return whatEmacs[1];
    else {
        // try also on the first line
        boost::regex_search(firstLine, whatEmacs, langRegExEmacs,
                boost::match_default);
        if (whatEmacs[1].matched)
            return whatEmacs[1];
    }

    // try also the env specification
    boost::regex_search(firstLine, whatEnv, langEnvRegEx, boost::match_default);

    if (whatEnv[1].matched)
        return whatEnv[1];

    // try the sha-bang specification
    boost::regex_search(firstLine, what, langRegEx, boost::match_default);

    if (what[1].matched)
        return what[1];

    // the xml like starting scripts
    boost::regex_search(firstLine, what, langXMLLikeScripts,
            boost::match_default);

    if (what[1].matched)
        return what[1];

    // the doctype case
    boost::regex_search(firstLine, what, langDocType,
            boost::match_default);

    if (what[0].matched)
        return "xml";

    return "";
}

}