1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
|
//
// C++ Implementation: languageinfer
//
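// Description: infers the language of an input from its first two lines
// (Emacs mode line, sha-bang line, "<?lang" prefix or "<!DOCTYPE").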
//
// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2006
//
// Copyright: See COPYING file that comes with this distribution
//
//
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "languageinfer.h"
#include "fileutil.h"
#include <boost/algorithm/string.hpp>
#include <boost/regex.hpp>
#include <vector>
using namespace std;
namespace srchilite {
// Default constructor: this definition performs no initialization of its own.
LanguageInfer::LanguageInfer() {}
// Destructor: this definition performs no cleanup of its own.
LanguageInfer::~LanguageInfer() {}
/**
 * Infers the language of the file with the given name.
 *
 * The file is opened through open_file_istream_or_error() and the actual
 * inference is delegated to infer(istream &).
 *
 * @param filename the name of the file to inspect
 * @return the inferred language, or the empty string if nothing was inferred
 */
const string LanguageInfer::infer(const string &filename) {
    // NOTE(review): the returned pointer is dereferenced unchecked, as in the
    // original; presumably open_file_istream_or_error() reports failures
    // itself instead of returning a null pointer -- confirm in fileutil.h.
    istream *stream = open_file_istream_or_error(filename);

    string result;
    try {
        result = infer(*stream);
    } catch (...) {
        // do not leak the stream if the inference throws
        // (e.g. the regular expression engine may raise an exception)
        delete stream;
        throw;
    }

    delete stream;
    return result;
}
/**
 * Returns a copy of the given text without leading and trailing whitespace
 * (space, tab, newline, carriage return, form feed, vertical tab).
 */
static std::string trimModelinePiece(const std::string &text) {
    static const char *const blanks = " \t\n\r\f\v";
    const std::string::size_type first = text.find_first_not_of(blanks);
    if (first == std::string::npos)
        return std::string();
    const std::string::size_type last = text.find_last_not_of(blanks);
    return text.substr(first, last - first + 1);
}

/**
 * Extracts the language from the contents of an Emacs mode line, i.e., the
 * text found between the two "-*-" markers.
 *
 * The text is split into ';' separated pieces, examined from left to right:
 *  - an empty piece (e.g., caused by a leading or doubled ';') is skipped,
 *    so that it does not hide a specification that comes after it;
 *  - a piece without ':' is taken as the language itself ("-*- tcl -*-");
 *  - a piece of the shape "mode: lang" yields lang (only the text up to a
 *    possible further ':' is considered);
 *  - any other "name: value" piece is ignored.
 *
 * @param modeline the contents of the mode line
 * @return the language found, or the empty string if there is none
 */
const std::string guessEmacsMode(const std::string &modeline) {
    const std::string::size_type npos = std::string::npos;

    std::string::size_type start = 0;
    for (;;) {
        const std::string::size_type semicolon = modeline.find(';', start);
        const std::string piece = trimModelinePiece(modeline.substr(start,
                semicolon == npos ? npos : semicolon - start));

        if (!piece.empty()) {
            const std::string::size_type colon = piece.find(':');

            // A single token is considered a language definition
            if (colon == npos)
                return piece;

            // otherwise, look for -*- mode: lang -*-
            if (trimModelinePiece(piece.substr(0, colon)) == "mode") {
                const std::string::size_type nextColon =
                        piece.find(':', colon + 1);
                return trimModelinePiece(piece.substr(colon + 1,
                        nextColon == npos ? npos : nextColon - colon - 1));
            }
        }

        if (semicolon == npos)
            break;
        start = semicolon + 1;
    }

    return "";
}
/**
 * Infers the language of the contents of the given stream by inspecting
 * its first two lines (both are read through read_line()).
 *
 * The following specifications are tried, in this order:
 *  -# an Emacs mode line ("-*- lang -*-" or "-*- mode: lang -*-"), searched
 *     in the second line first and then in the first one;
 *  -# a sha-bang line that uses env, e.g., "#! /usr/bin/env perl";
 *  -# a plain sha-bang line, e.g., "#! /bin/bash";
 *  -# a "<?lang" prefix, as in xml and php;
 *  -# a "<!DOCTYPE" declaration (in any letter case), which yields "xml".
 *
 * The last three are searched in the first line only.
 *
 * @param stream the stream to read the two lines from
 * @return the inferred language, or the empty string if nothing was inferred
 */
const string LanguageInfer::infer(istream &stream) {
    // the regular expression for finding the language specification in a
    // script file, such as #! /bin/bash
    static boost::regex
            langRegEx(
                    "#[[:blank:]]*![[:blank:]]*(?:[\\./]*)(?:[[:alnum:]]+[\\./]+)*([[:alnum:]]+)");

    // the regular expression for finding the language specification in a
    // script file that relies on env, such as #! /usr/bin/env perl
    static boost::regex
            langEnvRegEx(
                    "#[[:blank:]]*![[:blank:]]*(?:[\\./]*)(?:[[:alnum:]]+[\\./]+)*(?:env)[[:blank:]]+([[:alnum:]]+)");

    // the regular expression for finding the language specification in a
    // script file according to the Emacs convention: # -*- language -*-
    static boost::regex
            langRegExEmacs("-\\*-[[:blank:]]*([[:print:]]+).*-\\*-");

    // the regular expression for scripts starting with <?...
    // such as xml and php
    static boost::regex langXMLLikeScripts("<\\?([[:alnum:]]+)");

    // the regular expression for <!DOCTYPE
    static boost::regex langDocType("<![Dd][Oo][Cc][Tt][Yy][Pp][Ee]");

    typedef boost::match_results<std::string::const_iterator> Match;

    string firstLine;
    string secondLine;

    // read only the first line of the input
    read_line(&stream, firstLine);
    // and the second line
    read_line(&stream, secondLine);

    // NOTE: the result of every regex_search is tested before the match
    // results are inspected, since their contents are not specified after a
    // search that failed.
    Match what;

    // the Emacs specification has the precedence in order to correctly infer
    // that scripts of the shape
    // #!/bin/sh
    // # -*- tcl -*-
    // are Tcl scripts and not shell scripts;
    // thus the second line is tried first, and then the first line
    const string *const emacsCandidates[] = { &secondLine, &firstLine };
    for (int i = 0; i < 2; ++i) {
        if (boost::regex_search(*emacsCandidates[i], what, langRegExEmacs,
                boost::match_default) && what[1].matched) {
            const string guess = guessEmacsMode(what[1]);
            if (guess != "")
                return guess;
        }
    }

    // try also the env specification
    if (boost::regex_search(firstLine, what, langEnvRegEx,
            boost::match_default) && what[1].matched)
        return what[1];

    // try the sha-bang specification
    if (boost::regex_search(firstLine, what, langRegEx, boost::match_default)
            && what[1].matched)
        return what[1];

    // the xml like starting scripts
    if (boost::regex_search(firstLine, what, langXMLLikeScripts,
            boost::match_default) && what[1].matched)
        return what[1];

    // the doctype case
    if (boost::regex_search(firstLine, what, langDocType,
            boost::match_default))
        return "xml";

    return "";
}
}
|