#!/usr/bin/env python
#
# Copyright (c) 2014
# Yoshitaro Makise
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function
import io
import os
import subprocess
import sys
import re
import string
import optparse
import pygments.lexers
from pygments.token import Token
# Path of the Universal Ctags executable; filled in later from the gtags
# configuration (see load_ctags_path()).
UNIVERSAL_CTAGS = ""

# In most cases a lexer can be looked up with the lowercase form of the
# formal language name.  This dictionary lists the exceptions.
LANGUAGE_ALIASES = {
    'fantom': 'fan',
    'haxe': 'haXe',
    'sourcepawn': 'sp',
    'typescript': 'ts',
    'xbase': 'XBase'
}

# Every punctuation character except '-', '_' and '.', which are valid
# identifier characters in some languages.
if sys.version_info < (3,):
    PUNCTUATION_CHARACTERS = string.punctuation.translate(None, '-_.')
else:
    PUNCTUATION_CHARACTERS = string.punctuation.translate(str.maketrans('', '', '-_.'))

# close_fds is disabled on win32 — presumably because it conflicts with
# redirected std handles there; confirm against subprocess docs.
CLOSEFDS = sys.platform != 'win32'

# Line the ctags child echoes after each file so we know the output is done.
TERMINATOR = '###terminator###\n'
class ParserOptions:
    """Tunable flags shared by all tag parsers."""
    def __init__(self):
        # When True, leading/trailing punctuation is stripped from tags
        # (see PUNCTUATION_CHARACTERS).
        self.strip_punctuation = False
class PygmentsParser:
    """Reference-tag parser built on Pygments lexers.

    A file's extension is mapped to a language via *langmap*, the
    matching lexer is looked up, and every ``Token.Name`` token is
    reported as a reference tag.
    """
    class ContentParser:
        """Tokenizes the text of one file with a fixed lexer."""
        def __init__(self, path, text, lexer, options):
            self.path = path
            self.text = text
            self.lexer = lexer
            self.options = options
            self.lines_index = None  # populated by parse()
        def parse(self):
            """Return ``{(False, tag, lnum): ''}`` for every name token.

            The leading ``False`` marks the entry as a reference
            (definitions from ctags use ``True``; see handle_requests()).
            """
            self.lines_index = self.build_lines_index(self.text)
            tokens = self.lexer.get_tokens_unprocessed(self.text)
            return self.parse_tokens(tokens)
        # builds index of beginning of line
        def build_lines_index(self, text):
            """Return the character offsets where lines 2..N start, plus
            ``len(text)`` as a sentinel, so parse_tokens() can translate
            a token's character offset into a 1-based line number."""
            lines_index = []
            cur = 0
            while True:
                i = text.find('\n', cur)
                if i == -1:
                    break
                cur = i + 1
                lines_index.append(cur)
            lines_index.append(len(text)) # sentinel
            return lines_index
        def parse_tokens(self, tokens):
            """Collect ``Token.Name`` tokens keyed by (False, tag, lnum)."""
            result = {}
            cur_line = 0
            for index, tokentype, tag in tokens:
                if tokentype in Token.Name:
                    # we can assume index are delivered in ascending order,
                    # so the current line only ever moves forward
                    while self.lines_index[cur_line] <= index:
                        cur_line += 1
                    tag = re.sub(r'\s+', '', tag) # remove newline and spaces
                    if self.options.strip_punctuation:
                        tag = tag.strip(PUNCTUATION_CHARACTERS)
                    if tag:
                        # cur_line is 0-based; emitted line numbers are 1-based
                        result[(False, tag, cur_line + 1)] = ''
            return result
    def __init__(self, langmap, options):
        # langmap: dict mapping '.ext' -> language name (see parse_langmap())
        self.langmap = langmap
        self.options = options
    def parse(self, path):
        """Parse the file at *path*; returns {} when the extension is not
        mapped, the file cannot be read, or the file is empty."""
        lexer = self.get_lexer_by_langmap(path)
        if lexer:
            text = self.read_file(path)
            if text:
                parser = self.ContentParser(path, text, lexer, self.options)
                return parser.parse()
        return {}
    def get_lexer_by_langmap(self, path):
        """Look up a Pygments lexer from the file extension via the
        langmap; returns None for unmapped extensions.
        NOTE(review): pygments raises an exception for names it does not
        know, which is not caught here — confirm callers tolerate that."""
        ext = os.path.splitext(path)[1]
        if sys.platform == 'win32':
            # extensions were stored lowercased on win32 (see parse_langmap)
            lang = self.langmap.get(ext.lower(), None)
        else:
            lang = self.langmap.get(ext, None)
        if lang:
            name = lang.lower()
            if name in LANGUAGE_ALIASES:
                name = LANGUAGE_ALIASES[name]
            lexer = pygments.lexers.get_lexer_by_name(name)
            return lexer
        return None
    def read_file(self, path):
        """Read the whole file as text; returns None on any error.
        latin1 is used on Python 3 because every byte sequence decodes
        under it, so reading can never fail with a decode error."""
        try:
            if sys.version_info < (3,):
                with open(path, 'r') as f:
                    text = f.read()
                    return text
            else:
                with open(path, 'r', encoding='latin1') as f:
                    text = f.read()
                    return text
        except Exception as e:
            print(e, file=sys.stderr)
            return None
class CtagsParser:
    """Definition-tag parser that drives a long-lived Universal Ctags
    child process in ``--filter`` mode: one file path is written to the
    child's stdin per request, and tag lines are read back until the
    terminator line appears."""
    def __init__(self, ctags_command, options):
        # --filter: read paths on stdin; --filter-terminator: line echoed
        # after each file so parse() knows when to stop reading.
        self.process = subprocess.Popen([ctags_command, '-xu', '--tag-relative=no', '--filter', '--filter-terminator=' + TERMINATOR, '--format=1'], bufsize=-1,
                                        stdin=subprocess.PIPE, stdout=subprocess.PIPE, close_fds=CLOSEFDS,
                                        universal_newlines=True)
        if sys.version_info < (3,):
            self.child_stdout = self.process.stdout
        else:
            # Re-wrap the child's stdout as latin1 so arbitrary bytes in
            # tag output cannot raise a decode error; our own stdout is
            # re-wrapped to match.  NOTE(review): this rebinds the global
            # sys.stdout as a side effect of constructing this parser.
            self.child_stdout = io.TextIOWrapper(self.process.stdout.buffer, encoding='latin1')
            sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='latin1')
        self.child_stdin = self.process.stdin
        self.options = options
    def parse(self, path):
        """Send *path* to the ctags child and collect its tag lines.

        Returns ``{(True, tag, lnum): image}``; ``True`` marks the entry
        as a definition (references from pygments use ``False``)."""
        print(path, file=self.child_stdin)
        self.child_stdin.flush()
        result = {}
        while True:
            line = self.child_stdout.readline()
            if not line or line.startswith(TERMINATOR):
                break
            # expected line shape: <tag> <line-number> <path> <source image>
            match = re.search(r'(\S+)\s+(\d+)\s+' + re.escape(path) + r'\s+(.*)$', line)
            if match:
                (tag, lnum, image) = match.groups()
                if self.options.strip_punctuation:
                    tag = tag.strip(PUNCTUATION_CHARACTERS)
                if tag:
                    result[(True, tag, int(lnum))] = image
        return result
class MergingParser:
    """Combines a definition parser (ctags) with a reference parser
    (pygments).

    The merged result contains all entries from both parsers, except
    that a reference entry is dropped when the definition parser already
    reported the same tag on the same line — otherwise each definition
    would also be emitted as a reference.
    """
    def __init__(self, def_parser, ref_parser):
        self.def_parser = def_parser
        self.ref_parser = ref_parser
    def parse(self, path):
        """Parse *path* with both parsers and return the merged dict of
        ``(isdef, tag, lnum) -> image`` entries."""
        def_result = self.def_parser.parse(path)
        ref_result = self.ref_parser.parse(path)
        result = def_result.copy()
        result.update(ref_result)
        for (isdef, tag, lnum) in def_result:
            ref_entry = (False, tag, lnum)
            if ref_entry in ref_result:
                # pop() with default: safe even if the entry was already
                # removed by an earlier iteration (del would KeyError).
                result.pop(ref_entry, None)
        return result
def parse_langmap(string):
    """Parse a gtags --langmap value like 'Ruby:.rb.ruby,C++:.cc'.

    Returns a dict mapping '.ext' to the language name.  Languages whose
    name starts with a lowercase letter are skipped: those are handled
    by the builtin parser.  On win32 the extensions are lowercased so
    lookups can be case-insensitive.
    """
    langmap = {}
    fold_case = sys.platform == 'win32'
    for mapping in string.split(','):
        lang, exts = mapping.split(':')
        if lang[0].islower():
            continue  # lowercase name: left to the builtin parser
        for ext in exts.split('.'):
            if not ext:
                continue
            key = '.' + (ext.lower() if fold_case else ext)
            langmap[key] = lang
    return langmap
def handle_requests(langmap, options):
    """Serve tag requests: read one file path per line from stdin and
    print that file's tags followed by the terminator line, until EOF.

    Each output line is "<D|R> <tag> <lnum> <path> <image>", where D
    marks a definition and R a reference.
    """
    parser = _create_parser(langmap, options)
    while True:
        path = sys.stdin.readline()
        if not path:
            break  # EOF: the client closed the pipe
        path = path.rstrip()
        tags = parser.parse(path)
        for (isdef, tag, lnum), image in tags.items():
            typ = 'D' if isdef else 'R'
            print(typ, tag, lnum, path, image)
        print(TERMINATOR, end='')
        sys.stdout.flush()  # the client blocks until it sees the terminator

def _create_parser(langmap, options):
    """Pick the best available parser: ctags merged with pygments when a
    Universal Ctags command is configured, pygments alone otherwise."""
    # Update ctags's path from the configuration file
    global UNIVERSAL_CTAGS
    path = load_ctags_path()
    if path != '':
        UNIVERSAL_CTAGS = path
    if UNIVERSAL_CTAGS != '' and UNIVERSAL_CTAGS != 'no':
        pygments_parser = PygmentsParser(langmap, options)
        try:
            ctags_parser = CtagsParser(UNIVERSAL_CTAGS, options)
            return MergingParser(ctags_parser, pygments_parser)
        except Exception:
            # ctags could not be started; fall back to pygments only.
            return pygments_parser
    return PygmentsParser(langmap, options)
def get_parser_options_from_env(parser_options):
    """Apply option flags from the GTAGSPYGMENTSOPTS environment
    variable (a comma-separated list) to *parser_options* in place.

    Currently only the 'strippunctuation' flag is recognized.
    """
    raw = os.getenv('GTAGSPYGMENTSOPTS')
    if not raw:
        return
    for token in raw.split(','):
        if token.strip() == 'strippunctuation':
            parser_options.strip_punctuation = True
def load_ctags_path():
    """Ask gtags for the configured ctags command path.

    Runs ``gtags --config=ctagscom`` and returns the first line of its
    output, or '' when the command fails (e.g. gtags is not installed).
    """
    proc = subprocess.Popen("gtags --config=ctagscom", shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if proc.wait() != 0:
        return ''
    if sys.platform == 'win32' and sys.version_info >= (3,):
        # decode via a latin1 text wrapper on win32 Python 3
        return io.TextIOWrapper(proc.stdout, encoding='latin1').readline().rstrip()
    return proc.stdout.readline().rstrip().decode('latin1')
def main():
    """Entry point: parse the mandatory --langmap option, pick up any
    environment-driven options, and serve tag requests over stdin."""
    cli = optparse.OptionParser()
    cli.add_option('--langmap', dest='langmap')
    opts, _args = cli.parse_args()
    if not opts.langmap:
        cli.error('--langmap option not given')
    langmap = parse_langmap(opts.langmap)
    parser_options = ParserOptions()
    get_parser_options_from_env(parser_options)
    handle_requests(langmap, parser_options)

if __name__ == '__main__':
    main()