1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
|
# coding: utf-8
require 'posix/spawn'
require 'yajl'
require 'timeout'
require 'logger'
require 'time'
# Error raised when talking to the mentos child process goes wrong.
# It descends from IOError, so a `rescue IOError` also catches it.
class MentosError < IOError; end
# Pygments provides access to the Pygments library via a pipe and a long-running
# Python process.
module Pygments
module Popen
include POSIX::Spawn
extend self
# Get things started by opening a pipe to mentos (the freshmaker), a
# Python process that talks to the Pygments library. We'll talk back and
# forth across this pipe.
#
# Logging goes to the file named by ENV['MENTOS_LOG']. When that variable is
# unset it is assigned the platform's null device ('NUL:' or '/dev/null').
#
# Sets @pid, @in, @out and @err from popen4. The at_exit hook that kills the
# child is installed only on the first call, so restarting the child (e.g.
# after a timeout) does not pile up one extra exit handler per restart.
def start #(pygments_path = File.expand_path('../../../vendor/pygments-main/', __FILE__))
  is_windows = RUBY_PLATFORM =~ /mswin|mingw/
  null_device = is_windows ? 'NUL:' : '/dev/null'

  begin
    @log = Logger.new(ENV['MENTOS_LOG'] ||= null_device)
    @log.level = Logger::INFO
    @log.datetime_format = "%Y-%m-%d %H:%M "
  rescue
    # The configured log target could not be used; log to the null device.
    @log = Logger.new(null_device)
  end

  # in debian we have python-pygments installed
  #ENV['PYGMENTS_PATH'] = pygments_path

  # Make sure we kill off the child when we're done. The hook calls #stop,
  # which always acts on the current @pid, so one registration is enough.
  unless @exit_hook_installed
    at_exit { stop "Exiting" }
    @exit_hook_installed = true
  end

  # A pipe to the mentos python process. #popen4 gives us
  # the pid and three IO objects to write and read.
  script = "#{File.expand_path('../mentos.py', __FILE__)}"
  script = "python " + script if is_windows
  @pid, @in, @out, @err = popen4(script)
  @log.info "[#{Time.now.iso8601}] Starting pid #{@pid.to_s} with fd #{@out.to_i.to_s}."
end
# Cross platform which command
# from http://stackoverflow.com/a/5471032/284795
#
# command - String name of the program to look for.
#
# Every directory in ENV['PATH'] is tried with each extension listed in
# ENV['PATHEXT'] (semicolon separated); without PATHEXT the bare name is used.
#
# Returns the String path of the first candidate that is executable and not
# a directory, or nil when there is no match or PATH is not set at all.
def which(command)
  exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
  # An unset PATH means "nowhere to look", not a crash on nil.
  ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).each do |dir|
    exts.each do |ext|
      path = File.join(dir, "#{command}#{ext}")
      return path if File.executable?(path) && !File.directory?(path)
    end
  end
  nil
end
# Stop the child process by issuing a kill -9.
#
# We then call waitpid() with the pid, which waits for that particular
# child and reaps it.
#
# kill() can set errno to ESRCH if, for some reason, the process
# is gone; regardless the final outcome of this method
# will be to set our @pid variable to nil.
#
# Technically, kill() can also fail with EPERM or EINVAL (wherein
# the signal isn't sent); but we have permissions, and
# we're not doing anything invalid here.
#
# The pipe ends held in @in, @out and @err are closed and cleared as well,
# so a restart does not leak three file descriptors per dead child.
#
# reason - String recorded in the log (when a logger has been set up).
#
# Safe to call repeatedly and before #start. Returns nil.
def stop(reason)
  if @pid
    begin
      Process.kill('KILL', @pid)
      Process.waitpid(@pid)
    rescue Errno::ESRCH, Errno::ECHILD
      # The child is already gone or already reaped; nothing left to do.
    end
  end

  [@in, @out, @err].each do |io|
    next if io.nil? || io.closed?
    begin
      io.close
    rescue IOError, SystemCallError
      # Closing can fail (e.g. flushing into a dead child's pipe); there is
      # nothing to salvage from these streams, so carry on.
    end
  end
  @in = @out = @err = nil

  # @log only exists once #start has run.
  @log.info "[#{Time.now.iso8601}] Killing pid: #{@pid.to_s}. Reason: #{reason}" if @log
  @pid = nil
end
# Is the mentos child still around?
#
# Without a @pid there is nothing to check. Otherwise signal 0 is sent to
# the pid, which only probes whether the process can be signalled.
# ENOENT and ESRCH mean there is no such process, so the answer is false.
# EPERM (e.g. the pid has been re-used by a process we may not signal) is
# re-raised as a more informative MentosError.
#
# Returns true if the child is alive, false otherwise.
def alive?
  return false unless @pid

  Process.kill(0, @pid) ? true : false
rescue Errno::ENOENT, Errno::ESRCH
  false
rescue Errno::EPERM
  raise MentosError, "EPERM checking if child process is alive."
end
# Public: Get the available Pygments formatters.
#
# Each entry reported by mentos is read as [name, description, aliases].
# The repetitive 'Formatter' suffix is stripped from the name in place.
#
# Returns a Hash keyed by the shortened formatter name; each value is a Hash
# with :name, :description and :aliases.
def formatters
  mentos(:get_all_formatters).each_with_object({}) do |(name, desc, aliases), table|
    name.sub!(/Formatter$/, '')
    table[name] = { :name => name, :description => desc, :aliases => aliases }
  end
end
# Public: Get all lexers from a serialized file. This avoids needing to spawn
# mentos when it's not really needed (e.g., one-off jobs, loading the Rails env, etc).
#
# Should be preferred to #lexers!
#
# The Debian-installed file is used when present; otherwise the file that
# sits next to the sources (used during the build) is read.
#
# Returns whatever object was marshalled into the lexer file (presumably the
# same structure #lexers! builds; the file's contents are not visible here).
# Raises MentosError if the lexer file does not exist.
def lexers
  # use upstream file path during build, use shipped file once installed
  upstream_file = File.expand_path('../../../lexers', __FILE__)
  debian_file = '/usr/share/ruby-pygments.rb/lexers'
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  lexer_file = File.exist?(debian_file) ? debian_file : upstream_file

  # binread opens, reads and closes in one step, so no handle is left open.
  # NOTE: Marshal.load must only ever be fed this trusted, shipped file.
  Marshal.load(File.binread(lexer_file))
rescue Errno::ENOENT
  raise MentosError, "Error loading lexer file. Was it created and vendored?"
end
# Public: Ask mentos itself for every available lexer.
#
# Each entry sent back is read positionally as
# [name, aliases, filenames, mimetypes].
#
# Returns a Hash keyed by lexer name; each value is a Hash with :name,
# :aliases, :filenames and :mimetypes.
def lexers!
  mentos(:get_all_lexers).each_with_object({}) do |entry, table|
    lexer_name = entry[0]
    table[lexer_name] = {
      :name      => lexer_name,
      :aliases   => entry[1],
      :filenames => entry[2],
      :mimetypes => entry[3]
    }
    # Hand-written entries, currently disabled:
    #table["dasm16"] = {:name=>"dasm16", :aliases=>["DASM16"], :filenames=>["*.dasm16", "*.dasm"], :mimetypes=>['text/x-dasm16']}
    #table["Puppet"] = {:name=>"Puppet", :aliases=>["puppet"], :filenames=>["*.pp"], :mimetypes=>[]}
    #table["Augeas"] = {:name=>"Augeas", :aliases=>["augeas"], :filenames=>["*.aug"], :mimetypes=>[]}
    #table["TOML"] = {:name=>"TOML", :aliases=>["toml"], :filenames=>["*.toml"], :mimetypes=>[]}
    #table["Slash"] = {:name=>"Slash", :aliases=>["slash"], :filenames=>["*.sl"], :mimetypes=>[]}
  end
end
# Public: List the filters Pygments makes available.
#
# Sends a :get_all_filters request through mentos and hands back its result
# unchanged (presumably an array of filter names; the payload is produced
# by mentos.py, which is not visible here).
def filters
mentos(:get_all_filters)
end
# Public: List the styles Pygments makes available.
#
# Sends a :get_all_styles request through mentos and hands back its result
# unchanged (presumably an array of style names; the payload is produced
# by mentos.py, which is not visible here).
def styles
mentos(:get_all_styles)
end
# Public: Return css for highlighted code.
#
# klass - String passed to mentos after 'html' (default: ''). A Hash given
#         in this position is treated as opts, and klass falls back to ''.
# opts  - Hash of options forwarded to mentos (default: {}).
#
# Returns the result of the mentos :css request.
def css(klass='', opts={})
  # Allow css(opts) as shorthand for css('', opts).
  klass, opts = '', klass if klass.is_a?(Hash)
  mentos(:css, ['html', klass], opts)
end
# Public: Return the name of a lexer.
#
# args - Positional arguments. A trailing Hash is taken off as the options;
#        after that, a trailing String is taken off as the code sample.
#        Whatever is left is forwarded to mentos as-is.
#
# Returns the result of the mentos :lexer_name_for request.
def lexer_name_for(*args)
  opts = args.last.is_a?(Hash) ? args.pop : {}
  code = args.last.is_a?(String) ? args.pop : nil
  mentos(:lexer_name_for, args, opts, code)
end
# Public: Highlight code.
#
# code - String of source code to highlight. nil or an empty String is
#        returned as-is without contacting mentos.
# opts - Hash of highlighting properties (default: {}). Its :options entry
#        is a nested Hash; :outencoding inside it defaults to 'utf-8'.
#
# The caller's opts (and its nested :options Hash) are never modified, so
# frozen or shared hashes can be passed safely.
#
# Returns whatever mentos returns for the :highlight request; when that
# value supports it, it is tagged with the output encoding.
def highlight(code, opts={})
  # If the caller didn't give us any code, we have nothing to do,
  # so return right away.
  return code if code.nil? || code.empty?

  # Fill in the defaults on copies rather than on the caller's hashes.
  options = (opts[:options] || {}).dup
  options[:outencoding] ||= 'utf-8'
  opts = opts.merge(:options => options)

  # Get back the string from mentos and force encoding if we can
  str = mentos(:highlight, nil, opts, code)
  str.force_encoding(options[:outencoding]) if str.respond_to?(:force_encoding)
  str
end
private
# Our 'rpc'-ish request to mentos.
#
# method        - Symbol name of the mentos method to invoke.
# args          - positional arguments sent in the header (default: []).
# kwargs        - Hash of keyword arguments sent in the header (default: {}).
#                 It is only read; the caller's hash is not frozen or changed.
# original_code - optional String of code sent after the header.
#
# The child is (re)started when it is not alive. The whole exchange runs
# under a timeout of ENV["MENTOS_TIMEOUT"] seconds (8 when unset or invalid).
#
# Returns the decoded result, or nil after a timeout (the child is stopped).
# Raises MentosError when the pipe breaks (EPIPE / EOFError).
def mentos(method, args=[], kwargs={}, original_code=nil)
  # Open the pipe if necessary
  start unless alive?

  begin
    # Timeout requests that take too long.
    # Invalid MENTOS_TIMEOUT results in just using default.
    timeout_time = Integer(ENV["MENTOS_TIMEOUT"]) rescue 8

    Timeout::timeout(timeout_time) do
      # For sanity checking on both sides of the pipe when highlighting, we prepend and
      # append an id. mentos checks that these are 8 character ids and that they match.
      # It then returns the id's back to Rubyland.
      id = (0...8).map { (65 + rand(25)).chr }.join
      code = add_ids(original_code, id) if original_code

      # Add metadata to the header and generate it.
      bytesize = code ? code.bytesize : 0

      # merge builds a new hash, so the caller's kwargs stay untouched.
      request_kwargs = kwargs.merge("fd" => @out.to_i, "id" => id, "bytes" => bytesize)
      out_header = Yajl.dump(:method => method, :args => args, :kwargs => request_kwargs)

      # Get the size of the header itself and write that.
      bits = get_fixed_bits_from_header(out_header)
      @in.write(bits)

      # mentos is now waiting for the header, and, potentially, code.
      write_data(out_header, code)

      # mentos will now return data to us. First it sends the header.
      header = get_header

      # Now handle the header, and read any more data required.
      res = handle_header_and_return(header, id)

      # Finally, return what we got.
      return_result(res, method)
    end
  rescue Timeout::Error
    # If we timeout, we need to clear out the pipe and start over.
    @log.error "[#{Time.now.iso8601}] Timeout on a mentos #{method} call"
    stop "Timeout on mentos #{method} call."
  end
rescue Errno::EPIPE, EOFError
  stop "EPIPE"
  raise MentosError, "EPIPE"
end
# Given the header mentos sent, read the response body it announces and,
# for highlight responses, verify and strip the id markers around it.
#
# header - raw header text from #get_header (nil/false means none arrived).
# id     - the 8 character id that was sent with the request.
#
# Returns the body: highlighted text for "highlight", otherwise the raw
# payload exactly as read.
# Raises MentosError (after stopping the child) when no header arrived, when
# a highlight response has no body, or when the ids do not match.
def handle_header_and_return(header, id)
  unless header
    @log.error "[#{Time.now.iso8601}] No header data back."
    stop "No header data back."
    raise MentosError, "No header received back."
  end

  header = header_to_json(header)

  # Read more bytes (the actual response body)
  body = @out.read(header["bytes"].to_i)
  return body unless header["method"] == "highlight"

  # Make sure we have a result back; else consider this an error.
  if body.nil?
    @log.warn "[#{Time.now.iso8601}] No highlight result back from mentos."
    stop "No highlight result back from mentos."
    raise MentosError, "No highlight result back from mentos."
  end

  # Remove the newline from Python
  body = body[0..-2]
  @log.info "[#{Time.now.iso8601}] Highlight in process."

  # Sanity check: the body must start and end with the id we sent.
  leading_id = body[0..7]
  trailing_id = body[-8..-1]
  unless leading_id == id && trailing_id == id
    @log.error "[#{Time.now.iso8601}] ID's did not match. Aborting."
    stop "ID's did not match. Aborting."
    raise MentosError, "ID's did not match. Aborting."
  end

  # We're good. Remove the ids and their padding (10 characters per side).
  body = body[10..-11]
  @log.info "[#{Time.now.iso8601}] Highlighting complete."
  body
end
# With the code, prepend the id (with two spaces to avoid escaping weirdness if
# the following text starts with a slash (like terminal code)), and append the
# id, with two spaces of padding also. This means we are sending over
# 8 characters + 2 spaces + code + 2 spaces + 8 characters, which is the
# 10 characters per side that are stripped from highlight responses.
#
# code - String of code to wrap. It is neither modified nor frozen.
# id   - String id to place on both sides.
#
# Returns a new String.
def add_ids(code, id)
  id + "  #{code}  #{id}"
end
# Send a request to mentos, the Python process: first the header, then,
# if there is any, the code.
#
# out_header - String header to write to the child's input pipe.
# code       - optional String written right after the header.
#
# The return value is not meant to be used.
def write_data(out_header, code=nil)
  @in.write(out_header)
  @log.info "[#{Time.now.iso8601}] Out header: #{out_header.to_s}"
  return unless code

  @in.write(code)
end
# Sanity check for a size field: it must be exactly 32 characters, each
# of them '0' or '1'.
#
# size - String to check (nil is accepted and fails the check).
#
# The pattern is anchored at both ends, so a longer string that merely
# contains 32 bits somewhere does not pass.
#
# Returns true or false.
def size_check(size)
  /\A[01]{32}\z/.match(size) ? true : false
end
# Read the header via the pipe.
#
# mentos first sends 33 bytes: a 32 character string of bits giving the
# header size, plus one trailing byte that is dropped (presumably a newline).
# The header itself is then read as that many bytes plus one.
#
# Returns the raw header String.
# Raises MentosError (after stopping the child) with
# "Size returned from mentos.py invalid." when the size field is malformed,
# or "Failed to get header." for any other failure, including end of file.
def get_header
  size = @out.read(33)
  # read returns nil at end of file: the child went away without answering.
  raise EOFError, "no size received from mentos" if size.nil?
  size = size[0..-2]

  # Sanity check the size
  unless size_check(size)
    @log.error "[#{Time.now.iso8601}] Size returned from mentos.py invalid."
    stop "Size returned from mentos.py invalid."
    raise MentosError, "Size returned from mentos.py invalid."
  end

  # Read the amount of bytes we should be expecting. We first
  # convert the string of bits into an integer.
  header_bytes = size.to_s.to_i(2) + 1
  @log.info "[#{Time.now.iso8601}] Size in: #{size.to_s} (#{header_bytes.to_s})"
  @out.read(header_bytes)
rescue MentosError
  # Already logged and the child already stopped; keep the specific message.
  raise
rescue
  @log.error "[#{Time.now.iso8601}] Failed to get header."
  stop "Failed to get header."
  raise MentosError, "Failed to get header."
end
# Turn the raw response into the value handed back to API callers.
#
# res    - raw response body.
# method - Symbol name of the mentos method that was called.
#
# :lexer_name_for, :highlight and :css produce plain text; every other
# method's response is parsed as JSON with symbolized keys. A plain String
# result has its trailing whitespace removed.
#
# Returns the text or the parsed Ruby object.
def return_result(res, method)
  plain_text_methods = [:lexer_name_for, :highlight, :css]
  res = Yajl.load(res, :symbolize_keys => true) unless plain_text_methods.include?(method)
  res.instance_of?(String) ? res.rstrip : res
end
# Parse the text header sent by mentos into a Hash for easy access.
#
# header - raw header String (JSON).
#
# Returns the parsed header.
# Raises MentosError carrying the header's "error" value when one is
# present; the child is stopped first so the pipe is not left in an
# inconsistent state.
def header_to_json(header)
  @log.info "[#{Time.now.iso8601}] In header: #{header.to_s} "
  parsed = Yajl.load(header)
  failure = parsed["error"]
  return parsed unless failure

  @log.error "[#{Time.now.iso8601}] Failed to convert header to JSON."
  stop failure
  raise MentosError, failure
end
# Encode the byte size of the outgoing header as a fixed-width bit string.
#
# out_header - String header about to be sent.
#
# Returns a 32 character String of '0'/'1' holding the low 32 bits of the
# size, most significant bit first, e.g. "00000000000000000000000000011110"
# for a 30 byte header.
def get_fixed_bits_from_header(out_header)
  low_32_bits = out_header.bytesize & 0xFFFFFFFF
  low_32_bits.to_s(2).rjust(32, '0')
end
end
end
|