1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
|
# -*- coding: utf-8 -*- #
# frozen_string_literal: true
module Rouge
module Lexers
# The {ConsoleLexer} class is intended to lex content that represents the
# text that would display in a console/terminal. As distinct from the
# {Shell} lexer, {ConsoleLexer} will try to parse out the prompt from each
# line before passing the remainder of the line to the language lexer for
# the shell (by default, the {Shell} lexer).
#
# The {ConsoleLexer} class accepts five options:
# 1. **lang**: the shell language to lex (default: `shell`);
# 2. **output**: the output language (default: `plaintext?token=Generic.Output`);
# 3. **prompt**: comma-separated list of strings that indicate the end of a
# prompt (default: `$,#,>,;`);
# 4. **comments**: whether to lex lines that begin with `#` as comments;
# 5. **error**: comma-separated list of strings that indicate the start of an
# error message.
#
# The comments option, if enabled, will lex lines that begin with a `#` as a
# comment. Please note that this option will only work if the prompt is
# either not manually specified or, if manually specified, does not include
# the `#` character.
#
# Most Markdown lexers that recognise GitHub-Flavored Markdown syntax will
# pass the language string to Rouge as written in the original document.
# This allows an end user to pass options to {ConsoleLexer} by passing them
# as CGI-style parameters as in the example below.
#
# @example
# <pre>Here's some regular text.
#
# ```console?comments=true
# # This is a comment
# $ cp foo bar
# ```
#
# Some more regular text.</pre>
class ConsoleLexer < Lexer
# Identification of this lexer: its primary tag, alternative names, the
# filename glob it is associated with, and a one-line description.
# NOTE(review): `tag`, `aliases`, `filenames`, `desc` and `option` are not
# defined in this file — presumably class-level macros inherited from Lexer.
tag 'console'
aliases 'terminal', 'shell_session', 'shell-session'
filenames '*.cap'
desc 'A generic lexer for shell sessions. Accepts ?lang and ?output lexer options, a ?prompt option, ?comments to enable # comments, and ?error to handle error messages.'
# Human-readable descriptions of the five options; the same five option
# names (:lang, :output, :prompt, :comments, :error) are read in #initialize.
option :lang, 'the shell language to lex (default: shell)'
option :output, 'the output language (default: plaintext?token=Generic.Output)'
option :prompt, 'comma-separated list of strings that indicate the end of a prompt. (default: $,#,>,;)'
option :comments, 'enable hash-comments at the start of a line - otherwise interpreted as a prompt. (default: false, implied by ?prompt not containing `#`)'
option :error, 'comma-separated list of strings that indicate the start of an error message'
# Forwards all arguments to the parent initializer, then captures the five
# console-specific options in instance variables.
#
# NOTE(review): `list_option`, `lexer_option` and `bool_option` are defined
# outside this file; each is given a block that presumably supplies the value
# used when the option was not passed — confirm against their definitions.
# The statement order is kept exactly as written because those helpers may
# consume the options as they are read.
def initialize(*)
super
# Prompt terminators supplied by the user (see #end_chars for the defaults).
@prompt = list_option(:prompt) { nil }
# Lexer for the command text that follows a prompt (resolved in #lang_lexer).
@lang = lexer_option(:lang) { 'shell' }
# Lexer for non-prompt lines, i.e. command output (resolved in #output_lexer).
@output = lexer_option(:output) { PlainText.new(token: Generic::Output) }
# :guess is a sentinel that #allow_comments? resolves from the prompt setting.
@comments = bool_option(:comments) { :guess }
# Prefixes that mark a line as an error message (see #error_regex).
@error = list_option(:error) { nil }
end
# Reports whether hash-comments are enabled.
#
# Any value of @comments other than the :guess sentinel is returned as-is.
# While it is still :guess, comments are enabled only when a prompt list was
# supplied, is non-empty, and `#` is not one of its end markers.
def allow_comments?
  return @comments unless @comments == :guess

  @prompt && !@prompt.empty? && !end_chars.include?('#')
end
# Pattern for a comment line: anchored at the start of the string, optional
# whitespace, then a `#`.
def comment_regex
  %r{\A\s*?#}
end
# Returns the list of strings that terminate a prompt, computed once and
# cached in @end_chars.
#
# A user-supplied prompt list wins (with empty entries dropped). Otherwise
# the defaults are used, and `#` is left out of them when comments are
# allowed.
def end_chars
  return @end_chars if @end_chars

  @end_chars =
    if @prompt.any?
      @prompt.reject(&:empty?)
    elsif allow_comments?
      %w[$ > ;]
    else
      %w[$ # > ;]
    end
end
# Returns a pattern matching any configured error prefix at the start of a
# line, or nil when no error prefixes were configured. A built pattern is
# cached in @error_regex.
def error_regex
  return @error_regex if @error_regex

  @error_regex =
    if @error.any?
      alternatives = @error.map { |prefix| Regexp.escape(prefix) }.join('|')
      /^(?:#{alternatives})/
    end
end
# Resolves @lang into a lexer instance on first use and caches the result.
#
# Accepted forms of @lang: nil (a Shell lexer is built), a String (looked up
# through Lexer.find), a Class (instantiated), or an existing Lexer instance
# (used directly). New instances are constructed with this lexer's options.
# Any other value yields nil.
def lang_lexer
  return @lang_lexer if @lang_lexer

  @lang_lexer =
    case @lang
    when nil    then Shell.new(options)
    when String then Lexer.find(@lang).new(options)
    when Class  then @lang.new(options)
    when Lexer  then @lang
    end
end
# Pattern for one line of input: the shortest run of characters (group 1)
# up to a newline or an end-of-line position (group 2).
def line_regex
  %r{(.*?)(\n|$)}
end
# Resolves @output into a lexer instance on first use and caches the result.
#
# Accepted forms of @output: nil (a PlainText lexer emitting Generic::Output
# is built), a String (looked up through Lexer.find), a Class (instantiated),
# or an existing Lexer instance (used directly). Lexers built from a String
# or Class receive this lexer's options. Any other value yields nil.
def output_lexer
  return @output_lexer if @output_lexer

  @output_lexer =
    case @output
    when nil    then PlainText.new(token: Generic::Output)
    when String then Lexer.find(@output).new(options)
    when Class  then @output.new(options)
    when Lexer  then @output
    end
end
# Consumes one line from the scanner and emits tokens for it.
#
# The line is classified by the first rule that matches, in this order:
#   1. elision (`<...>` or a run of dots)  -> Comment
#   2. prompt                              -> Generic::Prompt, optional
#                                             Text::Whitespace, then the rest
#                                             is handed to the language lexer
#   3. comment                             -> Comment
#   4. error prefix                        -> Generic::Error
#   5. anything else                       -> handed to the output lexer
#
# Whichever delegate lexer is not continuing on this line is reset.
#
# input  - a scanner positioned at the start of a line (must respond to
#          `scan` and `[]`).
# output - the token sink; receives (token, text) pairs.
def process_line(input, &output)
  input.scan(line_regex)
  line = input[0]

  # As a nicety, elisions are supported: a line holding only `<...>` or one
  # or more `.` characters is treated as a comment.
  if line =~ /\A\s*(?:<[.]+>|[.]+)\s*\z/
    puts "console: matched snip #{line.inspect}" if @debug
    output_lexer.reset!
    lang_lexer.reset!

    yield Comment, line
  elsif (prompt = prompt_regex.match(line))
    puts "console: matched prompt #{line.inspect}" if @debug
    output_lexer.reset!

    yield Generic::Prompt, prompt[0]

    # Peel off whitespace after the prompt so the language lexer sees the
    # "real" beginning of the command.
    indent = /\A\s*/.match(prompt.post_match)
    yield Text::Whitespace, indent[0] unless indent[0].empty?

    lang_lexer.continue_lex(indent.post_match, &output)
  elsif comment_regex =~ line.strip
    puts "console: matched comment #{line.inspect}" if @debug
    output_lexer.reset!
    lang_lexer.reset!

    yield Comment, line
  elsif error_regex =~ line
    puts "console: matched error #{line.inspect}" if @debug
    output_lexer.reset!
    lang_lexer.reset!

    yield Generic::Error, line
  else
    puts "console: matched output #{line.inspect}" if @debug
    lang_lexer.reset!

    output_lexer.continue_lex(line, &output)
  end
end
# Pattern for the text allowed before a prompt terminator. When comments are
# allowed the prefix may not contain `<` or `#`; otherwise any characters are
# accepted. Both forms are lazy and use multiline mode.
def prompt_prefix_regex
  return /.*?/m unless allow_comments?

  /[^<#]*?/m
end
# Returns the pattern that recognises a prompt: a line start, the permitted
# prefix (see #prompt_prefix_regex), then any one of the escaped prompt
# terminators from #end_chars. Built once and cached in @prompt_regex.
def prompt_regex
  return @prompt_regex if @prompt_regex

  prefix = prompt_prefix_regex
  terminators = end_chars.map { |marker| Regexp.escape(marker) }.join('|')
  @prompt_regex = /^#{prefix}(?:#{terminators})/
end
# Lexes the whole input string line by line.
#
# Both delegate lexers are reset first, then #process_line is called
# repeatedly on a scanner over the input until it is exhausted.
#
# input  - the String to lex.
# output - the token sink, forwarded to #process_line.
def stream_tokens(input, &output)
  scanner = StringScanner.new(input)

  lang_lexer.reset!
  output_lexer.reset!

  process_line(scanner, &output) until scanner.eos?
end
end
end
end
|