1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
package chroma
import (
"fmt"
"strings"
)
var (
defaultOptions = &TokeniseOptions{
State: "root",
EnsureLF: true,
}
)
// Config for a lexer.
type Config struct {
// Name of the lexer.
Name string `xml:"name,omitempty"`
// Shortcuts for the lexer
Aliases []string `xml:"alias,omitempty"`
// File name globs
Filenames []string `xml:"filename,omitempty"`
// Secondary file name globs
AliasFilenames []string `xml:"alias_filename,omitempty"`
// MIME types
MimeTypes []string `xml:"mime_type,omitempty"`
// Regex matching is case-insensitive.
CaseInsensitive bool `xml:"case_insensitive,omitempty"`
// Regex matches all characters.
DotAll bool `xml:"dot_all,omitempty"`
// Regex does not match across lines ($ matches EOL).
//
// Defaults to multiline.
NotMultiline bool `xml:"not_multiline,omitempty"`
// Don't strip leading and trailing newlines from the input.
// DontStripNL bool
// Strip all leading and trailing whitespace from the input
// StripAll bool
// Make sure that the input ends with a newline. This
// is required for some lexers that consume input linewise.
EnsureNL bool `xml:"ensure_nl,omitempty"`
// If given and greater than 0, expand tabs in the input.
// TabSize int
// Priority of lexer.
//
// If this is 0 it will be treated as a default of 1.
Priority float32 `xml:"priority,omitempty"`
// Analyse is a list of regexes to match against the input.
//
// If a match is found, the score is returned if single attribute is set to true,
// otherwise the sum of all the score of matching patterns will be
// used as the final score.
Analyse *AnalyseConfig `xml:"analyse,omitempty"`
}
// AnalyseConfig defines the list of regexes analysers.
type AnalyseConfig struct {
Regexes []RegexConfig `xml:"regex,omitempty"`
// If true, the first matching score is returned.
First bool `xml:"first,attr"`
}
// RegexConfig defines a single regex pattern and its score in case of match.
type RegexConfig struct {
Pattern string `xml:"pattern,attr"`
Score float32 `xml:"score,attr"`
}
// Token output to formatter.
type Token struct {
Type TokenType `json:"type"`
Value string `json:"value"`
}
func (t *Token) String() string { return t.Value }
func (t *Token) GoString() string { return fmt.Sprintf("&Token{%s, %q}", t.Type, t.Value) }
// Clone returns a clone of the Token.
func (t *Token) Clone() Token {
return *t
}
// EOF is returned by lexers at the end of input.
var EOF Token
// TokeniseOptions contains options for tokenisers.
type TokeniseOptions struct {
// State to start tokenisation in. Defaults to "root".
State string
// Nested tokenisation.
Nested bool
// If true, all EOLs are converted into LF
// by replacing CRLF and CR
EnsureLF bool
}
// A Lexer for tokenising source code.
type Lexer interface {
// Config describing the features of the Lexer.
Config() *Config
// Tokenise returns an Iterator over tokens in text.
Tokenise(options *TokeniseOptions, text string) (Iterator, error)
// SetRegistry sets the registry this Lexer is associated with.
//
// The registry should be used by the Lexer if it needs to look up other
// lexers.
SetRegistry(registry *LexerRegistry) Lexer
// SetAnalyser sets a function the Lexer should use for scoring how
// likely a fragment of text is to match this lexer, between 0.0 and 1.0.
// A value of 1 indicates high confidence.
//
// Lexers may ignore this if they implement their own analysers.
SetAnalyser(analyser func(text string) float32) Lexer
// AnalyseText scores how likely a fragment of text is to match
// this lexer, between 0.0 and 1.0. A value of 1 indicates high confidence.
AnalyseText(text string) float32
}
// Lexers is a slice of lexers sortable by name.
type Lexers []Lexer
func (l Lexers) Len() int { return len(l) }
func (l Lexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l Lexers) Less(i, j int) bool {
return strings.ToLower(l[i].Config().Name) < strings.ToLower(l[j].Config().Name)
}
// PrioritisedLexers is a slice of lexers sortable by priority.
type PrioritisedLexers []Lexer
func (l PrioritisedLexers) Len() int { return len(l) }
func (l PrioritisedLexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l PrioritisedLexers) Less(i, j int) bool {
ip := l[i].Config().Priority
if ip == 0 {
ip = 1
}
jp := l[j].Config().Priority
if jp == 0 {
jp = 1
}
return ip > jp
}
// Analyser determines how appropriate this lexer is for the given text.
type Analyser interface {
AnalyseText(text string) float32
}
|