1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
|
package chroma
import (
"bytes"
)
type delegatingLexer struct {
root Lexer
language Lexer
}
// DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
// inside HTML or PHP inside plain text.
//
// It takes two lexer as arguments: a root lexer and a language lexer. First everything is scanned using the language
// lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root lexer.
// Finally, these two sets of tokens are merged.
//
// The lexers from the template lexer package use this base lexer.
func DelegatingLexer(root Lexer, language Lexer) Lexer {
return &delegatingLexer{
root: root,
language: language,
}
}
func (d *delegatingLexer) AnalyseText(text string) float32 {
return d.root.AnalyseText(text)
}
func (d *delegatingLexer) SetAnalyser(analyser func(text string) float32) Lexer {
d.root.SetAnalyser(analyser)
return d
}
func (d *delegatingLexer) SetRegistry(r *LexerRegistry) Lexer {
d.root.SetRegistry(r)
d.language.SetRegistry(r)
return d
}
func (d *delegatingLexer) Config() *Config {
return d.language.Config()
}
// An insertion is the character range where language tokens should be inserted.
type insertion struct {
start, end int
tokens []Token
}
func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) { // nolint: gocognit
tokens, err := Tokenise(Coalesce(d.language), options, text)
if err != nil {
return nil, err
}
// Compute insertions and gather "Other" tokens.
others := &bytes.Buffer{}
insertions := []*insertion{}
var insert *insertion
offset := 0
var last Token
for _, t := range tokens {
if t.Type == Other {
if last != EOF && insert != nil && last.Type != Other {
insert.end = offset
}
others.WriteString(t.Value)
} else {
if last == EOF || last.Type == Other {
insert = &insertion{start: offset}
insertions = append(insertions, insert)
}
insert.tokens = append(insert.tokens, t)
}
last = t
offset += len(t.Value)
}
if len(insertions) == 0 {
return d.root.Tokenise(options, text)
}
// Lex the other tokens.
rootTokens, err := Tokenise(Coalesce(d.root), options, others.String())
if err != nil {
return nil, err
}
// Interleave the two sets of tokens.
var out []Token
offset = 0 // Offset into text.
tokenIndex := 0
nextToken := func() Token {
if tokenIndex >= len(rootTokens) {
return EOF
}
t := rootTokens[tokenIndex]
tokenIndex++
return t
}
insertionIndex := 0
nextInsertion := func() *insertion {
if insertionIndex >= len(insertions) {
return nil
}
i := insertions[insertionIndex]
insertionIndex++
return i
}
t := nextToken()
i := nextInsertion()
for t != EOF || i != nil {
// fmt.Printf("%d->%d:%q %d->%d:%q\n", offset, offset+len(t.Value), t.Value, i.start, i.end, Stringify(i.tokens...))
if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
var l Token
l, t = splitToken(t, i.start-offset)
if l != EOF {
out = append(out, l)
offset += len(l.Value)
}
out = append(out, i.tokens...)
offset += i.end - i.start
if t == EOF {
t = nextToken()
}
i = nextInsertion()
} else {
out = append(out, t)
offset += len(t.Value)
t = nextToken()
}
}
return Literator(out...), nil
}
func splitToken(t Token, offset int) (l Token, r Token) {
if t == EOF {
return EOF, EOF
}
if offset == 0 {
return EOF, t
}
if offset == len(t.Value) {
return t, EOF
}
l = t.Clone()
r = t.Clone()
l.Value = l.Value[:offset]
r.Value = r.Value[offset:]
return
}
|