// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package lsppos provides utilities for working with LSP positions. Much of
// this functionality is duplicated from the internal/span package, but this
// package is simpler and more accurate with respect to newline terminated
// content.
//
// See https://microsoft.github.io/language-server-protocol/specification#textDocuments
// for a description of LSP positions. Notably:
// - Positions are specified by a 0-based line count and 0-based utf-16
// character offset.
// - Positions are line-ending agnostic: there is no way to specify \r|\n or
// \n|. Instead the former maps to the end of the current line, and the
// latter to the start of the next line.
package lsppos
import (
"bytes"
"errors"
"sort"
"unicode/utf8"
"golang.org/x/tools/gopls/internal/lsp/protocol"
)
// Mapper maps utf-8 byte offsets to LSP positions for a single file.
type Mapper struct {
	nonASCII bool   // true if content contains any byte >= utf8.RuneSelf; enables the UTF-16 slow path
	content  []byte // the file content being mapped

	// Start-of-line positions. If src is newline-terminated, the final entry
	// will be len(content).
	lines []int
}
// NewMapper creates a new Mapper for the given content.
func NewMapper(content []byte) *Mapper {
	nlines := bytes.Count(content, []byte("\n"))
	m := &Mapper{
		content: content,
		lines:   make([]int, 1, nlines+1), // entry 0 is the start of line 0
	}
	// A single pass records every start-of-line offset and detects whether
	// the content contains any non-ASCII bytes. The cases are mutually
	// exclusive: a byte is either '\n' or possibly >= utf8.RuneSelf.
	for i, c := range content {
		switch {
		case c == '\n':
			m.lines = append(m.lines, i+1)
		case c >= utf8.RuneSelf:
			m.nonASCII = true
		}
	}
	return m
}
// LineColUTF16 returns the 0-based UTF-16 line and character index for the
// given offset. It returns -1, -1 if offset is out of bounds for the file
// being mapped.
func (m *Mapper) LineColUTF16(offset int) (line, char int) {
	if offset < 0 || offset > len(m.content) {
		return -1, -1
	}
	// Find the first line that starts strictly after offset; the offset then
	// lies on the preceding line.
	nextLine := sort.Search(len(m.lines), func(i int) bool {
		return offset < m.lines[i]
	})
	if nextLine == 0 {
		// Defensive: unreachable in practice, since m.lines[0] == 0 and
		// offset >= 0 was checked above.
		return -1, -1
	}
	line = nextLine - 1
	start := m.lines[line]
	var charOffset int
	if m.nonASCII {
		charOffset = UTF16len(m.content[start:offset])
	} else {
		// Fast path: content is pure ASCII, so the byte offset within the
		// line equals the UTF-16 code-unit offset.
		charOffset = offset - start
	}
	// eol is the byte offset just past the last content byte of this line
	// (i.e. the position of the '\n', or end of file for the final line).
	var eol int
	if line == len(m.lines)-1 {
		eol = len(m.content)
	} else {
		eol = m.lines[line+1] - 1
	}
	// Adjustment for line-endings: \r|\n is the same as |\r\n.
	// A position between '\r' and '\n' is reported as before the '\r', since
	// LSP positions cannot address the interior of a line terminator.
	if offset == eol && offset > 0 && m.content[offset-1] == '\r' {
		charOffset--
	}
	return line, charOffset
}
// Position returns the protocol position corresponding to the given offset. It
// returns false if offset is out of bounds for the file being mapped.
func (m *Mapper) Position(offset int) (protocol.Position, bool) {
	line, char := m.LineColUTF16(offset)
	if line < 0 {
		// LineColUTF16 reports out-of-bounds offsets as -1, -1.
		return protocol.Position{}, false
	}
	pos := protocol.Position{
		Line:      uint32(line),
		Character: uint32(char),
	}
	return pos, true
}
// Range returns the protocol range corresponding to the given start and end
// offsets.
func (m *Mapper) Range(start, end int) (protocol.Range, error) {
	var (
		rng protocol.Range
		ok  bool
	)
	if rng.Start, ok = m.Position(start); !ok {
		return protocol.Range{}, errors.New("invalid start position")
	}
	if rng.End, ok = m.Position(end); !ok {
		return protocol.Range{}, errors.New("invalid end position")
	}
	return rng, nil
}
// UTF16len returns the UTF-16 length of the UTF-8 encoded content, were it to
// be re-encoded as UTF-16.
func UTF16len(buf []byte) int {
	// This function copies buf, but microbenchmarks showed it to be faster than
	// using utf8.DecodeRune due to inlining and avoiding bounds checks.
	n := 0
	for _, r := range string(buf) {
		if r >= 1<<16 {
			// Runes outside the BMP require a surrogate pair in UTF-16.
			n += 2
		} else {
			n++
		}
	}
	return n
}