File: mapper_test.go

package info (click to toggle)
golang-golang-x-tools 1%3A0.25.0%2Bds-1
links: PTS, VCS
area: main
in suites: experimental, forky, sid, trixie
size: 22,724 kB
sloc: javascript: 2,027; asm: 1,645; sh: 166; yacc: 155; makefile: 49; ansic: 8
file content (449 lines) | stat: -rw-r--r-- 12,938 bytes
parent folder | download | duplicates (2)
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package protocol_test

import (
	"fmt"
	"strings"
	"testing"

	"golang.org/x/tools/gopls/internal/protocol"
)

// This file tests Mapper's logic for converting between offsets,
// UTF-8 columns, and UTF-16 columns. (The strange form attests to
// earlier abstractions.)

// 𐐀 is U+10400 = [F0 90 90 80] in UTF-8, [D801 DC00] in UTF-16.
var funnyString = []byte("𐐀23\n𐐀45")

// toUTF16Tests is the table driving TestToUTF16. Each entry places a cursor
// in input, either by line/col (used when line > 0) or, failing that, by
// offset (used when offset >= 0), and records the 1-indexed UTF-16 column
// expected for it. For successful cases, offset is additionally used to
// slice the cursor's line into pre and post, so line/col and offset must
// describe the same location.
var toUTF16Tests = []struct {
	scenario    string
	input       []byte
	line        int    // 1-indexed line number; <= 0 means "locate by offset"
	col         int    // 1-indexed byte position in line
	offset      int    // 0-indexed byte offset into input
	resUTF16col int    // 1-indexed UTF-16 col number
	pre         string // everything before the cursor on the line
	post        string // everything from the cursor onwards
	err         string // expected error string from the position conversion
	issue       *bool  // if non-nil and false, the case is skipped
}{
	{
		scenario: "cursor missing content",
		input:    nil,
		offset:   -1,
		err:      "point has neither offset nor line/column",
	},
	{
		scenario: "cursor missing position",
		input:    funnyString,
		line:     -1,
		col:      -1,
		offset:   -1,
		err:      "point has neither offset nor line/column",
	},
	{
		scenario:    "zero length input; cursor at first col, first line",
		input:       []byte(""),
		line:        1,
		col:         1,
		offset:      0,
		resUTF16col: 1,
	},
	{
		scenario:    "cursor before funny character; first line",
		input:       funnyString,
		line:        1,
		col:         1,
		offset:      0,
		resUTF16col: 1,
		pre:         "",
		post:        "𐐀23",
	},
	{
		scenario:    "cursor after funny character; first line",
		input:       funnyString,
		line:        1,
		col:         5, // 4 + 1 (1-indexed)
		offset:      4, // (used only for pre/post since we have line+col)
		resUTF16col: 3, // 2 + 1 (1-indexed)
		pre:         "𐐀",
		post:        "23",
	},
	{
		scenario:    "cursor after last character on first line",
		input:       funnyString,
		line:        1,
		col:         7, // 4 + 1 + 1 + 1 (1-indexed)
		offset:      6, // 4 + 1 + 1 (used only for pre/post since we have line+col)
		resUTF16col: 5, // 2 + 1 + 1 + 1 (1-indexed)
		pre:         "𐐀23",
		post:        "",
	},
	{
		scenario:    "cursor before funny character; second line",
		input:       funnyString,
		line:        2,
		col:         1,
		offset:      7, // length of first line (used only for pre/post since we have line+col)
		resUTF16col: 1,
		pre:         "",
		post:        "𐐀45",
	},
	{
		scenario:    "cursor after funny character; second line",
		input:       funnyString,
		line:        2,  // was 1, which silently re-tested the first line
		col:         5,  // 4 + 1 (1-indexed)
		offset:      11, // 7 (length of first line) + 4 (used only for pre/post since we have line+col)
		resUTF16col: 3,  // 2 + 1 (1-indexed)
		pre:         "𐐀",
		post:        "45",
	},
	{
		scenario:    "cursor after last character on second line",
		input:       funnyString,
		line:        2,
		col:         7,  // 4 + 1 + 1 + 1 (1-indexed)
		offset:      13, // 7 (length of first line) + 4 + 1 + 1 (used only for pre/post since we have line+col)
		resUTF16col: 5,  // 2 + 1 + 1 + 1 (1-indexed)
		pre:         "𐐀45",
		post:        "",
	},
	{
		scenario: "cursor beyond end of file",
		input:    funnyString,
		line:     2,
		col:      8,  // 4 + 1 + 1 + 1 + 1 (1-indexed)
		offset:   14, // 7 (length of first line) + 4 + 1 + 1 + 1 (unused since we have line+col)
		err:      "column is beyond end of file",
	},
}

// fromUTF16Tests is the table driving TestFromUTF16. Each entry names a
// cursor by 1-indexed line and UTF-16 column and records the byte offset and
// byte column expected for it, plus the text of that line on either side of
// the cursor. When err is set, only the error string is checked.
var fromUTF16Tests = []struct {
	scenario  string
	input     []byte
	line      int    // 1-indexed line number (input, and expected resulting line)
	utf16col  int    // 1-indexed UTF-16 col number
	resCol    int    // 1-indexed byte position in line
	resOffset int    // 0-indexed byte offset into input
	pre       string // everything before the cursor on the line
	post      string // everything from the cursor onwards
	err       string // expected error string in call to PositionOffset
}{
	{
		scenario:  "zero length input; cursor at first col, first line",
		input:     []byte(""),
		line:      1,
		utf16col:  1,
		resCol:    1,
		resOffset: 0,
		pre:       "",
		post:      "",
	},
	{
		scenario:  "cursor before funny character",
		input:     funnyString,
		line:      1,
		utf16col:  1,
		resCol:    1,
		resOffset: 0,
		pre:       "",
		post:      "𐐀23",
	},
	{
		scenario:  "cursor after funny character",
		input:     funnyString,
		line:      1,
		utf16col:  3,
		resCol:    5,
		resOffset: 4,
		pre:       "𐐀",
		post:      "23",
	},
	{
		scenario:  "cursor after last character on line",
		input:     funnyString,
		line:      1,
		utf16col:  5,
		resCol:    7,
		resOffset: 6,
		pre:       "𐐀23",
		post:      "",
	},
	{
		// The result fields below are not checked: the test returns as
		// soon as the expected error is observed.
		scenario:  "cursor beyond last character on line",
		input:     funnyString,
		line:      1,
		utf16col:  6,
		resCol:    7,
		resOffset: 6,
		pre:       "𐐀23",
		post:      "",
		err:       "column is beyond end of line",
	},
	{
		scenario:  "cursor before funny character; second line",
		input:     funnyString,
		line:      2,
		utf16col:  1,
		resCol:    1,
		resOffset: 7,
		pre:       "",
		post:      "𐐀45",
	},
	{
		scenario:  "cursor after funny character; second line",
		input:     funnyString,
		line:      2,
		utf16col:  3,  // 2 + 1 (1-indexed)
		resCol:    5,  // 4 + 1 (1-indexed)
		resOffset: 11, // 7 (length of first line) + 4
		pre:       "𐐀",
		post:      "45",
	},
	{
		scenario:  "cursor after last character on second line",
		input:     funnyString,
		line:      2,
		utf16col:  5,  // 2 + 1 + 1 + 1 (1-indexed)
		resCol:    7,  // 4 + 1 + 1 + 1 (1-indexed)
		resOffset: 13, // 7 (length of first line) + 4 + 1 + 1
		pre:       "𐐀45",
		post:      "",
	},
	{
		scenario:  "cursor beyond end of file",
		input:     funnyString,
		line:      2,
		utf16col:  6,  // 2 + 1 + 1 + 1 + 1 (1-indexed)
		resCol:    8,  // 4 + 1 + 1 + 1 + 1 (1-indexed)
		resOffset: 14, // 7 (length of first line) + 4 + 1 + 1 + 1
		err:       "column is beyond end of file",
	},
}

// TestToUTF16 runs one subtest per toUTF16Tests entry. The cursor is turned
// into a protocol.Position from line/col when a line is given, otherwise
// from the byte offset; the resulting UTF-16 column (converted back to
// 1-indexed) and the text around the cursor are compared with the entry.
func TestToUTF16(t *testing.T) {
	for _, tc := range toUTF16Tests {
		t.Run(tc.scenario, func(t *testing.T) {
			if tc.issue != nil && !*tc.issue {
				t.Skip("expected to fail")
			}

			mapper := protocol.NewMapper("", tc.input)
			var (
				position protocol.Position
				convErr  error
			)
			switch {
			case tc.line > 0:
				position, convErr = mapper.LineCol8Position(tc.line, tc.col)
			case tc.offset >= 0:
				position, convErr = mapper.OffsetPosition(tc.offset)
			default:
				convErr = fmt.Errorf("point has neither offset nor line/column")
			}

			// A failed conversion is fine only if it is the failure the
			// entry predicts; nothing else is checked in that case.
			if convErr != nil {
				if convErr.Error() != tc.err {
					t.Fatalf("expected error %v; got %v", tc.err, convErr)
				}
				return
			}
			if tc.err != "" {
				t.Fatalf("unexpected success; wanted %v", tc.err)
			}

			if utf16col := int(position.Character) + 1; utf16col != tc.resUTF16col {
				t.Fatalf("expected result %v; got %v", tc.resUTF16col, utf16col)
			}

			before, after := getPrePost(tc.input, tc.offset)
			if before != tc.pre {
				t.Fatalf("expected #%d pre %q; got %q", tc.offset, tc.pre, before)
			}
			if after != tc.post {
				t.Fatalf("expected #%d, post %q; got %q", tc.offset, tc.post, after)
			}
		})
	}
}

// TestFromUTF16 runs one subtest per fromUTF16Tests entry. The entry's
// 1-indexed line and UTF-16 column are converted to a byte offset with
// PositionOffset; the offset, the line/byte-column derived from it, and the
// text around the cursor are compared with the entry.
func TestFromUTF16(t *testing.T) {
	for _, tc := range fromUTF16Tests {
		t.Run(tc.scenario, func(t *testing.T) {
			mapper := protocol.NewMapper("", tc.input)

			// The table is 1-indexed; the Position fields are filled with
			// the corresponding 0-indexed values.
			lspPos := protocol.Position{
				Line:      uint32(tc.line - 1),
				Character: uint32(tc.utf16col - 1),
			}
			gotOffset, err := mapper.PositionOffset(lspPos)
			if err != nil {
				if err.Error() != tc.err {
					t.Fatalf("expected error %v; got %v", tc.err, err)
				}
				return
			}
			if tc.err != "" {
				t.Fatalf("unexpected success; wanted %v", tc.err)
			}
			if gotOffset != tc.resOffset {
				t.Fatalf("expected offset %v; got %v", tc.resOffset, gotOffset)
			}

			gotLine, gotCol := mapper.OffsetLineCol8(gotOffset)
			if gotLine != tc.line {
				t.Fatalf("expected resulting line %v; got %v", tc.line, gotLine)
			}
			if gotCol != tc.resCol {
				t.Fatalf("expected resulting col %v; got %v", tc.resCol, gotCol)
			}

			before, after := getPrePost(tc.input, gotOffset)
			if before != tc.pre {
				t.Fatalf("expected #%d pre %q; got %q", gotOffset, tc.pre, before)
			}
			if after != tc.post {
				t.Fatalf("expected #%d post %q; got %q", gotOffset, tc.post, after)
			}
		})
	}
}

// getPrePost splits the line of content that contains the byte offset into
// the text before the offset and the text from the offset onwards. Neither
// half includes a newline. It panics if offset is outside content.
func getPrePost(content []byte, offset int) (string, string) {
	text := string(content)
	before, after := text[:offset], text[offset:]

	// Drop everything up to and including the previous newline.
	if nl := strings.LastIndexByte(before, '\n'); nl >= 0 {
		before = before[nl+1:]
	}
	// Keep only what precedes the next newline (all of it if there is none).
	after, _, _ = strings.Cut(after, "\n")

	return before, after
}

// -- these are the historical lsppos tests --

// testCase describes one location in a text buffer together with the LSP
// position expected for it.
type testCase struct {
	content            string      // input text
	substrOrOffset     interface{} // explicit integer offset, or a substring
	wantLine, wantChar int         // expected LSP position information
}

// offset returns the test case byte offset: substrOrOffset itself when it is
// an int, or the index of its first occurrence in content when it is a
// string. It panics if the substring is absent from content or if
// substrOrOffset has any other type, since either means the table is wrong.
func (c testCase) offset() int {
	switch x := c.substrOrOffset.(type) {
	case int:
		return x
	case string:
		i := strings.Index(c.content, x)
		if i < 0 {
			panic(fmt.Sprintf("%q does not contain substring %q", c.content, x))
		}
		return i
	default:
		// Name the actual field and the offending type so a bad table
		// entry can be found quickly.
		panic(fmt.Sprintf("substrOrOffset must be an integer or string, got %T", x))
	}
}

// tests pairs a buffer and a location in it (a substring or an explicit
// byte offset) with the LSP line and character expected for that location.
var tests = []testCase{
	{content: "a𐐀b", substrOrOffset: "a", wantLine: 0, wantChar: 0},
	{content: "a𐐀b", substrOrOffset: "𐐀", wantLine: 0, wantChar: 1},
	{content: "a𐐀b", substrOrOffset: "b", wantLine: 0, wantChar: 3},
	{content: "a𐐀b\n", substrOrOffset: "\n", wantLine: 0, wantChar: 4},
	{content: "a𐐀b\r\n", substrOrOffset: "\n", wantLine: 0, wantChar: 4}, // \r|\n is not a valid position, so we move back to the end of the first line.
	{content: "a𐐀b\r\nx", substrOrOffset: "x", wantLine: 1, wantChar: 0},
	{content: "a𐐀b\r\nx\ny", substrOrOffset: "y", wantLine: 2, wantChar: 0},

	// Testing EOL and EOF positions
	{content: "", substrOrOffset: 0, wantLine: 0, wantChar: 0}, // 0th position of an empty buffer is (0, 0)
	{content: "abc", substrOrOffset: "c", wantLine: 0, wantChar: 2},
	{content: "abc", substrOrOffset: 3, wantLine: 0, wantChar: 3},
	{content: "abc\n", substrOrOffset: "\n", wantLine: 0, wantChar: 3},
	{content: "abc\n", substrOrOffset: 4, wantLine: 1, wantChar: 0}, // position after a newline is on the next line
}

// TestLineChar checks, for every entry of tests, that OffsetPosition maps
// the entry's byte offset to the expected line and character.
func TestLineChar(t *testing.T) {
	for _, test := range tests {
		m := protocol.NewMapper("", []byte(test.content))
		offset := test.offset()
		posn, err := m.OffsetPosition(offset)
		if err != nil {
			// Report the failure instead of discarding it: the returned
			// position is not meaningful when the conversion failed, so
			// comparing it against the expectation would be misleading.
			t.Errorf("OffsetPosition(%d) failed: %v", offset, err)
			continue
		}
		gotLine, gotChar := int(posn.Line), int(posn.Character)
		if gotLine != test.wantLine || gotChar != test.wantChar {
			t.Errorf("LineChar(%d) = (%d,%d), want (%d,%d)", offset, gotLine, gotChar, test.wantLine, test.wantChar)
		}
	}
}

// TestInvalidOffset checks that OffsetPosition reports an error for a
// negative offset and for an offset far larger than the content.
func TestInvalidOffset(t *testing.T) {
	mapper := protocol.NewMapper("", []byte("a𐐀b\r\nx\ny"))
	badOffsets := []int{-1, 100}
	for _, offset := range badOffsets {
		if posn, err := mapper.OffsetPosition(offset); err == nil {
			t.Errorf("OffsetPosition(%d) = %s, want error", offset, posn)
		}
	}
}

// TestPosition checks, for every entry of tests, that OffsetPosition
// succeeds and returns exactly the expected protocol.Position.
func TestPosition(t *testing.T) {
	for _, tc := range tests {
		mapper := protocol.NewMapper("", []byte(tc.content))
		off := tc.offset()

		pos, err := mapper.OffsetPosition(off)
		if err != nil {
			t.Errorf("OffsetPosition(%d) failed: %v", off, err)
			continue
		}

		expected := protocol.Position{
			Line:      uint32(tc.wantLine),
			Character: uint32(tc.wantChar),
		}
		if pos == expected {
			continue
		}
		t.Errorf("Position(%d) = %v, want %v", off, pos, expected)
	}
}

// TestRange checks, for every entry of tests, that OffsetRange(0, offset)
// yields a range whose Start is the zero position and whose End is the
// expected position of the offset. The first conversion error aborts the
// test.
func TestRange(t *testing.T) {
	for _, tc := range tests {
		mapper := protocol.NewMapper("", []byte(tc.content))
		end := tc.offset()

		rng, err := mapper.OffsetRange(0, end)
		if err != nil {
			t.Fatal(err)
		}

		// Start is left as the zero Position.
		var expected protocol.Range
		expected.End = protocol.Position{
			Line:      uint32(tc.wantLine),
			Character: uint32(tc.wantChar),
		}
		if rng != expected {
			t.Errorf("Range(%d) = %v, want %v", end, rng, expected)
		}
	}
}

// TestBytesOffset checks PositionOffset against a table of positions and
// the byte offsets expected for them. A want of -1 marks a position for
// which an error is acceptable.
func TestBytesOffset(t *testing.T) {
	cases := []struct {
		text string
		pos  protocol.Position
		want int
	}{
		// U+10400 encodes as [F0 90 90 80] in UTF-8 and [D801 DC00] in UTF-16.
		{text: `a𐐀b`, pos: protocol.Position{Line: 0, Character: 0}, want: 0},
		{text: `a𐐀b`, pos: protocol.Position{Line: 0, Character: 1}, want: 1},
		{text: `a𐐀b`, pos: protocol.Position{Line: 0, Character: 2}, want: 1},
		{text: `a𐐀b`, pos: protocol.Position{Line: 0, Character: 3}, want: 5},
		{text: `a𐐀b`, pos: protocol.Position{Line: 0, Character: 4}, want: 6},
		{text: `a𐐀b`, pos: protocol.Position{Line: 0, Character: 5}, want: -1},
		{text: "aaa\nbbb\n", pos: protocol.Position{Line: 0, Character: 3}, want: 3},
		{text: "aaa\nbbb\n", pos: protocol.Position{Line: 0, Character: 4}, want: -1},
		{text: "aaa\nbbb\n", pos: protocol.Position{Line: 1, Character: 0}, want: 4},
		{text: "aaa\nbbb\n", pos: protocol.Position{Line: 1, Character: 3}, want: 7},
		{text: "aaa\nbbb\n", pos: protocol.Position{Line: 1, Character: 4}, want: -1},
		{text: "aaa\nbbb\n", pos: protocol.Position{Line: 2, Character: 0}, want: 8},
		{text: "aaa\nbbb\n", pos: protocol.Position{Line: 2, Character: 1}, want: -1},
		{text: "aaa\nbbb\n\n", pos: protocol.Position{Line: 2, Character: 0}, want: 8},
	}

	for i, tc := range cases {
		// Each case gets its own mapper, with a URI derived from its index.
		uri := protocol.URIFromPath(fmt.Sprintf("test %d", i))
		m := protocol.NewMapper(uri, []byte(tc.text))

		got, err := m.PositionOffset(tc.pos)
		switch {
		case err != nil:
			// An error is only a failure when a real offset was expected.
			if tc.want != -1 {
				t.Errorf("%d: unexpected error: %v", i, err)
			}
		case got != tc.want:
			t.Errorf("want %d for %q(Line:%d,Character:%d), but got %d", tc.want, tc.text, int(tc.pos.Line), int(tc.pos.Character), got)
		}
	}
}