1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626
|
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package source
import (
"context"
"fmt"
"go/types"
"path"
"path/filepath"
"regexp"
"runtime"
"sort"
"strings"
"unicode"
"golang.org/x/tools/gopls/internal/lsp/protocol"
"golang.org/x/tools/gopls/internal/span"
"golang.org/x/tools/internal/event"
"golang.org/x/tools/internal/fuzzy"
)
// Symbol holds a precomputed symbol value. Note: we avoid using the
// protocol.SymbolInformation struct here in order to reduce the size of each
// symbol.
type Symbol struct {
	Name  string              // symbol name; may contain '.' for nested symbols (see matchFile)
	Kind  protocol.SymbolKind // LSP symbol kind
	Range protocol.Range      // location of the symbol within its file
}
// maxSymbols defines the maximum number of symbol results that should ever be
// sent in response to a client. It bounds the size of symbolStore's result
// array.
const maxSymbols = 100
// WorkspaceSymbols matches symbols across all views using the given query,
// according to the match semantics parameterized by matcherType and style.
//
// The workspace symbol method is defined in the spec as follows:
//
//	The workspace symbol request is sent from the client to the server to
//	list project-wide symbols matching the query string.
//
// It is unclear what "project-wide" means here, but given the parameters of
// workspace/symbol do not include any workspace identifier, then it has to be
// assumed that "project-wide" means "across all workspaces". Hence why
// WorkspaceSymbols receives the views []View.
//
// However, it then becomes unclear what it would mean to call WorkspaceSymbols
// with a different configured SymbolMatcher per View. Therefore we assume that
// Session level configuration will define the SymbolMatcher to be used for the
// WorkspaceSymbols method.
func WorkspaceSymbols(ctx context.Context, matcher SymbolMatcher, style SymbolStyle, views []View, query string) ([]protocol.SymbolInformation, error) {
	ctx, done := event.Start(ctx, "source.WorkspaceSymbols")
	defer done()

	// An empty query matches nothing.
	if query == "" {
		return nil, nil
	}

	// Select the symbolizer implementing the requested symbol style.
	var symbolize symbolizer
	switch style {
	case DynamicSymbols:
		symbolize = dynamicSymbolMatch
	case FullyQualifiedSymbols:
		symbolize = fullyQualifiedSymbolMatch
	case PackageQualifiedSymbols:
		symbolize = packageSymbolMatch
	default:
		panic(fmt.Errorf("unknown symbol style: %v", style))
	}

	return collectSymbols(ctx, views, matcher, symbolize, query)
}
// A matcherFunc returns the index and score of a symbol match.
//
// The index is a position within the joined chunks at which the match was
// found, or -1 if there is no match; a score of zero indicates no match.
//
// See the comment for collectSymbols for more information.
type matcherFunc func(chunks []string) (int, float64)

// A symbolizer returns the best symbol match for a name with pkg, according to
// some heuristic. The symbol name is passed as the slice nameParts of logical
// name pieces. For example, for myType.field the caller can pass either
// []string{"myType.field"} or []string{"myType.", "field"}.
//
// See the comment for collectSymbols for more information.
//
// The space argument is an empty slice with spare capacity that may be used
// to allocate the result.
type symbolizer func(space []string, name string, pkg *Metadata, m matcherFunc) ([]string, float64)
// fullyQualifiedSymbolMatch scores name using the dynamic matching heuristics
// but always renders a successful match in its fully qualified
// (pkgpath.symbol) form.
func fullyQualifiedSymbolMatch(space []string, name string, pkg *Metadata, matcher matcherFunc) ([]string, float64) {
	// Delegate scoring to the dynamic matcher; only the rendering differs.
	_, score := dynamicSymbolMatch(space, name, pkg, matcher)
	if score <= 0 {
		return nil, 0
	}
	return append(space, string(pkg.PkgPath), ".", name), score
}
// dynamicSymbolMatch matches name in unqualified, package-qualified, and
// fully-qualified forms, returning the narrowest form that contains the
// match, with qualified forms downranked by a multiplicative factor.
func dynamicSymbolMatch(space []string, name string, pkg *Metadata, matcher matcherFunc) ([]string, float64) {
	if IsCommandLineArguments(pkg.ID) {
		// command-line-arguments packages have a non-sensical package path, so
		// just use their package name.
		return packageSymbolMatch(space, name, pkg, matcher)
	}
	var score float64

	endsInPkgName := strings.HasSuffix(string(pkg.PkgPath), string(pkg.Name))

	// If the package path does not end in the package name, we need to check the
	// package-qualified symbol as an extra pass first.
	if !endsInPkgName {
		pkgQualified := append(space, string(pkg.Name), ".", name)
		// NB: this score deliberately shadows the outer score; the
		// fully-qualified pass below starts fresh.
		idx, score := matcher(pkgQualified)
		nameStart := len(pkg.Name) + 1 // offset of name within "pkgname." + name
		if score > 0 {
			// If our match is contained entirely within the unqualified portion,
			// just return that.
			if idx >= nameStart {
				return append(space, name), score
			}
			// Lower the score for matches that include the package name.
			return pkgQualified, score * 0.8
		}
	}

	// Now try matching the fully qualified symbol.
	fullyQualified := append(space, string(pkg.PkgPath), ".", name)
	idx, score := matcher(fullyQualified)

	// As above, check if we matched just the unqualified symbol name.
	nameStart := len(pkg.PkgPath) + 1
	if idx >= nameStart {
		return append(space, name), score
	}

	// If our package path ends in the package name, we'll have skipped the
	// initial pass above, so check if we matched just the package-qualified
	// name.
	if endsInPkgName && idx >= 0 {
		pkgStart := len(pkg.PkgPath) - len(pkg.Name)
		if idx >= pkgStart {
			return append(space, string(pkg.Name), ".", name), score
		}
	}

	// Our match was not contained within the unqualified or package qualified
	// symbol. Return the fully qualified symbol but discount the score.
	return fullyQualified, score * 0.6
}
// packageSymbolMatch matches name in its package-qualified form
// (pkgname.symbol), or reports no match.
func packageSymbolMatch(space []string, name string, pkg *Metadata, matcher matcherFunc) ([]string, float64) {
	parts := append(space, string(pkg.Name), ".", name)
	_, score := matcher(parts)
	if score > 0 {
		return parts, score
	}
	return nil, 0
}
// buildMatcher constructs a matcherFunc for query according to the requested
// matcher type. It panics on an unrecognized SymbolMatcher value.
func buildMatcher(matcher SymbolMatcher, query string) matcherFunc {
	switch matcher {
	case SymbolFuzzy:
		return parseQuery(query, newFuzzyMatcher)
	case SymbolFastFuzzy:
		return parseQuery(query, func(q string) matcherFunc {
			return fuzzy.NewSymbolMatcher(q).Match
		})
	case SymbolCaseSensitive:
		return matchExact(query)
	case SymbolCaseInsensitive:
		// Lower-case both query and candidate before exact matching.
		exact := matchExact(strings.ToLower(query))
		buf := []string{""} // reused across calls to avoid reallocating
		return func(chunks []string) (int, float64) {
			buf[0] = strings.ToLower(strings.Join(chunks, ""))
			return exact(buf)
		}
	}
	panic(fmt.Errorf("unknown symbol matcher: %v", matcher))
}
// newFuzzyMatcher adapts fuzzy.Matcher to the matcherFunc signature, pairing
// the fuzzy score with the start of the first matched range (or -1 if the
// matcher reports no ranges).
func newFuzzyMatcher(query string) matcherFunc {
	fm := fuzzy.NewMatcher(query)
	return func(chunks []string) (int, float64) {
		score := float64(fm.ScoreChunks(chunks))
		if ranges := fm.MatchedRanges(); len(ranges) > 0 {
			return ranges[0], score
		}
		return -1, score
	}
}
// parseQuery parses a field-separated symbol query, extracting the special
// characters listed below, and returns a matcherFunc corresponding to the AND
// of all field queries.
//
// Special characters:
//
//	^  match exact prefix
//	$  match exact suffix
//	'  match exact
//
// In all three of these special queries, matches are 'smart-cased', meaning
// they are case sensitive if the symbol query contains any upper-case
// characters, and case insensitive otherwise.
func parseQuery(q string, newMatcher func(string) matcherFunc) matcherFunc {
	fields := strings.Fields(q)
	if len(fields) == 0 {
		// A blank query matches nothing.
		return func([]string) (int, float64) { return -1, 0 }
	}
	funcs := make([]matcherFunc, 0, len(fields))
	for _, field := range fields {
		var f matcherFunc
		switch {
		case strings.HasPrefix(field, "^"):
			prefix := field[1:]
			f = smartCase(prefix, func(chunks []string) (int, float64) {
				if strings.HasPrefix(strings.Join(chunks, ""), prefix) {
					return 0, 1
				}
				return -1, 0
			})
		case strings.HasPrefix(field, "'"):
			exact := field[1:]
			f = smartCase(exact, matchExact(exact))
		case strings.HasSuffix(field, "$"):
			suffix := field[:len(field)-1]
			f = smartCase(suffix, func(chunks []string) (int, float64) {
				joined := strings.Join(chunks, "")
				if strings.HasSuffix(joined, suffix) {
					return len(joined) - len(suffix), 1
				}
				return -1, 0
			})
		default:
			f = newMatcher(field)
		}
		funcs = append(funcs, f)
	}
	if len(funcs) == 1 {
		return funcs[0]
	}
	return comboMatcher(funcs).match
}
// matchExact returns a matcherFunc reporting the last occurrence of exact
// within the joined chunks with score 1, or (-1, 0) when absent.
func matchExact(exact string) matcherFunc {
	return func(chunks []string) (int, float64) {
		idx := strings.LastIndex(strings.Join(chunks, ""), exact)
		if idx < 0 {
			return -1, 0
		}
		return idx, 1
	}
}
// smartCase returns a matcherFunc that is case-sensitive if q contains any
// upper-case characters, and case-insensitive otherwise.
func smartCase(q string, m matcherFunc) matcherFunc {
	insensitive := strings.ToLower(q) == q
	buf := []string{""} // reused across calls to avoid reallocating
	return func(chunks []string) (int, float64) {
		joined := strings.Join(chunks, "")
		if insensitive {
			joined = strings.ToLower(joined)
		}
		buf[0] = joined
		return m(buf)
	}
}
// comboMatcher is the AND of several matcherFuncs: its score is the product
// of the constituent scores, and so is zero if any constituent fails.
type comboMatcher []matcherFunc

// match runs every constituent matcher and combines the results.
//
// The returned index is the minimum of 0 and all constituent indices: any
// failing constituent (index -1) yields -1, and a fully successful
// combination always reports index <= 0.
func (c comboMatcher) match(chunks []string) (int, float64) {
	score := 1.0
	first := 0
	for _, f := range c {
		idx, s := f(chunks)
		if idx < first {
			first = idx
		}
		score *= s
	}
	return first, score
}
// collectSymbols calls snapshot.Symbols to walk the syntax trees of
// all files in the views' current snapshots, and returns a sorted,
// scored list of symbols that best match the parameters.
//
// How it matches symbols is parameterized by two interfaces:
//   - A matcherFunc determines how well a string symbol matches a query. It
//     returns a non-negative score indicating the quality of the match. A
//     score of zero indicates no match.
//   - A symbolizer determines how we extract the symbol for an object. This
//     enables the 'symbolStyle' configuration option.
func collectSymbols(ctx context.Context, views []View, matcherType SymbolMatcher, symbolizer symbolizer, query string) ([]protocol.SymbolInformation, error) {
	// Extract symbols from all files.
	var work []symbolFile
	var roots []string
	seen := make(map[span.URI]bool)
	// TODO(adonovan): opt: parallelize this loop? How often is len > 1?
	for _, v := range views {
		snapshot, release := v.Snapshot(ctx)
		// Deliberate defer-in-loop: each snapshot must stay alive until
		// matching completes at the end of this function.
		defer release()

		// Use the root view URIs for determining (lexically)
		// whether a URI is in any open workspace.
		roots = append(roots, strings.TrimRight(string(v.Folder()), "/"))

		filters := v.Options().DirectoryFilters
		filterer := NewFilterer(filters)
		folder := filepath.ToSlash(v.Folder().Filename())
		for uri, syms := range snapshot.Symbols(ctx) {
			// Skip files excluded by this view's directory filters; the
			// filter operates on the folder-relative, slash-separated path.
			norm := filepath.ToSlash(uri.Filename())
			nm := strings.TrimPrefix(norm, folder)
			if filterer.Disallow(nm) {
				continue
			}
			// Only scan each file once.
			if seen[uri] {
				continue
			}
			mds, err := snapshot.MetadataForFile(ctx, uri)
			if err != nil {
				event.Error(ctx, fmt.Sprintf("missing metadata for %q", uri), err)
				continue
			}
			if len(mds) == 0 {
				// TODO: should use the bug reporting API
				continue
			}
			seen[uri] = true
			// Associate the file with the first package's metadata.
			work = append(work, symbolFile{uri, mds[0], syms})
		}
	}

	// Match symbols in parallel.
	// Each worker has its own symbolStore,
	// which we merge at the end.
	nmatchers := runtime.GOMAXPROCS(-1) // matching is CPU bound
	results := make(chan *symbolStore)
	for i := 0; i < nmatchers; i++ {
		go func(i int) {
			// Matchers may carry state (e.g. reusable buffers), so each
			// worker builds its own.
			matcher := buildMatcher(matcherType, query)
			store := new(symbolStore)
			// Assign files to workers in round-robin fashion.
			for j := i; j < len(work); j += nmatchers {
				matchFile(store, symbolizer, matcher, roots, work[j])
			}
			results <- store
		}(i)
	}
	// Gather and merge results as they arrive.
	var unified symbolStore
	for i := 0; i < nmatchers; i++ {
		store := <-results
		for _, syms := range store.res {
			unified.store(syms)
		}
	}
	return unified.results(), nil
}
// Filterer reports whether slash-separated paths are excluded by an ordered
// list of [+-]-prefixed directory filters.
type Filterer struct {
	// Whether a filter is excluded depends on the operator (first char of the raw filter).
	// Slices filters and excluded then should have the same length.
	filters  []*regexp.Regexp // compiled form of each raw filter, in order
	excluded []bool           // excluded[i] is true if filter i's operator was '-'
}
// NewFilterer compiles each raw [+-]-prefixed filter into its regular
// expression form, preserving order (later filters take precedence).
func NewFilterer(rawFilters []string) *Filterer {
	filterer := Filterer{
		filters:  make([]*regexp.Regexp, 0, len(rawFilters)),
		excluded: make([]bool, 0, len(rawFilters)),
	}
	for _, raw := range rawFilters {
		cleaned := path.Clean(filepath.ToSlash(raw))
		// TODO(dungtuanle): fix: validate [+-] prefix.
		op, prefix := cleaned[0], cleaned[1:]
		// convertFilterToRegexp adds "/" at the end of prefix to handle cases
		// where a filter is a prefix of another filter.
		// For example, it prevents [+foobar, -foo] from excluding "foobar".
		filterer.filters = append(filterer.filters, convertFilterToRegexp(filepath.ToSlash(prefix)))
		filterer.excluded = append(filterer.excluded, op == '-')
	}
	return &filterer
}
// Disallow reports whether path is excluded by the filterer's filters.
func (f *Filterer) Disallow(path string) bool {
	// Normalize: no leading slash, exactly one trailing slash.
	path = strings.TrimPrefix(path, "/")
	if !strings.HasSuffix(path, "/") {
		path += "/"
	}

	// TODO(adonovan): opt: iterate in reverse and break at first match.
	excluded := false
	for i := range f.filters {
		if f.filters[i].MatchString(path) {
			excluded = f.excluded[i] // last match wins
		}
	}
	return excluded
}
// convertFilterToRegexp replaces glob-like operator substrings in a string
// file path to their equivalent regex forms.
//
// Supported glob-like operators:
//   - **: match zero or more complete path segments
//
// The returned pattern expects slash-separated paths with a trailing slash,
// as produced by Filterer.Disallow.
func convertFilterToRegexp(filter string) *regexp.Regexp {
	if filter == "" {
		// An empty filter matches every path.
		return regexp.MustCompile(".*")
	}
	var ret strings.Builder
	ret.WriteString("^")
	segs := strings.Split(filter, "/")
	for _, seg := range segs {
		// Inv: seg != "" since path is clean.
		if seg == "**" {
			// Match zero or more complete segments, including none at all.
			// The previous form (".*" followed by "/") required at least one
			// segment, so e.g. "**/node_modules" failed to exclude a
			// top-level node_modules directory and "a/**/b" failed to match
			// "a/b", contradicting the documented "zero or more" semantics.
			ret.WriteString("(.*/)?")
		} else {
			ret.WriteString(regexp.QuoteMeta(seg))
			ret.WriteString("/")
		}
	}
	pattern := ret.String()

	// Simplify a leading "^(.*/)?" to the equivalent but cheaper "(^|/)".
	// (Avoiding a leading ".*" previously increased BenchmarkWorkspaceSymbols
	// time by ~20% when the default filter was changed to "**/node_modules".)
	if rest := strings.TrimPrefix(pattern, "^(.*/)?"); rest != pattern {
		pattern = "(^|/)" + rest
	}
	return regexp.MustCompile(pattern)
}
// symbolFile holds symbol information for a single file.
type symbolFile struct {
	uri  span.URI  // the file's URI
	md   *Metadata // metadata for (one of) the file's package(s)
	syms []Symbol  // the precomputed symbols for the file
}
// matchFile scans a symbol file and adds matching symbols to the store.
func matchFile(store *symbolStore, symbolizer symbolizer, matcher matcherFunc, roots []string, i symbolFile) {
	// Scratch capacity reused by the symbolizer for its result parts.
	space := make([]string, 0, 3)
	for _, sym := range i.syms {
		symbolParts, score := symbolizer(space, sym.Name, i.md, matcher)

		// Check if the score is too low before applying any downranking.
		if store.tooLow(score) {
			continue
		}

		// Factors to apply to the match score for the purpose of downranking
		// results.
		//
		// These numbers were crudely calibrated based on trial-and-error using a
		// small number of sample queries. Adjust as necessary.
		//
		// All factors are multiplicative, meaning if more than one applies they are
		// multiplied together.
		const (
			// nonWorkspaceFactor is applied to symbols outside of any active
			// workspace. Developers are less likely to want to jump to code that they
			// are not actively working on.
			nonWorkspaceFactor = 0.5
			// nonWorkspaceUnexportedFactor is applied to unexported symbols outside of
			// any active workspace. Since one wouldn't usually jump to unexported
			// symbols to understand a package API, they are particularly irrelevant.
			nonWorkspaceUnexportedFactor = 0.5
			// every field or method nesting level to access the field decreases
			// the score by a factor of 1.0 - depth*depthFactor, up to a depth of
			// 3.
			depthFactor = 0.2
		)

		// Determine whether the symbol is exported (every dot-separated
		// segment begins with an upper-case rune) and how deeply it is
		// nested (one level per '.' in the name).
		startWord := true
		exported := true
		depth := 0.0
		for _, r := range sym.Name {
			if startWord && !unicode.IsUpper(r) {
				exported = false
			}
			if r == '.' {
				startWord = true
				depth++
			} else {
				startWord = false
			}
		}

		// A symbol is in the workspace if its URI starts with any view root.
		// NOTE(review): plain string-prefix check; a root ".../a/b" also
		// prefixes ".../a/bc/x" — confirm roots always end on a path boundary.
		inWorkspace := false
		for _, root := range roots {
			if strings.HasPrefix(string(i.uri), root) {
				inWorkspace = true
				break
			}
		}

		// Apply downranking based on workspace position.
		if !inWorkspace {
			score *= nonWorkspaceFactor
			if !exported {
				score *= nonWorkspaceUnexportedFactor
			}
		}

		// Apply downranking based on symbol depth.
		if depth > 3 {
			depth = 3
		}
		score *= 1.0 - depth*depthFactor

		// Re-check the threshold now that downranking has been applied.
		if store.tooLow(score) {
			continue
		}

		si := symbolInformation{
			score:     score,
			symbol:    strings.Join(symbolParts, ""),
			kind:      sym.Kind,
			uri:       i.uri,
			rng:       sym.Range,
			container: string(i.md.PkgPath),
		}
		store.store(si)
	}
}
// symbolStore maintains the best-scoring symbol matches seen so far, at most
// maxSymbols of them, kept in sorted (best-first) order by store.
type symbolStore struct {
	res [maxSymbols]symbolInformation
}
// store inserts si into the sorted results, if si has a high enough score.
func (sc *symbolStore) store(si symbolInformation) {
	if sc.tooLow(si.score) {
		return
	}
	// Find the insertion point that preserves the ordering: descending
	// score, then ascending symbol length, then lexical order.
	insertAt := sort.Search(len(sc.res), func(i int) bool {
		// Sort by score, then symbol length, and finally lexically.
		if sc.res[i].score != si.score {
			return sc.res[i].score < si.score
		}
		if len(sc.res[i].symbol) != len(si.symbol) {
			return len(sc.res[i].symbol) > len(si.symbol)
		}
		return sc.res[i].symbol > si.symbol
	})
	// Shift weaker results down one slot (dropping the last) and insert.
	// When inserting at the final slot, the old occupant is simply replaced.
	if insertAt < len(sc.res)-1 {
		copy(sc.res[insertAt+1:], sc.res[insertAt:len(sc.res)-1])
	}
	sc.res[insertAt] = si
}
// tooLow reports whether score cannot displace the current weakest result
// (which sorted order keeps in the final slot of res).
func (sc *symbolStore) tooLow(score float64) bool {
	return score <= sc.res[len(sc.res)-1].score
}
// results converts the stored matches to protocol.SymbolInformation values,
// stopping at the first unfilled (non-positive score) slot.
func (sc *symbolStore) results() []protocol.SymbolInformation {
	var out []protocol.SymbolInformation
	for i := range sc.res {
		if sc.res[i].score <= 0 {
			break
		}
		out = append(out, sc.res[i].asProtocolSymbolInformation())
	}
	return out
}
// typeToKind maps typ's underlying type to the most specific
// protocol.SymbolKind, falling back to Variable for anything unrecognized.
func typeToKind(typ types.Type) protocol.SymbolKind {
	switch typ := typ.Underlying().(type) {
	case *types.Interface:
		return protocol.Interface
	case *types.Struct:
		return protocol.Struct
	case *types.Signature:
		if typ.Recv() != nil {
			return protocol.Method
		}
		return protocol.Function
	// No *types.Named case: Underlying never returns a Named type
	// (per the go/types contract), so such a branch was unreachable
	// dead code and has been removed.
	case *types.Basic:
		i := typ.Info()
		switch {
		case i&types.IsNumeric != 0:
			return protocol.Number
		case i&types.IsBoolean != 0:
			return protocol.Boolean
		case i&types.IsString != 0:
			return protocol.String
		}
	}
	return protocol.Variable
}
// symbolInformation is a cut-down version of protocol.SymbolInformation that
// allows struct values of this type to be used as map keys.
type symbolInformation struct {
	score     float64             // match quality; higher is better
	symbol    string              // rendered symbol name (possibly qualified)
	container string              // the symbol's package path
	kind      protocol.SymbolKind // LSP symbol kind
	uri       span.URI            // URI of the declaring file
	rng       protocol.Range      // location of the symbol within the file
}
// asProtocolSymbolInformation converts s to a protocol.SymbolInformation value.
//
// TODO: work out how to handle tags if/when they are needed.
func (s symbolInformation) asProtocolSymbolInformation() protocol.SymbolInformation {
	location := protocol.Location{
		URI:   protocol.URIFromSpanURI(s.uri),
		Range: s.rng,
	}
	return protocol.SymbolInformation{
		Name:          s.symbol,
		Kind:          s.kind,
		Location:      location,
		ContainerName: s.container,
	}
}
|