fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zoekt

import (
	"bytes"
	"fmt"
	"log"
	"sort"
	"strings"
	"unicode/utf8"
)

var _ = log.Println

// contentProvider is an abstraction to treat matches for names and
// content with the same code.
//
// It is positioned on one document at a time (see setDocument) and loads
// that document's contents, newline offsets and sections lazily, caching
// them until the next setDocument call.
type contentProvider struct {
	// id is the index data that document contents, newlines and sections
	// are read from.
	id *indexData
	// stats accumulates FilesLoaded and ContentBytesLoaded as data is
	// pulled from id.
	stats *Stats

	// mutable

	// err holds the error returned by the most recent read from id.
	err error
	// idx is the ID of the current document.
	idx uint32
	// _data caches the current document's contents; nil until first use.
	_data []byte
	// _nl caches the current document's newline offsets; nil until first use.
	_nl []uint32
	// _nlBuf is handed to readNewlines as a buffer and is replaced by the
	// slice it returns.
	_nlBuf []uint32
	// _sects caches the current document's sections; nil until first use.
	_sects []DocumentSection
	// _sectBuf is handed to readDocSections as a buffer and is replaced by
	// the slice it returns.
	_sectBuf []DocumentSection
	// fileSize is the size in bytes of the current document, derived from
	// the document boundaries in id.
	fileSize uint32
}

// setDocument skips to the given document.
46func (p *contentProvider) setDocument(docID uint32) { 47 fileStart := p.id.boundaries[docID] 48 49 p.idx = docID 50 p.fileSize = p.id.boundaries[docID+1] - fileStart 51 52 p._nl = nil 53 p._sects = nil 54 p._data = nil 55} 56 57func (p *contentProvider) docSections() []DocumentSection { 58 if p._sects == nil { 59 var sz uint32 60 p._sects, sz, p.err = p.id.readDocSections(p.idx, p._sectBuf) 61 p.stats.ContentBytesLoaded += int64(sz) 62 p._sectBuf = p._sects 63 } 64 return p._sects 65} 66 67func (p *contentProvider) newlines() newlines { 68 if p._nl == nil { 69 var sz uint32 70 p._nl, sz, p.err = p.id.readNewlines(p.idx, p._nlBuf) 71 p._nlBuf = p._nl 72 p.stats.ContentBytesLoaded += int64(sz) 73 } 74 return newlines{locs: p._nl, fileSize: p.fileSize} 75} 76 77func (p *contentProvider) data(fileName bool) []byte { 78 if fileName { 79 return p.id.fileNameContent[p.id.fileNameIndex[p.idx]:p.id.fileNameIndex[p.idx+1]] 80 } 81 82 if p._data == nil { 83 p._data, p.err = p.id.readContents(p.idx) 84 p.stats.FilesLoaded++ 85 p.stats.ContentBytesLoaded += int64(len(p._data)) 86 } 87 return p._data 88} 89 90// Find offset in bytes (relative to corpus start) for an offset in 91// runes (relative to document start). If filename is set, the corpus 92// is the set of filenames, with the document being the name itself. 
93func (p *contentProvider) findOffset(filename bool, r uint32) uint32 { 94 if p.id.metaData.PlainASCII { 95 return r 96 } 97 98 sample := p.id.runeOffsets 99 runeEnds := p.id.fileEndRunes 100 fileStartByte := p.id.boundaries[p.idx] 101 if filename { 102 sample = p.id.fileNameRuneOffsets 103 runeEnds = p.id.fileNameEndRunes 104 fileStartByte = p.id.fileNameIndex[p.idx] 105 } 106 107 absR := r 108 if p.idx > 0 { 109 absR += runeEnds[p.idx-1] 110 } 111 112 byteOff, left := sample.lookup(absR) 113 114 var data []byte 115 116 if filename { 117 data = p.id.fileNameContent[byteOff:] 118 } else { 119 data, p.err = p.id.readContentSlice(byteOff, 3*runeOffsetFrequency) 120 if p.err != nil { 121 return 0 122 } 123 } 124 for left > 0 { 125 _, sz := utf8.DecodeRune(data) 126 byteOff += uint32(sz) 127 data = data[sz:] 128 left-- 129 } 130 131 byteOff -= fileStartByte 132 return byteOff 133} 134 135func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []LineMatch { 136 var result []LineMatch 137 if ms[0].fileName { 138 // There is only "line" in a filename. 
139 res := LineMatch{ 140 Line: p.id.fileName(p.idx), 141 FileName: true, 142 } 143 144 for _, m := range ms { 145 res.LineFragments = append(res.LineFragments, LineFragmentMatch{ 146 LineOffset: int(m.byteOffset), 147 MatchLength: int(m.byteMatchSz), 148 Offset: m.byteOffset, 149 }) 150 151 result = []LineMatch{res} 152 } 153 } else { 154 ms = breakMatchesOnNewlines(ms, p.data(false)) 155 result = p.fillContentMatches(ms, numContextLines) 156 } 157 158 sects := p.docSections() 159 for i, m := range result { 160 result[i].Score, result[i].DebugScore = p.matchScore(sects, &m, language, debug) 161 } 162 163 return result 164} 165 166func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []ChunkMatch { 167 var result []ChunkMatch 168 if ms[0].fileName { 169 // If the first match is a filename match, there will only be 170 // one match and the matched content will be the filename. 171 172 fileName := p.id.fileName(p.idx) 173 ranges := make([]Range, 0, len(ms)) 174 for _, m := range ms { 175 ranges = append(ranges, Range{ 176 Start: Location{ 177 ByteOffset: m.byteOffset, 178 LineNumber: 1, 179 Column: uint32(utf8.RuneCount(fileName[:m.byteOffset]) + 1), 180 }, 181 End: Location{ 182 ByteOffset: m.byteOffset + m.byteMatchSz, 183 LineNumber: 1, 184 Column: uint32(utf8.RuneCount(fileName[:m.byteOffset+m.byteMatchSz]) + 1), 185 }, 186 }) 187 } 188 189 result = []ChunkMatch{{ 190 Content: fileName, 191 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 192 Ranges: ranges, 193 FileName: true, 194 }} 195 } else { 196 result = p.fillContentChunkMatches(ms, numContextLines) 197 } 198 199 sects := p.docSections() 200 for i, m := range result { 201 result[i].Score, result[i].DebugScore = p.chunkMatchScore(sects, &m, language, debug) 202 } 203 204 return result 205} 206 207func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLines int) []LineMatch { 208 var result []LineMatch 209 for 
len(ms) > 0 { 210 m := ms[0] 211 num, lineStart, lineEnd := p.newlines().atOffset(m.byteOffset) 212 213 var lineCands []*candidateMatch 214 215 endMatch := m.byteOffset + m.byteMatchSz 216 217 for len(ms) > 0 { 218 m := ms[0] 219 if int(m.byteOffset) <= lineEnd { 220 endMatch = m.byteOffset + m.byteMatchSz 221 lineCands = append(lineCands, m) 222 ms = ms[1:] 223 } else { 224 break 225 } 226 } 227 228 if len(lineCands) == 0 { 229 log.Panicf( 230 "%s %v infinite loop: num %d start,end %d,%d, offset %d", 231 p.id.fileName(p.idx), p.id.metaData, 232 num, lineStart, lineEnd, 233 m.byteOffset) 234 } 235 236 data := p.data(false) 237 238 // Due to merging matches, we may have a match that 239 // crosses a line boundary. Prevent confusion by 240 // taking lines until we pass the last match 241 for lineEnd < len(data) && endMatch > uint32(lineEnd) { 242 next := bytes.IndexByte(data[lineEnd+1:], '\n') 243 if next == -1 { 244 lineEnd = len(data) 245 } else { 246 // TODO(hanwen): test that checks "+1" part here. 
247 lineEnd += next + 1 248 } 249 } 250 251 finalMatch := LineMatch{ 252 LineStart: lineStart, 253 LineEnd: lineEnd, 254 LineNumber: num, 255 } 256 finalMatch.Line = data[lineStart:lineEnd] 257 258 if numContextLines > 0 { 259 finalMatch.Before = p.newlines().getLines(data, num-numContextLines, num) 260 finalMatch.After = p.newlines().getLines(data, num+1, num+1+numContextLines) 261 } 262 263 for _, m := range lineCands { 264 fragment := LineFragmentMatch{ 265 Offset: m.byteOffset, 266 LineOffset: int(m.byteOffset) - lineStart, 267 MatchLength: int(m.byteMatchSz), 268 } 269 if m.symbol { 270 start := p.id.fileEndSymbol[p.idx] 271 fragment.SymbolInfo = p.id.symbols.data(start + m.symbolIdx) 272 if fragment.SymbolInfo != nil { 273 sec := p.docSections()[m.symbolIdx] 274 fragment.SymbolInfo.Sym = string(data[sec.Start:sec.End]) 275 } 276 } 277 278 finalMatch.LineFragments = append(finalMatch.LineFragments, fragment) 279 } 280 result = append(result, finalMatch) 281 } 282 return result 283} 284 285func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numContextLines int) []ChunkMatch { 286 newlines := p.newlines() 287 chunks := chunkCandidates(ms, newlines, numContextLines) 288 data := p.data(false) 289 chunkMatches := make([]ChunkMatch, 0, len(chunks)) 290 for _, chunk := range chunks { 291 ranges := make([]Range, 0, len(chunk.candidates)) 292 var symbolInfo []*Symbol 293 for i, cm := range chunk.candidates { 294 startOffset := cm.byteOffset 295 endOffset := cm.byteOffset + cm.byteMatchSz 296 startLine, startLineOffset, _ := newlines.atOffset(startOffset) 297 endLine, endLineOffset, _ := newlines.atOffset(endOffset) 298 299 ranges = append(ranges, Range{ 300 Start: Location{ 301 ByteOffset: startOffset, 302 LineNumber: uint32(startLine), 303 Column: uint32(utf8.RuneCount(data[startLineOffset:startOffset]) + 1), 304 }, 305 End: Location{ 306 ByteOffset: endOffset, 307 LineNumber: uint32(endLine), 308 Column: 
uint32(utf8.RuneCount(data[endLineOffset:endOffset]) + 1), 309 }, 310 }) 311 312 if cm.symbol { 313 if symbolInfo == nil { 314 symbolInfo = make([]*Symbol, len(chunk.candidates)) 315 } 316 start := p.id.fileEndSymbol[p.idx] 317 si := p.id.symbols.data(start + cm.symbolIdx) 318 if si != nil { 319 sec := p.docSections()[cm.symbolIdx] 320 si.Sym = string(data[sec.Start:sec.End]) 321 } 322 symbolInfo[i] = si 323 } 324 } 325 326 firstLineNumber := int(chunk.firstLine) - numContextLines 327 if firstLineNumber < 1 { 328 firstLineNumber = 1 329 } 330 firstLineStart, _ := newlines.lineBounds(firstLineNumber) 331 332 chunkMatches = append(chunkMatches, ChunkMatch{ 333 Content: newlines.getLines(data, firstLineNumber, int(chunk.lastLine)+numContextLines+1), 334 ContentStart: Location{ 335 ByteOffset: firstLineStart, 336 LineNumber: uint32(firstLineNumber), 337 Column: 1, 338 }, 339 FileName: false, 340 Ranges: ranges, 341 SymbolInfo: symbolInfo, 342 }) 343 } 344 return chunkMatches 345} 346 347type candidateChunk struct { 348 firstLine uint32 // 1-based, inclusive 349 lastLine uint32 // 1-based, inclusive 350 minOffset uint32 // 0-based, inclusive 351 maxOffset uint32 // 0-based, exclusive 352 candidates []*candidateMatch 353} 354 355// chunkCandidates groups a set of sorted, non-overlapping candidate matches by line number. Adjacent 356// chunks will be merged if adding `numContextLines` to the beginning and end of the chunk would cause 357// it to overlap with an adjacent chunk. 
358func chunkCandidates(ms []*candidateMatch, newlines newlines, numContextLines int) []candidateChunk { 359 var chunks []candidateChunk 360 for _, m := range ms { 361 startOffset := m.byteOffset 362 endOffset := m.byteOffset + m.byteMatchSz 363 firstLine, _, _ := newlines.atOffset(startOffset) 364 lastLine, _, _ := newlines.atOffset(endOffset) 365 366 if len(chunks) > 0 && int(chunks[len(chunks)-1].lastLine)+numContextLines >= firstLine-numContextLines { 367 // If a new chunk created with the current candidateMatch would 368 // overlap with the previous chunk, instead add the candidateMatch 369 // to the last chunk and extend end of the last chunk. 370 last := &chunks[len(chunks)-1] 371 last.candidates = append(last.candidates, m) 372 if last.maxOffset < endOffset { 373 last.lastLine = uint32(lastLine) 374 last.maxOffset = uint32(endOffset) 375 } 376 } else { 377 chunks = append(chunks, candidateChunk{ 378 firstLine: uint32(firstLine), 379 lastLine: uint32(lastLine), 380 minOffset: startOffset, 381 maxOffset: endOffset, 382 candidates: []*candidateMatch{m}, 383 }) 384 } 385 } 386 return chunks 387} 388 389type newlines struct { 390 // locs is the sorted set of byte offsets of the newlines in the file 391 locs []uint32 392 393 // fileSize is just the number of bytes in the file. It is stored 394 // on this struct so we can safely know the length of the last line 395 // in the file since not all files end in a newline. 396 fileSize uint32 397} 398 399// atOffset returns the line containing the offset. If the offset lands on 400// the newline ending line M, we return M. The line is characterized 401// by its linenumber (base-1, byte index of line start, byte index of 402// line end). The line end is the index of a newline, or the filesize 403// (if matching the last line of the file.) 
404func (nls newlines) atOffset(offset uint32) (lineNumber, lineStart, lineEnd int) { 405 idx := sort.Search(len(nls.locs), func(n int) bool { 406 return nls.locs[n] >= offset 407 }) 408 409 start, end := nls.lineBounds(idx + 1) 410 return idx + 1, int(start), int(end) 411} 412 413// lineBounds returns the byte offsets of the start and end of the 1-based 414// lineNumber. The end offset is exclusive and will not contain the line-ending 415// newline. If the line number is out of range of the lines in the file, start 416// and end will be clamped to [0,fileSize]. 417func (nls newlines) lineBounds(lineNumber int) (start, end uint32) { 418 // nls.locs[0] + 1 is the start of the 2nd line of data. 419 startIdx := lineNumber - 2 420 endIdx := lineNumber - 1 421 422 if startIdx < 0 { 423 start = 0 424 } else if startIdx >= len(nls.locs) { 425 start = nls.fileSize 426 } else { 427 start = nls.locs[startIdx] + 1 428 } 429 430 if endIdx < 0 { 431 end = 0 432 } else if endIdx >= len(nls.locs) { 433 end = nls.fileSize 434 } else { 435 end = nls.locs[endIdx] 436 } 437 438 return start, end 439} 440 441// getLines returns a slice of data containing the lines [low, high). 442// low is 1-based and inclusive. high is 1-based and exclusive. 443func (nls newlines) getLines(data []byte, low, high int) []byte { 444 if low >= high { 445 return nil 446 } 447 448 lowStart, _ := nls.lineBounds(low) 449 _, highEnd := nls.lineBounds(high - 1) 450 451 return data[lowStart:highEnd] 452} 453 454const ( 455 // Query-dependent scoring signals. All of these together are bounded at ~9000 456 // (scoreWordMatch + scoreSymbol + scoreKindMatch * 10 + scoreFactorAtomMatch). 457 scorePartialWordMatch = 50.0 458 scoreWordMatch = 500.0 459 scoreBase = 7000.0 460 scorePartialBase = 4000.0 461 scoreSymbol = 7000.0 462 scorePartialSymbol = 4000.0 463 scoreKindMatch = 100.0 464 scoreFactorAtomMatch = 400.0 465 466 // File-only scoring signals. 
For now these are also bounded ~9000 to give them 467 // equal weight with the query-dependent signals. 468 scoreFileRankFactor = 9000.0 469 scoreFileOrderFactor = 10.0 470 scoreRepoRankFactor = 20.0 471 472 // Used for ordering line and chunk matches within a file. 473 scoreLineOrderFactor = 1.0 474) 475 476// findSection checks whether a section defined by offset and size lies within 477// one of the sections in secs. 478func findSection(secs []DocumentSection, off, sz uint32) (int, bool) { 479 j := sort.Search(len(secs), func(i int) bool { 480 return secs[i].End >= off+sz 481 }) 482 483 if j == len(secs) { 484 return 0, false 485 } 486 487 if secs[j].Start <= off && off+sz <= secs[j].End { 488 return j, true 489 } 490 return 0, false 491} 492 493func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch, language string, debug bool) (float64, string) { 494 type debugScore struct { 495 score float64 496 what string 497 } 498 499 score := &debugScore{} 500 maxScore := &debugScore{} 501 502 addScore := func(what string, s float64) { 503 if s != 0 && debug { 504 score.what += fmt.Sprintf("%s:%.2f, ", what, s) 505 } 506 score.score += s 507 } 508 509 for i, r := range m.Ranges { 510 // calculate the start and end offset relative to the start of the content 511 relStartOffset := int(r.Start.ByteOffset - m.ContentStart.ByteOffset) 512 relEndOffset := int(r.End.ByteOffset - m.ContentStart.ByteOffset) 513 514 startBoundary := relStartOffset < len(m.Content) && (relStartOffset == 0 || byteClass(m.Content[relStartOffset-1]) != byteClass(m.Content[relStartOffset])) 515 endBoundary := relEndOffset > 0 && (relEndOffset == len(m.Content) || byteClass(m.Content[relEndOffset-1]) != byteClass(m.Content[relEndOffset])) 516 517 score.score = 0 518 score.what = "" 519 520 if startBoundary && endBoundary { 521 addScore("WordMatch", scoreWordMatch) 522 } else if startBoundary || endBoundary { 523 addScore("PartialWordMatch", scorePartialWordMatch) 524 } 525 526 if 
m.FileName { 527 sep := bytes.LastIndexByte(m.Content, '/') 528 startMatch := relStartOffset == sep+1 529 endMatch := relEndOffset == len(m.Content) 530 if startMatch && endMatch { 531 addScore("Base", scoreBase) 532 } else if startMatch || endMatch { 533 addScore("EdgeBase", (scoreBase+scorePartialBase)/2) 534 } else if sep < relStartOffset { 535 addScore("InnerBase", scorePartialBase) 536 } 537 } else if secIdx, ok := findSection(secs, uint32(r.Start.ByteOffset), uint32(r.End.ByteOffset-r.Start.ByteOffset)); ok { 538 sec := secs[secIdx] 539 startMatch := sec.Start == uint32(r.Start.ByteOffset) 540 endMatch := sec.End == uint32(r.End.ByteOffset) 541 if startMatch && endMatch { 542 addScore("Symbol", scoreSymbol) 543 } else if startMatch || endMatch { 544 addScore("EdgeSymbol", (scoreSymbol+scorePartialSymbol)/2) 545 } else { 546 addScore("InnerSymbol", scorePartialSymbol) 547 } 548 549 var si *Symbol 550 if m.SymbolInfo != nil { 551 si = m.SymbolInfo[i] 552 } 553 if si == nil { 554 // for non-symbol queries, we need to hydrate in SymbolInfo. 
555 start := p.id.fileEndSymbol[p.idx] 556 si = p.id.symbols.data(start + uint32(secIdx)) 557 } 558 if si != nil { 559 addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreKind(language, si.Kind)) 560 } 561 } 562 563 if score.score > maxScore.score { 564 maxScore.score = score.score 565 maxScore.what = score.what 566 } 567 } 568 569 if debug { 570 maxScore.what = fmt.Sprintf("score:%.2f <- %s", maxScore.score, strings.TrimSuffix(maxScore.what, ", ")) 571 } 572 573 return maxScore.score, maxScore.what 574} 575 576func (p *contentProvider) matchScore(secs []DocumentSection, m *LineMatch, language string, debug bool) (float64, string) { 577 type debugScore struct { 578 score float64 579 what string 580 } 581 582 score := &debugScore{} 583 maxScore := &debugScore{} 584 585 addScore := func(what string, s float64) { 586 if s != 0 && debug { 587 score.what += fmt.Sprintf("%s:%.2f, ", what, s) 588 } 589 score.score += s 590 } 591 592 for _, f := range m.LineFragments { 593 startBoundary := f.LineOffset < len(m.Line) && (f.LineOffset == 0 || byteClass(m.Line[f.LineOffset-1]) != byteClass(m.Line[f.LineOffset])) 594 595 end := int(f.LineOffset) + f.MatchLength 596 endBoundary := end > 0 && (end == len(m.Line) || byteClass(m.Line[end-1]) != byteClass(m.Line[end])) 597 598 score.score = 0 599 score.what = "" 600 601 if startBoundary && endBoundary { 602 addScore("WordMatch", scoreWordMatch) 603 } else if startBoundary || endBoundary { 604 addScore("PartialWordMatch", scorePartialWordMatch) 605 } 606 607 if m.FileName { 608 sep := bytes.LastIndexByte(m.Line, '/') 609 startMatch := sep+1 == f.LineOffset 610 endMatch := len(m.Line) == f.LineOffset+f.MatchLength 611 if startMatch && endMatch { 612 addScore("Base", scoreBase) 613 } else if startMatch || endMatch { 614 addScore("EdgeBase", (scoreBase+scorePartialBase)/2) 615 } else if sep < f.LineOffset { 616 addScore("InnerBase", scorePartialBase) 617 } 618 } else if secIdx, ok := findSection(secs, f.Offset, 
uint32(f.MatchLength)); ok { 619 sec := secs[secIdx] 620 startMatch := sec.Start == f.Offset 621 endMatch := sec.End == f.Offset+uint32(f.MatchLength) 622 if startMatch && endMatch { 623 addScore("Symbol", scoreSymbol) 624 } else if startMatch || endMatch { 625 addScore("EdgeSymbol", (scoreSymbol+scorePartialSymbol)/2) 626 } else { 627 addScore("InnerSymbol", scorePartialSymbol) 628 } 629 630 si := f.SymbolInfo 631 if si == nil { 632 // for non-symbol queries, we need to hydrate in SymbolInfo. 633 start := p.id.fileEndSymbol[p.idx] 634 si = p.id.symbols.data(start + uint32(secIdx)) 635 } 636 if si != nil { 637 // the LineFragment may not be on a symbol, then si will be nil. 638 addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreKind(language, si.Kind)) 639 } 640 } 641 642 if score.score > maxScore.score { 643 maxScore.score = score.score 644 maxScore.what = score.what 645 } 646 } 647 648 if debug { 649 maxScore.what = fmt.Sprintf("score:%.2f <- %s", maxScore.score, strings.TrimSuffix(maxScore.what, ", ")) 650 } 651 652 return maxScore.score, maxScore.what 653} 654 655// scoreKind boosts a match based on the combination of language and kind. The 656// language string comes from go-enry, the kind string from ctags. 
657func scoreKind(language string, kind string) float64 { 658 var factor float64 659 660 // Generic ranking which will be overriden by language specific ranking 661 switch kind { 662 case "type": // scip-ctags regression workaround https://github.com/sourcegraph/sourcegraph/issues/57659 663 factor = 8 664 case "class": 665 factor = 10 666 case "struct": 667 factor = 9.5 668 case "enum": 669 factor = 9 670 case "interface": 671 factor = 8 672 case "function", "func", "method": 673 factor = 7 674 case "member", "field": 675 factor = 5.5 676 case "constant", "const": 677 factor = 5 678 case "var", "variable": 679 factor = 4 680 681 default: 682 // No idea what it is, but its something regarded as a symbol 683 factor = 1 684 } 685 686 // Refer to universal-ctags --list-kinds-full=<language> to learn about which 687 // kinds are detected for which language. 688 // 689 // Note that go-ctags uses universal-ctags's interactive mode and thus returns 690 // the full name for "kind" and not the one-letter abbreviation. 691 switch language { 692 case "Java", "java": 693 switch kind { 694 // 2022-03-30: go-ctags contains a regex rule for Java classes that sets "kind" 695 // to "classes" instead of "c". We have to cover both cases to support existing 696 // indexes. 
697 case "class", "classes": 698 factor = 10 699 case "enum": 700 factor = 9 701 case "interface": 702 factor = 8 703 case "method": 704 factor = 7 705 case "field": 706 factor = 6 707 case "enumConstant": 708 factor = 5 709 } 710 case "Kotlin", "kotlin": 711 switch kind { 712 case "class": 713 factor = 10 714 case "interface": 715 factor = 9 716 case "method": 717 factor = 8 718 case "typealias": 719 factor = 7 720 case "constant": 721 factor = 6 722 case "variable": 723 factor = 5 724 } 725 case "Go", "go": 726 switch kind { 727 // scip-ctags regression workaround https://github.com/sourcegraph/sourcegraph/issues/57659 728 // for each case a description of the fields in ctags in the comment 729 case "type": // interface struct talias 730 factor = 10 731 case "method", "function": // methodSpec 732 factor = 8 733 case "variable": // var member 734 factor = 7 735 case "constant": // const 736 factor = 6 737 738 case "interface": // interfaces 739 factor = 10 740 case "struct": // structs 741 factor = 9 742 case "talias": // type aliases 743 factor = 9 744 case "methodSpec": // interface method specification 745 factor = 8.5 746 case "func": // functions 747 factor = 8 748 case "member": // struct members 749 factor = 7 750 case "const": // constants 751 factor = 6 752 case "var": // variables 753 factor = 5 754 } 755 // Could also rank on: 756 // 757 // - anonMember struct anonymous members 758 // - packageName name for specifying imported package 759 // - receiver receivers 760 // - package packages 761 // - type types 762 // - unknown unknown 763 case "C++", "c++": 764 switch kind { 765 case "class": // classes 766 factor = 10 767 case "enum": // enumeration names 768 factor = 9 769 case "function": // function definitions 770 factor = 8 771 case "struct": // structure names 772 factor = 7 773 case "union": // union names 774 factor = 6 775 case "typdef": // typedefs 776 factor = 5 777 case "member": // class, struct, and union members 778 factor = 4 779 case 
"variable": // varialbe definitions 780 factor = 3 781 } 782 // Could also rank on: 783 // NAME DESCRIPTION 784 // macro macro definitions 785 // enumerator enumerators (values inside an enumeration) 786 // header included header files 787 // namespace namespaces 788 // variable variable definitions 789 case "Scala", "scala": 790 switch kind { 791 case "class": 792 factor = 10 793 case "interface": 794 factor = 9 795 case "object": 796 factor = 8 797 case "method": 798 factor = 7 799 case "type": 800 factor = 6 801 case "variable": 802 factor = 5 803 case "package": 804 factor = 4 805 } 806 case "Python", "python": 807 switch kind { 808 case "class": // classes 809 factor = 10 810 case "function": // function definitions 811 factor = 8 812 case "member": // class, struct, and union members 813 factor = 4 814 case "variable": // variable definitions 815 factor = 3 816 case "local": // local variables 817 factor = 2 818 } 819 // Could also rank on: 820 // 821 // - namespace name referring a module defined in other file 822 // - module modules 823 // - unknown name referring a class/variable/function/module defined in other module 824 // - parameter function parameters 825 case "Ruby", "ruby": 826 switch kind { 827 case "class": 828 factor = 10 829 case "method": 830 factor = 9 831 case "alias": 832 factor = 8 833 case "module": 834 factor = 7 835 case "singletonMethod": 836 factor = 6 837 case "constant": 838 factor = 5 839 case "accessor": 840 factor = 4 841 case "library": 842 factor = 3 843 } 844 case "PHP", "php": 845 switch kind { 846 case "class": 847 factor = 10 848 case "interface": 849 factor = 9 850 case "function": 851 factor = 8 852 case "trait": 853 factor = 7 854 case "define": 855 factor = 6 856 case "namespace": 857 factor = 5 858 case "alias": 859 factor = 4 860 case "variable": 861 factor = 3 862 case "local": 863 factor = 3 864 } 865 case "GraphQL", "graphql": 866 switch kind { 867 case "type": 868 factor = 10 869 } 870 case "Markdown", "markdown": 
871 // Headers are good signal in docs, but do not rank as highly as code. 872 switch kind { 873 case "chapter": // # 874 factor = 4 875 case "section": // ## 876 factor = 3 877 case "subsection": // ### 878 factor = 2 879 } 880 } 881 882 return factor * scoreKindMatch 883} 884 885type matchScoreSlice []LineMatch 886 887func (m matchScoreSlice) Len() int { return len(m) } 888func (m matchScoreSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] } 889func (m matchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score } 890 891type chunkMatchScoreSlice []ChunkMatch 892 893func (m chunkMatchScoreSlice) Len() int { return len(m) } 894func (m chunkMatchScoreSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] } 895func (m chunkMatchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score } 896 897type fileMatchesByScore []FileMatch 898 899func (m fileMatchesByScore) Len() int { return len(m) } 900func (m fileMatchesByScore) Swap(i, j int) { m[i], m[j] = m[j], m[i] } 901func (m fileMatchesByScore) Less(i, j int) bool { return m[i].Score > m[j].Score } 902 903func sortMatchesByScore(ms []LineMatch) { 904 sort.Sort(matchScoreSlice(ms)) 905} 906 907func sortChunkMatchesByScore(ms []ChunkMatch) { 908 sort.Sort(chunkMatchScoreSlice(ms)) 909} 910 911// SortFiles sorts files matches. The order depends on the match score, which includes both 912// query-dependent signals like word overlap, and file-only signals like the file ranks (if 913// file ranks are enabled). 914func SortFiles(ms []FileMatch) { 915 sort.Sort(fileMatchesByScore(ms)) 916}