fork of https://github.com/sourcegraph/zoekt
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "fmt"
20 "log"
21 "sort"
22 "strings"
23 "unicode/utf8"
24)
25
26var _ = log.Println
27
28// contentProvider is an abstraction to treat matches for names and
29// content with the same code.
30type contentProvider struct {
31 id *indexData
32 stats *Stats
33
34 // mutable
35 err error
36 idx uint32
37 _data []byte
38 _nl []uint32
39 _nlBuf []uint32
40 _sects []DocumentSection
41 _sectBuf []DocumentSection
42 fileSize uint32
43}
44
45// setDocument skips to the given document.
46func (p *contentProvider) setDocument(docID uint32) {
47 fileStart := p.id.boundaries[docID]
48
49 p.idx = docID
50 p.fileSize = p.id.boundaries[docID+1] - fileStart
51
52 p._nl = nil
53 p._sects = nil
54 p._data = nil
55}
56
57func (p *contentProvider) docSections() []DocumentSection {
58 if p._sects == nil {
59 var sz uint32
60 p._sects, sz, p.err = p.id.readDocSections(p.idx, p._sectBuf)
61 p.stats.ContentBytesLoaded += int64(sz)
62 p._sectBuf = p._sects
63 }
64 return p._sects
65}
66
67func (p *contentProvider) newlines() newlines {
68 if p._nl == nil {
69 var sz uint32
70 p._nl, sz, p.err = p.id.readNewlines(p.idx, p._nlBuf)
71 p._nlBuf = p._nl
72 p.stats.ContentBytesLoaded += int64(sz)
73 }
74 return newlines{locs: p._nl, fileSize: p.fileSize}
75}
76
77func (p *contentProvider) data(fileName bool) []byte {
78 if fileName {
79 return p.id.fileNameContent[p.id.fileNameIndex[p.idx]:p.id.fileNameIndex[p.idx+1]]
80 }
81
82 if p._data == nil {
83 p._data, p.err = p.id.readContents(p.idx)
84 p.stats.FilesLoaded++
85 p.stats.ContentBytesLoaded += int64(len(p._data))
86 }
87 return p._data
88}
89
90// Find offset in bytes (relative to corpus start) for an offset in
91// runes (relative to document start). If filename is set, the corpus
92// is the set of filenames, with the document being the name itself.
93func (p *contentProvider) findOffset(filename bool, r uint32) uint32 {
94 if p.id.metaData.PlainASCII {
95 return r
96 }
97
98 sample := p.id.runeOffsets
99 runeEnds := p.id.fileEndRunes
100 fileStartByte := p.id.boundaries[p.idx]
101 if filename {
102 sample = p.id.fileNameRuneOffsets
103 runeEnds = p.id.fileNameEndRunes
104 fileStartByte = p.id.fileNameIndex[p.idx]
105 }
106
107 absR := r
108 if p.idx > 0 {
109 absR += runeEnds[p.idx-1]
110 }
111
112 byteOff, left := sample.lookup(absR)
113
114 var data []byte
115
116 if filename {
117 data = p.id.fileNameContent[byteOff:]
118 } else {
119 data, p.err = p.id.readContentSlice(byteOff, 3*runeOffsetFrequency)
120 if p.err != nil {
121 return 0
122 }
123 }
124 for left > 0 {
125 _, sz := utf8.DecodeRune(data)
126 byteOff += uint32(sz)
127 data = data[sz:]
128 left--
129 }
130
131 byteOff -= fileStartByte
132 return byteOff
133}
134
135func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []LineMatch {
136 var result []LineMatch
137 if ms[0].fileName {
138 // There is only "line" in a filename.
139 res := LineMatch{
140 Line: p.id.fileName(p.idx),
141 FileName: true,
142 }
143
144 for _, m := range ms {
145 res.LineFragments = append(res.LineFragments, LineFragmentMatch{
146 LineOffset: int(m.byteOffset),
147 MatchLength: int(m.byteMatchSz),
148 Offset: m.byteOffset,
149 })
150
151 result = []LineMatch{res}
152 }
153 } else {
154 ms = breakMatchesOnNewlines(ms, p.data(false))
155 result = p.fillContentMatches(ms, numContextLines)
156 }
157
158 sects := p.docSections()
159 for i, m := range result {
160 result[i].Score, result[i].DebugScore = p.matchScore(sects, &m, language, debug)
161 }
162
163 return result
164}
165
166func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []ChunkMatch {
167 var result []ChunkMatch
168 if ms[0].fileName {
169 // If the first match is a filename match, there will only be
170 // one match and the matched content will be the filename.
171
172 fileName := p.id.fileName(p.idx)
173 ranges := make([]Range, 0, len(ms))
174 for _, m := range ms {
175 ranges = append(ranges, Range{
176 Start: Location{
177 ByteOffset: m.byteOffset,
178 LineNumber: 1,
179 Column: uint32(utf8.RuneCount(fileName[:m.byteOffset]) + 1),
180 },
181 End: Location{
182 ByteOffset: m.byteOffset + m.byteMatchSz,
183 LineNumber: 1,
184 Column: uint32(utf8.RuneCount(fileName[:m.byteOffset+m.byteMatchSz]) + 1),
185 },
186 })
187 }
188
189 result = []ChunkMatch{{
190 Content: fileName,
191 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
192 Ranges: ranges,
193 FileName: true,
194 }}
195 } else {
196 result = p.fillContentChunkMatches(ms, numContextLines)
197 }
198
199 sects := p.docSections()
200 for i, m := range result {
201 result[i].Score, result[i].DebugScore = p.chunkMatchScore(sects, &m, language, debug)
202 }
203
204 return result
205}
206
207func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLines int) []LineMatch {
208 var result []LineMatch
209 for len(ms) > 0 {
210 m := ms[0]
211 num, lineStart, lineEnd := p.newlines().atOffset(m.byteOffset)
212
213 var lineCands []*candidateMatch
214
215 endMatch := m.byteOffset + m.byteMatchSz
216
217 for len(ms) > 0 {
218 m := ms[0]
219 if int(m.byteOffset) <= lineEnd {
220 endMatch = m.byteOffset + m.byteMatchSz
221 lineCands = append(lineCands, m)
222 ms = ms[1:]
223 } else {
224 break
225 }
226 }
227
228 if len(lineCands) == 0 {
229 log.Panicf(
230 "%s %v infinite loop: num %d start,end %d,%d, offset %d",
231 p.id.fileName(p.idx), p.id.metaData,
232 num, lineStart, lineEnd,
233 m.byteOffset)
234 }
235
236 data := p.data(false)
237
238 // Due to merging matches, we may have a match that
239 // crosses a line boundary. Prevent confusion by
240 // taking lines until we pass the last match
241 for lineEnd < len(data) && endMatch > uint32(lineEnd) {
242 next := bytes.IndexByte(data[lineEnd+1:], '\n')
243 if next == -1 {
244 lineEnd = len(data)
245 } else {
246 // TODO(hanwen): test that checks "+1" part here.
247 lineEnd += next + 1
248 }
249 }
250
251 finalMatch := LineMatch{
252 LineStart: lineStart,
253 LineEnd: lineEnd,
254 LineNumber: num,
255 }
256 finalMatch.Line = data[lineStart:lineEnd]
257
258 if numContextLines > 0 {
259 finalMatch.Before = p.newlines().getLines(data, num-numContextLines, num)
260 finalMatch.After = p.newlines().getLines(data, num+1, num+1+numContextLines)
261 }
262
263 for _, m := range lineCands {
264 fragment := LineFragmentMatch{
265 Offset: m.byteOffset,
266 LineOffset: int(m.byteOffset) - lineStart,
267 MatchLength: int(m.byteMatchSz),
268 }
269 if m.symbol {
270 start := p.id.fileEndSymbol[p.idx]
271 fragment.SymbolInfo = p.id.symbols.data(start + m.symbolIdx)
272 if fragment.SymbolInfo != nil {
273 sec := p.docSections()[m.symbolIdx]
274 fragment.SymbolInfo.Sym = string(data[sec.Start:sec.End])
275 }
276 }
277
278 finalMatch.LineFragments = append(finalMatch.LineFragments, fragment)
279 }
280 result = append(result, finalMatch)
281 }
282 return result
283}
284
285func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numContextLines int) []ChunkMatch {
286 newlines := p.newlines()
287 chunks := chunkCandidates(ms, newlines, numContextLines)
288 data := p.data(false)
289 chunkMatches := make([]ChunkMatch, 0, len(chunks))
290 for _, chunk := range chunks {
291 ranges := make([]Range, 0, len(chunk.candidates))
292 var symbolInfo []*Symbol
293 for i, cm := range chunk.candidates {
294 startOffset := cm.byteOffset
295 endOffset := cm.byteOffset + cm.byteMatchSz
296 startLine, startLineOffset, _ := newlines.atOffset(startOffset)
297 endLine, endLineOffset, _ := newlines.atOffset(endOffset)
298
299 ranges = append(ranges, Range{
300 Start: Location{
301 ByteOffset: startOffset,
302 LineNumber: uint32(startLine),
303 Column: uint32(utf8.RuneCount(data[startLineOffset:startOffset]) + 1),
304 },
305 End: Location{
306 ByteOffset: endOffset,
307 LineNumber: uint32(endLine),
308 Column: uint32(utf8.RuneCount(data[endLineOffset:endOffset]) + 1),
309 },
310 })
311
312 if cm.symbol {
313 if symbolInfo == nil {
314 symbolInfo = make([]*Symbol, len(chunk.candidates))
315 }
316 start := p.id.fileEndSymbol[p.idx]
317 si := p.id.symbols.data(start + cm.symbolIdx)
318 if si != nil {
319 sec := p.docSections()[cm.symbolIdx]
320 si.Sym = string(data[sec.Start:sec.End])
321 }
322 symbolInfo[i] = si
323 }
324 }
325
326 firstLineNumber := int(chunk.firstLine) - numContextLines
327 if firstLineNumber < 1 {
328 firstLineNumber = 1
329 }
330 firstLineStart, _ := newlines.lineBounds(firstLineNumber)
331
332 chunkMatches = append(chunkMatches, ChunkMatch{
333 Content: newlines.getLines(data, firstLineNumber, int(chunk.lastLine)+numContextLines+1),
334 ContentStart: Location{
335 ByteOffset: firstLineStart,
336 LineNumber: uint32(firstLineNumber),
337 Column: 1,
338 },
339 FileName: false,
340 Ranges: ranges,
341 SymbolInfo: symbolInfo,
342 })
343 }
344 return chunkMatches
345}
346
347type candidateChunk struct {
348 firstLine uint32 // 1-based, inclusive
349 lastLine uint32 // 1-based, inclusive
350 minOffset uint32 // 0-based, inclusive
351 maxOffset uint32 // 0-based, exclusive
352 candidates []*candidateMatch
353}
354
355// chunkCandidates groups a set of sorted, non-overlapping candidate matches by line number. Adjacent
356// chunks will be merged if adding `numContextLines` to the beginning and end of the chunk would cause
357// it to overlap with an adjacent chunk.
358func chunkCandidates(ms []*candidateMatch, newlines newlines, numContextLines int) []candidateChunk {
359 var chunks []candidateChunk
360 for _, m := range ms {
361 startOffset := m.byteOffset
362 endOffset := m.byteOffset + m.byteMatchSz
363 firstLine, _, _ := newlines.atOffset(startOffset)
364 lastLine, _, _ := newlines.atOffset(endOffset)
365
366 if len(chunks) > 0 && int(chunks[len(chunks)-1].lastLine)+numContextLines >= firstLine-numContextLines {
367 // If a new chunk created with the current candidateMatch would
368 // overlap with the previous chunk, instead add the candidateMatch
369 // to the last chunk and extend end of the last chunk.
370 last := &chunks[len(chunks)-1]
371 last.candidates = append(last.candidates, m)
372 if last.maxOffset < endOffset {
373 last.lastLine = uint32(lastLine)
374 last.maxOffset = uint32(endOffset)
375 }
376 } else {
377 chunks = append(chunks, candidateChunk{
378 firstLine: uint32(firstLine),
379 lastLine: uint32(lastLine),
380 minOffset: startOffset,
381 maxOffset: endOffset,
382 candidates: []*candidateMatch{m},
383 })
384 }
385 }
386 return chunks
387}
388
// newlines describes the line structure of a single file.
type newlines struct {
	// locs holds the byte offsets of the file's newlines in ascending order.
	locs []uint32

	// fileSize is the size of the file in bytes. It is needed to bound the
	// last line, because a file does not have to end in a newline.
	fileSize uint32
}

// atOffset locates the line that contains the given byte offset. An offset
// that points at the newline terminating line M belongs to line M. The line
// is returned as its 1-based number, the byte index of its first byte and
// the byte index of its end, which is either the position of its newline or,
// for the last line, the file size.
func (nls newlines) atOffset(offset uint32) (lineNumber, lineStart, lineEnd int) {
	// The number of newlines strictly before offset is the 0-based line index.
	preceding := sort.Search(len(nls.locs), func(i int) bool {
		return offset <= nls.locs[i]
	})

	lineNumber = preceding + 1
	from, to := nls.lineBounds(lineNumber)
	return lineNumber, int(from), int(to)
}

// lineBounds returns the byte offsets at which the 1-based line lineNumber
// starts and ends. The end is exclusive and does not include the newline that
// terminates the line. For line numbers outside the file, start and end are
// clamped to [0, fileSize].
func (nls newlines) lineBounds(lineNumber int) (start, end uint32) {
	count := len(nls.locs)

	// A line starts right after the newline that ends the previous line,
	// i.e. after locs[lineNumber-2].
	switch prev := lineNumber - 2; {
	case prev < 0:
		start = 0
	case prev >= count:
		start = nls.fileSize
	default:
		start = nls.locs[prev] + 1
	}

	// A line ends at its own newline, locs[lineNumber-1].
	switch own := lineNumber - 1; {
	case own < 0:
		end = 0
	case own >= count:
		end = nls.fileSize
	default:
		end = nls.locs[own]
	}

	return start, end
}

// getLines returns the part of data that holds the lines [low, high), where
// low is 1-based and inclusive and high is 1-based and exclusive. The newline
// terminating the last returned line is not included. An empty range yields
// nil.
func (nls newlines) getLines(data []byte, low, high int) []byte {
	if high <= low {
		return nil
	}

	from, _ := nls.lineBounds(low)
	_, to := nls.lineBounds(high - 1)
	return data[from:to]
}
453
const (
	// Signals that depend on the query. Their combined maximum is about 9000:
	// scoreWordMatch + scoreSymbol + scoreKindMatch*10 + scoreFactorAtomMatch.

	// Awarded for a match with a word boundary on only one side.
	scorePartialWordMatch = 50.0
	// Awarded for a match with word boundaries on both sides.
	scoreWordMatch = 500.0
	// Awarded for a file name match covering the whole base name.
	scoreBase = 7000.0
	// Awarded for a file name match inside the base name.
	scorePartialBase = 4000.0
	// Awarded for a match covering a whole symbol.
	scoreSymbol = 7000.0
	// Awarded for a match inside a symbol.
	scorePartialSymbol = 4000.0
	// Multiplied with the per-kind factor computed by scoreKind.
	scoreKindMatch       = 100.0
	scoreFactorAtomMatch = 400.0

	// Signals that depend on the file only. For now these are bounded at
	// about 9000 as well, so that they weigh the same as the query-dependent
	// signals.
	scoreFileRankFactor  = 9000.0
	scoreFileOrderFactor = 10.0
	scoreRepoRankFactor  = 20.0

	// Orders line and chunk matches within one file.
	scoreLineOrderFactor = 1.0
)
475
476// findSection checks whether a section defined by offset and size lies within
477// one of the sections in secs.
478func findSection(secs []DocumentSection, off, sz uint32) (int, bool) {
479 j := sort.Search(len(secs), func(i int) bool {
480 return secs[i].End >= off+sz
481 })
482
483 if j == len(secs) {
484 return 0, false
485 }
486
487 if secs[j].Start <= off && off+sz <= secs[j].End {
488 return j, true
489 }
490 return 0, false
491}
492
493func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch, language string, debug bool) (float64, string) {
494 type debugScore struct {
495 score float64
496 what string
497 }
498
499 score := &debugScore{}
500 maxScore := &debugScore{}
501
502 addScore := func(what string, s float64) {
503 if s != 0 && debug {
504 score.what += fmt.Sprintf("%s:%.2f, ", what, s)
505 }
506 score.score += s
507 }
508
509 for i, r := range m.Ranges {
510 // calculate the start and end offset relative to the start of the content
511 relStartOffset := int(r.Start.ByteOffset - m.ContentStart.ByteOffset)
512 relEndOffset := int(r.End.ByteOffset - m.ContentStart.ByteOffset)
513
514 startBoundary := relStartOffset < len(m.Content) && (relStartOffset == 0 || byteClass(m.Content[relStartOffset-1]) != byteClass(m.Content[relStartOffset]))
515 endBoundary := relEndOffset > 0 && (relEndOffset == len(m.Content) || byteClass(m.Content[relEndOffset-1]) != byteClass(m.Content[relEndOffset]))
516
517 score.score = 0
518 score.what = ""
519
520 if startBoundary && endBoundary {
521 addScore("WordMatch", scoreWordMatch)
522 } else if startBoundary || endBoundary {
523 addScore("PartialWordMatch", scorePartialWordMatch)
524 }
525
526 if m.FileName {
527 sep := bytes.LastIndexByte(m.Content, '/')
528 startMatch := relStartOffset == sep+1
529 endMatch := relEndOffset == len(m.Content)
530 if startMatch && endMatch {
531 addScore("Base", scoreBase)
532 } else if startMatch || endMatch {
533 addScore("EdgeBase", (scoreBase+scorePartialBase)/2)
534 } else if sep < relStartOffset {
535 addScore("InnerBase", scorePartialBase)
536 }
537 } else if secIdx, ok := findSection(secs, uint32(r.Start.ByteOffset), uint32(r.End.ByteOffset-r.Start.ByteOffset)); ok {
538 sec := secs[secIdx]
539 startMatch := sec.Start == uint32(r.Start.ByteOffset)
540 endMatch := sec.End == uint32(r.End.ByteOffset)
541 if startMatch && endMatch {
542 addScore("Symbol", scoreSymbol)
543 } else if startMatch || endMatch {
544 addScore("EdgeSymbol", (scoreSymbol+scorePartialSymbol)/2)
545 } else {
546 addScore("InnerSymbol", scorePartialSymbol)
547 }
548
549 var si *Symbol
550 if m.SymbolInfo != nil {
551 si = m.SymbolInfo[i]
552 }
553 if si == nil {
554 // for non-symbol queries, we need to hydrate in SymbolInfo.
555 start := p.id.fileEndSymbol[p.idx]
556 si = p.id.symbols.data(start + uint32(secIdx))
557 }
558 if si != nil {
559 addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreKind(language, si.Kind))
560 }
561 }
562
563 if score.score > maxScore.score {
564 maxScore.score = score.score
565 maxScore.what = score.what
566 }
567 }
568
569 if debug {
570 maxScore.what = fmt.Sprintf("score:%.2f <- %s", maxScore.score, strings.TrimSuffix(maxScore.what, ", "))
571 }
572
573 return maxScore.score, maxScore.what
574}
575
576func (p *contentProvider) matchScore(secs []DocumentSection, m *LineMatch, language string, debug bool) (float64, string) {
577 type debugScore struct {
578 score float64
579 what string
580 }
581
582 score := &debugScore{}
583 maxScore := &debugScore{}
584
585 addScore := func(what string, s float64) {
586 if s != 0 && debug {
587 score.what += fmt.Sprintf("%s:%.2f, ", what, s)
588 }
589 score.score += s
590 }
591
592 for _, f := range m.LineFragments {
593 startBoundary := f.LineOffset < len(m.Line) && (f.LineOffset == 0 || byteClass(m.Line[f.LineOffset-1]) != byteClass(m.Line[f.LineOffset]))
594
595 end := int(f.LineOffset) + f.MatchLength
596 endBoundary := end > 0 && (end == len(m.Line) || byteClass(m.Line[end-1]) != byteClass(m.Line[end]))
597
598 score.score = 0
599 score.what = ""
600
601 if startBoundary && endBoundary {
602 addScore("WordMatch", scoreWordMatch)
603 } else if startBoundary || endBoundary {
604 addScore("PartialWordMatch", scorePartialWordMatch)
605 }
606
607 if m.FileName {
608 sep := bytes.LastIndexByte(m.Line, '/')
609 startMatch := sep+1 == f.LineOffset
610 endMatch := len(m.Line) == f.LineOffset+f.MatchLength
611 if startMatch && endMatch {
612 addScore("Base", scoreBase)
613 } else if startMatch || endMatch {
614 addScore("EdgeBase", (scoreBase+scorePartialBase)/2)
615 } else if sep < f.LineOffset {
616 addScore("InnerBase", scorePartialBase)
617 }
618 } else if secIdx, ok := findSection(secs, f.Offset, uint32(f.MatchLength)); ok {
619 sec := secs[secIdx]
620 startMatch := sec.Start == f.Offset
621 endMatch := sec.End == f.Offset+uint32(f.MatchLength)
622 if startMatch && endMatch {
623 addScore("Symbol", scoreSymbol)
624 } else if startMatch || endMatch {
625 addScore("EdgeSymbol", (scoreSymbol+scorePartialSymbol)/2)
626 } else {
627 addScore("InnerSymbol", scorePartialSymbol)
628 }
629
630 si := f.SymbolInfo
631 if si == nil {
632 // for non-symbol queries, we need to hydrate in SymbolInfo.
633 start := p.id.fileEndSymbol[p.idx]
634 si = p.id.symbols.data(start + uint32(secIdx))
635 }
636 if si != nil {
637 // the LineFragment may not be on a symbol, then si will be nil.
638 addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreKind(language, si.Kind))
639 }
640 }
641
642 if score.score > maxScore.score {
643 maxScore.score = score.score
644 maxScore.what = score.what
645 }
646 }
647
648 if debug {
649 maxScore.what = fmt.Sprintf("score:%.2f <- %s", maxScore.score, strings.TrimSuffix(maxScore.what, ", "))
650 }
651
652 return maxScore.score, maxScore.what
653}
654
655// scoreKind boosts a match based on the combination of language and kind. The
656// language string comes from go-enry, the kind string from ctags.
657func scoreKind(language string, kind string) float64 {
658 var factor float64
659
660 // Generic ranking which will be overriden by language specific ranking
661 switch kind {
662 case "type": // scip-ctags regression workaround https://github.com/sourcegraph/sourcegraph/issues/57659
663 factor = 8
664 case "class":
665 factor = 10
666 case "struct":
667 factor = 9.5
668 case "enum":
669 factor = 9
670 case "interface":
671 factor = 8
672 case "function", "func", "method":
673 factor = 7
674 case "member", "field":
675 factor = 5.5
676 case "constant", "const":
677 factor = 5
678 case "var", "variable":
679 factor = 4
680
681 default:
682 // No idea what it is, but its something regarded as a symbol
683 factor = 1
684 }
685
686 // Refer to universal-ctags --list-kinds-full=<language> to learn about which
687 // kinds are detected for which language.
688 //
689 // Note that go-ctags uses universal-ctags's interactive mode and thus returns
690 // the full name for "kind" and not the one-letter abbreviation.
691 switch language {
692 case "Java", "java":
693 switch kind {
694 // 2022-03-30: go-ctags contains a regex rule for Java classes that sets "kind"
695 // to "classes" instead of "c". We have to cover both cases to support existing
696 // indexes.
697 case "class", "classes":
698 factor = 10
699 case "enum":
700 factor = 9
701 case "interface":
702 factor = 8
703 case "method":
704 factor = 7
705 case "field":
706 factor = 6
707 case "enumConstant":
708 factor = 5
709 }
710 case "Kotlin", "kotlin":
711 switch kind {
712 case "class":
713 factor = 10
714 case "interface":
715 factor = 9
716 case "method":
717 factor = 8
718 case "typealias":
719 factor = 7
720 case "constant":
721 factor = 6
722 case "variable":
723 factor = 5
724 }
725 case "Go", "go":
726 switch kind {
727 // scip-ctags regression workaround https://github.com/sourcegraph/sourcegraph/issues/57659
728 // for each case a description of the fields in ctags in the comment
729 case "type": // interface struct talias
730 factor = 10
731 case "method", "function": // methodSpec
732 factor = 8
733 case "variable": // var member
734 factor = 7
735 case "constant": // const
736 factor = 6
737
738 case "interface": // interfaces
739 factor = 10
740 case "struct": // structs
741 factor = 9
742 case "talias": // type aliases
743 factor = 9
744 case "methodSpec": // interface method specification
745 factor = 8.5
746 case "func": // functions
747 factor = 8
748 case "member": // struct members
749 factor = 7
750 case "const": // constants
751 factor = 6
752 case "var": // variables
753 factor = 5
754 }
755 // Could also rank on:
756 //
757 // - anonMember struct anonymous members
758 // - packageName name for specifying imported package
759 // - receiver receivers
760 // - package packages
761 // - type types
762 // - unknown unknown
763 case "C++", "c++":
764 switch kind {
765 case "class": // classes
766 factor = 10
767 case "enum": // enumeration names
768 factor = 9
769 case "function": // function definitions
770 factor = 8
771 case "struct": // structure names
772 factor = 7
773 case "union": // union names
774 factor = 6
775 case "typdef": // typedefs
776 factor = 5
777 case "member": // class, struct, and union members
778 factor = 4
779 case "variable": // varialbe definitions
780 factor = 3
781 }
782 // Could also rank on:
783 // NAME DESCRIPTION
784 // macro macro definitions
785 // enumerator enumerators (values inside an enumeration)
786 // header included header files
787 // namespace namespaces
788 // variable variable definitions
789 case "Scala", "scala":
790 switch kind {
791 case "class":
792 factor = 10
793 case "interface":
794 factor = 9
795 case "object":
796 factor = 8
797 case "method":
798 factor = 7
799 case "type":
800 factor = 6
801 case "variable":
802 factor = 5
803 case "package":
804 factor = 4
805 }
806 case "Python", "python":
807 switch kind {
808 case "class": // classes
809 factor = 10
810 case "function": // function definitions
811 factor = 8
812 case "member": // class, struct, and union members
813 factor = 4
814 case "variable": // variable definitions
815 factor = 3
816 case "local": // local variables
817 factor = 2
818 }
819 // Could also rank on:
820 //
821 // - namespace name referring a module defined in other file
822 // - module modules
823 // - unknown name referring a class/variable/function/module defined in other module
824 // - parameter function parameters
825 case "Ruby", "ruby":
826 switch kind {
827 case "class":
828 factor = 10
829 case "method":
830 factor = 9
831 case "alias":
832 factor = 8
833 case "module":
834 factor = 7
835 case "singletonMethod":
836 factor = 6
837 case "constant":
838 factor = 5
839 case "accessor":
840 factor = 4
841 case "library":
842 factor = 3
843 }
844 case "PHP", "php":
845 switch kind {
846 case "class":
847 factor = 10
848 case "interface":
849 factor = 9
850 case "function":
851 factor = 8
852 case "trait":
853 factor = 7
854 case "define":
855 factor = 6
856 case "namespace":
857 factor = 5
858 case "alias":
859 factor = 4
860 case "variable":
861 factor = 3
862 case "local":
863 factor = 3
864 }
865 case "GraphQL", "graphql":
866 switch kind {
867 case "type":
868 factor = 10
869 }
870 case "Markdown", "markdown":
871 // Headers are good signal in docs, but do not rank as highly as code.
872 switch kind {
873 case "chapter": // #
874 factor = 4
875 case "section": // ##
876 factor = 3
877 case "subsection": // ###
878 factor = 2
879 }
880 }
881
882 return factor * scoreKindMatch
883}
884
885type matchScoreSlice []LineMatch
886
887func (m matchScoreSlice) Len() int { return len(m) }
888func (m matchScoreSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
889func (m matchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score }
890
891type chunkMatchScoreSlice []ChunkMatch
892
893func (m chunkMatchScoreSlice) Len() int { return len(m) }
894func (m chunkMatchScoreSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
895func (m chunkMatchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score }
896
897type fileMatchesByScore []FileMatch
898
899func (m fileMatchesByScore) Len() int { return len(m) }
900func (m fileMatchesByScore) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
901func (m fileMatchesByScore) Less(i, j int) bool { return m[i].Score > m[j].Score }
902
903func sortMatchesByScore(ms []LineMatch) {
904 sort.Sort(matchScoreSlice(ms))
905}
906
907func sortChunkMatchesByScore(ms []ChunkMatch) {
908 sort.Sort(chunkMatchScoreSlice(ms))
909}
910
911// SortFiles sorts files matches. The order depends on the match score, which includes both
912// query-dependent signals like word overlap, and file-only signals like the file ranks (if
913// file ranks are enabled).
914func SortFiles(ms []FileMatch) {
915 sort.Sort(fileMatchesByScore(ms))
916}