contentprovider.go at 0f21f325cc5d45c798e2fd5793243d211c055170 · boltless.me/zoekt

fork of https://github.com/sourcegraph/zoekt
zoekt / contentprovider.go
at 0f21f325cc5d45c798e2fd5793243d211c055170 25 kB View raw
Stefan Hengl scoring: score methods and funcs the same (#666) 2y ago
  1// Copyright 2016 Google Inc. All rights reserved.
  2//
  3// Licensed under the Apache License, Version 2.0 (the "License");
  4// you may not use this file except in compliance with the License.
  5// You may obtain a copy of the License at
  6//
  7//    http://www.apache.org/licenses/LICENSE-2.0
  8//
  9// Unless required by applicable law or agreed to in writing, software
 10// distributed under the License is distributed on an "AS IS" BASIS,
 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12// See the License for the specific language governing permissions and
 13// limitations under the License.
 14
 15package zoekt
 16
 17import (
 18	"bytes"
 19	"fmt"
 20	"log"
 21	"sort"
 22	"strings"
 23	"unicode/utf8"
 24)
 25
 26var _ = log.Println
 27
 28// contentProvider is an abstraction to treat matches for names and
 29// content with the same code.
 30type contentProvider struct {
 31	id    *indexData
 32	stats *Stats
 33
 34	// mutable
 35	err      error
 36	idx      uint32
 37	_data    []byte
 38	_nl      []uint32
 39	_nlBuf   []uint32
 40	_sects   []DocumentSection
 41	_sectBuf []DocumentSection
 42	fileSize uint32
 43}
 44
 45// setDocument skips to the given document.
 46func (p *contentProvider) setDocument(docID uint32) {
 47	fileStart := p.id.boundaries[docID]
 48
 49	p.idx = docID
 50	p.fileSize = p.id.boundaries[docID+1] - fileStart
 51
 52	p._nl = nil
 53	p._sects = nil
 54	p._data = nil
 55}
 56
 57func (p *contentProvider) docSections() []DocumentSection {
 58	if p._sects == nil {
 59		var sz uint32
 60		p._sects, sz, p.err = p.id.readDocSections(p.idx, p._sectBuf)
 61		p.stats.ContentBytesLoaded += int64(sz)
 62		p._sectBuf = p._sects
 63	}
 64	return p._sects
 65}
 66
 67func (p *contentProvider) newlines() newlines {
 68	if p._nl == nil {
 69		var sz uint32
 70		p._nl, sz, p.err = p.id.readNewlines(p.idx, p._nlBuf)
 71		p._nlBuf = p._nl
 72		p.stats.ContentBytesLoaded += int64(sz)
 73	}
 74	return newlines{locs: p._nl, fileSize: p.fileSize}
 75}
 76
 77func (p *contentProvider) data(fileName bool) []byte {
 78	if fileName {
 79		return p.id.fileNameContent[p.id.fileNameIndex[p.idx]:p.id.fileNameIndex[p.idx+1]]
 80	}
 81
 82	if p._data == nil {
 83		p._data, p.err = p.id.readContents(p.idx)
 84		p.stats.FilesLoaded++
 85		p.stats.ContentBytesLoaded += int64(len(p._data))
 86	}
 87	return p._data
 88}
 89
 90// Find offset in bytes (relative to corpus start) for an offset in
 91// runes (relative to document start). If filename is set, the corpus
 92// is the set of filenames, with the document being the name itself.
 93func (p *contentProvider) findOffset(filename bool, r uint32) uint32 {
 94	if p.id.metaData.PlainASCII {
 95		return r
 96	}
 97
 98	sample := p.id.runeOffsets
 99	runeEnds := p.id.fileEndRunes
100	fileStartByte := p.id.boundaries[p.idx]
101	if filename {
102		sample = p.id.fileNameRuneOffsets
103		runeEnds = p.id.fileNameEndRunes
104		fileStartByte = p.id.fileNameIndex[p.idx]
105	}
106
107	absR := r
108	if p.idx > 0 {
109		absR += runeEnds[p.idx-1]
110	}
111
112	byteOff, left := sample.lookup(absR)
113
114	var data []byte
115
116	if filename {
117		data = p.id.fileNameContent[byteOff:]
118	} else {
119		data, p.err = p.id.readContentSlice(byteOff, 3*runeOffsetFrequency)
120		if p.err != nil {
121			return 0
122		}
123	}
124	for left > 0 {
125		_, sz := utf8.DecodeRune(data)
126		byteOff += uint32(sz)
127		data = data[sz:]
128		left--
129	}
130
131	byteOff -= fileStartByte
132	return byteOff
133}
134
135func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []LineMatch {
136	var result []LineMatch
137	if ms[0].fileName {
138		// There is only "line" in a filename.
139		res := LineMatch{
140			Line:     p.id.fileName(p.idx),
141			FileName: true,
142		}
143
144		for _, m := range ms {
145			res.LineFragments = append(res.LineFragments, LineFragmentMatch{
146				LineOffset:  int(m.byteOffset),
147				MatchLength: int(m.byteMatchSz),
148				Offset:      m.byteOffset,
149			})
150
151			result = []LineMatch{res}
152		}
153	} else {
154		ms = breakMatchesOnNewlines(ms, p.data(false))
155		result = p.fillContentMatches(ms, numContextLines)
156	}
157
158	sects := p.docSections()
159	for i, m := range result {
160		result[i].Score, result[i].DebugScore = p.matchScore(sects, &m, language, debug)
161	}
162
163	return result
164}
165
166func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []ChunkMatch {
167	var result []ChunkMatch
168	if ms[0].fileName {
169		// If the first match is a filename match, there will only be
170		// one match and the matched content will be the filename.
171
172		fileName := p.id.fileName(p.idx)
173		ranges := make([]Range, 0, len(ms))
174		for _, m := range ms {
175			ranges = append(ranges, Range{
176				Start: Location{
177					ByteOffset: m.byteOffset,
178					LineNumber: 1,
179					Column:     uint32(utf8.RuneCount(fileName[:m.byteOffset]) + 1),
180				},
181				End: Location{
182					ByteOffset: m.byteOffset + m.byteMatchSz,
183					LineNumber: 1,
184					Column:     uint32(utf8.RuneCount(fileName[:m.byteOffset+m.byteMatchSz]) + 1),
185				},
186			})
187		}
188
189		result = []ChunkMatch{{
190			Content:      fileName,
191			ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
192			Ranges:       ranges,
193			FileName:     true,
194		}}
195	} else {
196		result = p.fillContentChunkMatches(ms, numContextLines)
197	}
198
199	sects := p.docSections()
200	for i, m := range result {
201		result[i].Score, result[i].DebugScore = p.chunkMatchScore(sects, &m, language, debug)
202	}
203
204	return result
205}
206
207func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLines int) []LineMatch {
208	var result []LineMatch
209	for len(ms) > 0 {
210		m := ms[0]
211		num, lineStart, lineEnd := p.newlines().atOffset(m.byteOffset)
212
213		var lineCands []*candidateMatch
214
215		endMatch := m.byteOffset + m.byteMatchSz
216
217		for len(ms) > 0 {
218			m := ms[0]
219			if int(m.byteOffset) <= lineEnd {
220				endMatch = m.byteOffset + m.byteMatchSz
221				lineCands = append(lineCands, m)
222				ms = ms[1:]
223			} else {
224				break
225			}
226		}
227
228		if len(lineCands) == 0 {
229			log.Panicf(
230				"%s %v infinite loop: num %d start,end %d,%d, offset %d",
231				p.id.fileName(p.idx), p.id.metaData,
232				num, lineStart, lineEnd,
233				m.byteOffset)
234		}
235
236		data := p.data(false)
237
238		// Due to merging matches, we may have a match that
239		// crosses a line boundary. Prevent confusion by
240		// taking lines until we pass the last match
241		for lineEnd < len(data) && endMatch > uint32(lineEnd) {
242			next := bytes.IndexByte(data[lineEnd+1:], '\n')
243			if next == -1 {
244				lineEnd = len(data)
245			} else {
246				// TODO(hanwen): test that checks "+1" part here.
247				lineEnd += next + 1
248			}
249		}
250
251		finalMatch := LineMatch{
252			LineStart:  lineStart,
253			LineEnd:    lineEnd,
254			LineNumber: num,
255		}
256		finalMatch.Line = data[lineStart:lineEnd]
257
258		if numContextLines > 0 {
259			finalMatch.Before = p.newlines().getLines(data, num-numContextLines, num)
260			finalMatch.After = p.newlines().getLines(data, num+1, num+1+numContextLines)
261		}
262
263		for _, m := range lineCands {
264			fragment := LineFragmentMatch{
265				Offset:      m.byteOffset,
266				LineOffset:  int(m.byteOffset) - lineStart,
267				MatchLength: int(m.byteMatchSz),
268			}
269			if m.symbol {
270				start := p.id.fileEndSymbol[p.idx]
271				fragment.SymbolInfo = p.id.symbols.data(start + m.symbolIdx)
272				if fragment.SymbolInfo != nil {
273					sec := p.docSections()[m.symbolIdx]
274					fragment.SymbolInfo.Sym = string(data[sec.Start:sec.End])
275				}
276			}
277
278			finalMatch.LineFragments = append(finalMatch.LineFragments, fragment)
279		}
280		result = append(result, finalMatch)
281	}
282	return result
283}
284
285func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numContextLines int) []ChunkMatch {
286	newlines := p.newlines()
287	chunks := chunkCandidates(ms, newlines, numContextLines)
288	data := p.data(false)
289	chunkMatches := make([]ChunkMatch, 0, len(chunks))
290	for _, chunk := range chunks {
291		ranges := make([]Range, 0, len(chunk.candidates))
292		var symbolInfo []*Symbol
293		for i, cm := range chunk.candidates {
294			startOffset := cm.byteOffset
295			endOffset := cm.byteOffset + cm.byteMatchSz
296			startLine, startLineOffset, _ := newlines.atOffset(startOffset)
297			endLine, endLineOffset, _ := newlines.atOffset(endOffset)
298
299			ranges = append(ranges, Range{
300				Start: Location{
301					ByteOffset: startOffset,
302					LineNumber: uint32(startLine),
303					Column:     uint32(utf8.RuneCount(data[startLineOffset:startOffset]) + 1),
304				},
305				End: Location{
306					ByteOffset: endOffset,
307					LineNumber: uint32(endLine),
308					Column:     uint32(utf8.RuneCount(data[endLineOffset:endOffset]) + 1),
309				},
310			})
311
312			if cm.symbol {
313				if symbolInfo == nil {
314					symbolInfo = make([]*Symbol, len(chunk.candidates))
315				}
316				start := p.id.fileEndSymbol[p.idx]
317				si := p.id.symbols.data(start + cm.symbolIdx)
318				if si != nil {
319					sec := p.docSections()[cm.symbolIdx]
320					si.Sym = string(data[sec.Start:sec.End])
321				}
322				symbolInfo[i] = si
323			}
324		}
325
326		firstLineNumber := int(chunk.firstLine) - numContextLines
327		if firstLineNumber < 1 {
328			firstLineNumber = 1
329		}
330		firstLineStart, _ := newlines.lineBounds(firstLineNumber)
331
332		chunkMatches = append(chunkMatches, ChunkMatch{
333			Content: newlines.getLines(data, firstLineNumber, int(chunk.lastLine)+numContextLines+1),
334			ContentStart: Location{
335				ByteOffset: firstLineStart,
336				LineNumber: uint32(firstLineNumber),
337				Column:     1,
338			},
339			FileName:   false,
340			Ranges:     ranges,
341			SymbolInfo: symbolInfo,
342		})
343	}
344	return chunkMatches
345}
346
347type candidateChunk struct {
348	firstLine  uint32 // 1-based, inclusive
349	lastLine   uint32 // 1-based, inclusive
350	minOffset  uint32 // 0-based, inclusive
351	maxOffset  uint32 // 0-based, exclusive
352	candidates []*candidateMatch
353}
354
355// chunkCandidates groups a set of sorted, non-overlapping candidate matches by line number. Adjacent
356// chunks will be merged if adding `numContextLines` to the beginning and end of the chunk would cause
357// it to overlap with an adjacent chunk.
358func chunkCandidates(ms []*candidateMatch, newlines newlines, numContextLines int) []candidateChunk {
359	var chunks []candidateChunk
360	for _, m := range ms {
361		startOffset := m.byteOffset
362		endOffset := m.byteOffset + m.byteMatchSz
363		firstLine, _, _ := newlines.atOffset(startOffset)
364		lastLine, _, _ := newlines.atOffset(endOffset)
365
366		if len(chunks) > 0 && int(chunks[len(chunks)-1].lastLine)+numContextLines >= firstLine-numContextLines {
367			// If a new chunk created with the current candidateMatch would
368			// overlap with the previous chunk, instead add the candidateMatch
369			// to the last chunk and extend end of the last chunk.
370			last := &chunks[len(chunks)-1]
371			last.candidates = append(last.candidates, m)
372			if last.maxOffset < endOffset {
373				last.lastLine = uint32(lastLine)
374				last.maxOffset = uint32(endOffset)
375			}
376		} else {
377			chunks = append(chunks, candidateChunk{
378				firstLine:  uint32(firstLine),
379				lastLine:   uint32(lastLine),
380				minOffset:  startOffset,
381				maxOffset:  endOffset,
382				candidates: []*candidateMatch{m},
383			})
384		}
385	}
386	return chunks
387}
388
// newlines describes the line structure of one file.
type newlines struct {
	// locs holds the byte offsets of the file's newlines, in sorted order.
	locs []uint32

	// fileSize is the number of bytes in the file. It is needed to know where
	// the last line ends, because not every file ends in a newline.
	fileSize uint32
}

// atOffset returns the line containing offset as a 1-based line number, the
// byte index of the line start and the byte index of the line end. An offset
// that lands on the newline terminating line M belongs to line M. The line
// end is the index of the terminating newline, or the file size for the last
// line of the file.
func (nls newlines) atOffset(offset uint32) (lineNumber, lineStart, lineEnd int) {
	// The number of newlines strictly before offset is the 0-based line index.
	before := sort.Search(len(nls.locs), func(i int) bool {
		return offset <= nls.locs[i]
	})

	lineNumber = before + 1
	lo, hi := nls.lineBounds(lineNumber)
	return lineNumber, int(lo), int(hi)
}

// lineBounds returns the byte offsets at which the 1-based line lineNumber
// starts and ends. The end is exclusive and does not include the line-ending
// newline. For line numbers outside the file, start and end are clamped to
// [0, fileSize].
func (nls newlines) lineBounds(lineNumber int) (start, end uint32) {
	count := len(nls.locs)

	// A line starts right after the newline that ends the previous line, so
	// nls.locs[0] + 1 is the start of line 2.
	switch prev := lineNumber - 2; {
	case prev < 0:
		start = 0
	case prev >= count:
		start = nls.fileSize
	default:
		start = nls.locs[prev] + 1
	}

	// A line ends at its own newline.
	switch own := lineNumber - 1; {
	case own < 0:
		end = 0
	case own >= count:
		end = nls.fileSize
	default:
		end = nls.locs[own]
	}

	return start, end
}

// getLines returns the part of data covering lines [low, high). Both bounds
// are 1-based; low is inclusive and high is exclusive. The result is nil when
// the range is empty.
func (nls newlines) getLines(data []byte, low, high int) []byte {
	if high <= low {
		return nil
	}

	from, _ := nls.lineBounds(low)
	_, to := nls.lineBounds(high - 1)

	return data[from:to]
}
453
const (
	// Query-dependent scoring signals. Taken together they are bounded at
	// roughly 9000:
	// scoreWordMatch + scoreSymbol + scoreKindMatch * 10 + scoreFactorAtomMatch.

	// Word boundary signals.
	scoreWordMatch        = 500.0
	scorePartialWordMatch = 50.0

	// File name (base name) signals.
	scoreBase        = 7000.0
	scorePartialBase = 4000.0

	// Symbol signals.
	scoreSymbol        = 7000.0
	scorePartialSymbol = 4000.0
	scoreKindMatch     = 100.0

	scoreFactorAtomMatch = 400.0

	// File-only scoring signals. For now these are also bounded at roughly
	// 9000 so that they weigh the same as the query-dependent signals.
	scoreFileRankFactor  = 9000.0
	scoreFileOrderFactor = 10.0
	scoreRepoRankFactor  = 20.0

	// Used for ordering line and chunk matches within a file.
	scoreLineOrderFactor = 1.0
)
475
476// findSection checks whether a section defined by offset and size lies within
477// one of the sections in secs.
478func findSection(secs []DocumentSection, off, sz uint32) (int, bool) {
479	j := sort.Search(len(secs), func(i int) bool {
480		return secs[i].End >= off+sz
481	})
482
483	if j == len(secs) {
484		return 0, false
485	}
486
487	if secs[j].Start <= off && off+sz <= secs[j].End {
488		return j, true
489	}
490	return 0, false
491}
492
493func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch, language string, debug bool) (float64, string) {
494	type debugScore struct {
495		score float64
496		what  string
497	}
498
499	score := &debugScore{}
500	maxScore := &debugScore{}
501
502	addScore := func(what string, s float64) {
503		if s != 0 && debug {
504			score.what += fmt.Sprintf("%s:%.2f, ", what, s)
505		}
506		score.score += s
507	}
508
509	for i, r := range m.Ranges {
510		// calculate the start and end offset relative to the start of the content
511		relStartOffset := int(r.Start.ByteOffset - m.ContentStart.ByteOffset)
512		relEndOffset := int(r.End.ByteOffset - m.ContentStart.ByteOffset)
513
514		startBoundary := relStartOffset < len(m.Content) && (relStartOffset == 0 || byteClass(m.Content[relStartOffset-1]) != byteClass(m.Content[relStartOffset]))
515		endBoundary := relEndOffset > 0 && (relEndOffset == len(m.Content) || byteClass(m.Content[relEndOffset-1]) != byteClass(m.Content[relEndOffset]))
516
517		score.score = 0
518		score.what = ""
519
520		if startBoundary && endBoundary {
521			addScore("WordMatch", scoreWordMatch)
522		} else if startBoundary || endBoundary {
523			addScore("PartialWordMatch", scorePartialWordMatch)
524		}
525
526		if m.FileName {
527			sep := bytes.LastIndexByte(m.Content, '/')
528			startMatch := relStartOffset == sep+1
529			endMatch := relEndOffset == len(m.Content)
530			if startMatch && endMatch {
531				addScore("Base", scoreBase)
532			} else if startMatch || endMatch {
533				addScore("EdgeBase", (scoreBase+scorePartialBase)/2)
534			} else if sep < relStartOffset {
535				addScore("InnerBase", scorePartialBase)
536			}
537		} else if secIdx, ok := findSection(secs, uint32(r.Start.ByteOffset), uint32(r.End.ByteOffset-r.Start.ByteOffset)); ok {
538			sec := secs[secIdx]
539			startMatch := sec.Start == uint32(r.Start.ByteOffset)
540			endMatch := sec.End == uint32(r.End.ByteOffset)
541			if startMatch && endMatch {
542				addScore("Symbol", scoreSymbol)
543			} else if startMatch || endMatch {
544				addScore("EdgeSymbol", (scoreSymbol+scorePartialSymbol)/2)
545			} else {
546				addScore("InnerSymbol", scorePartialSymbol)
547			}
548
549			var si *Symbol
550			if m.SymbolInfo != nil {
551				si = m.SymbolInfo[i]
552			}
553			if si == nil {
554				// for non-symbol queries, we need to hydrate in SymbolInfo.
555				start := p.id.fileEndSymbol[p.idx]
556				si = p.id.symbols.data(start + uint32(secIdx))
557			}
558			if si != nil {
559				addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreKind(language, si.Kind))
560			}
561		}
562
563		if score.score > maxScore.score {
564			maxScore.score = score.score
565			maxScore.what = score.what
566		}
567	}
568
569	if debug {
570		maxScore.what = fmt.Sprintf("score:%.2f <- %s", maxScore.score, strings.TrimSuffix(maxScore.what, ", "))
571	}
572
573	return maxScore.score, maxScore.what
574}
575
576func (p *contentProvider) matchScore(secs []DocumentSection, m *LineMatch, language string, debug bool) (float64, string) {
577	type debugScore struct {
578		score float64
579		what  string
580	}
581
582	score := &debugScore{}
583	maxScore := &debugScore{}
584
585	addScore := func(what string, s float64) {
586		if s != 0 && debug {
587			score.what += fmt.Sprintf("%s:%.2f, ", what, s)
588		}
589		score.score += s
590	}
591
592	for _, f := range m.LineFragments {
593		startBoundary := f.LineOffset < len(m.Line) && (f.LineOffset == 0 || byteClass(m.Line[f.LineOffset-1]) != byteClass(m.Line[f.LineOffset]))
594
595		end := int(f.LineOffset) + f.MatchLength
596		endBoundary := end > 0 && (end == len(m.Line) || byteClass(m.Line[end-1]) != byteClass(m.Line[end]))
597
598		score.score = 0
599		score.what = ""
600
601		if startBoundary && endBoundary {
602			addScore("WordMatch", scoreWordMatch)
603		} else if startBoundary || endBoundary {
604			addScore("PartialWordMatch", scorePartialWordMatch)
605		}
606
607		if m.FileName {
608			sep := bytes.LastIndexByte(m.Line, '/')
609			startMatch := sep+1 == f.LineOffset
610			endMatch := len(m.Line) == f.LineOffset+f.MatchLength
611			if startMatch && endMatch {
612				addScore("Base", scoreBase)
613			} else if startMatch || endMatch {
614				addScore("EdgeBase", (scoreBase+scorePartialBase)/2)
615			} else if sep < f.LineOffset {
616				addScore("InnerBase", scorePartialBase)
617			}
618		} else if secIdx, ok := findSection(secs, f.Offset, uint32(f.MatchLength)); ok {
619			sec := secs[secIdx]
620			startMatch := sec.Start == f.Offset
621			endMatch := sec.End == f.Offset+uint32(f.MatchLength)
622			if startMatch && endMatch {
623				addScore("Symbol", scoreSymbol)
624			} else if startMatch || endMatch {
625				addScore("EdgeSymbol", (scoreSymbol+scorePartialSymbol)/2)
626			} else {
627				addScore("InnerSymbol", scorePartialSymbol)
628			}
629
630			si := f.SymbolInfo
631			if si == nil {
632				// for non-symbol queries, we need to hydrate in SymbolInfo.
633				start := p.id.fileEndSymbol[p.idx]
634				si = p.id.symbols.data(start + uint32(secIdx))
635			}
636			if si != nil {
637				// the LineFragment may not be on a symbol, then si will be nil.
638				addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreKind(language, si.Kind))
639			}
640		}
641
642		if score.score > maxScore.score {
643			maxScore.score = score.score
644			maxScore.what = score.what
645		}
646	}
647
648	if debug {
649		maxScore.what = fmt.Sprintf("score:%.2f <- %s", maxScore.score, strings.TrimSuffix(maxScore.what, ", "))
650	}
651
652	return maxScore.score, maxScore.what
653}
654
655// scoreKind boosts a match based on the combination of language and kind. The
656// language string comes from go-enry, the kind string from ctags.
657func scoreKind(language string, kind string) float64 {
658	var factor float64
659
660	// Generic ranking which will be overriden by language specific ranking
661	switch kind {
662	case "type": // scip-ctags regression workaround https://github.com/sourcegraph/sourcegraph/issues/57659
663		factor = 8
664	case "class":
665		factor = 10
666	case "struct":
667		factor = 9.5
668	case "enum":
669		factor = 9
670	case "interface":
671		factor = 8
672	case "function", "func", "method":
673		factor = 7
674	case "member", "field":
675		factor = 5.5
676	case "constant", "const":
677		factor = 5
678	case "var", "variable":
679		factor = 4
680
681	default:
682		// No idea what it is, but its something regarded as a symbol
683		factor = 1
684	}
685
686	// Refer to universal-ctags --list-kinds-full=<language> to learn about which
687	// kinds are detected for which language.
688	//
689	// Note that go-ctags uses universal-ctags's interactive mode and thus returns
690	// the full name for "kind" and not the one-letter abbreviation.
691	switch language {
692	case "Java", "java":
693		switch kind {
694		// 2022-03-30: go-ctags contains a regex rule for Java classes that sets "kind"
695		// to "classes" instead of "c". We have to cover both cases to support existing
696		// indexes.
697		case "class", "classes":
698			factor = 10
699		case "enum":
700			factor = 9
701		case "interface":
702			factor = 8
703		case "method":
704			factor = 7
705		case "field":
706			factor = 6
707		case "enumConstant":
708			factor = 5
709		}
710	case "Kotlin", "kotlin":
711		switch kind {
712		case "class":
713			factor = 10
714		case "interface":
715			factor = 9
716		case "method":
717			factor = 8
718		case "typealias":
719			factor = 7
720		case "constant":
721			factor = 6
722		case "variable":
723			factor = 5
724		}
725	case "Go", "go":
726		switch kind {
727		// scip-ctags regression workaround https://github.com/sourcegraph/sourcegraph/issues/57659
728		// for each case a description of the fields in ctags in the comment
729		case "type": // interface struct talias
730			factor = 10
731		case "method", "function": // methodSpec
732			factor = 8
733		case "variable": // var member
734			factor = 7
735		case "constant": // const
736			factor = 6
737
738		case "interface": // interfaces
739			factor = 10
740		case "struct": // structs
741			factor = 9
742		case "talias": // type aliases
743			factor = 9
744		case "methodSpec": // interface method specification
745			factor = 8.5
746		case "func": // functions
747			factor = 8
748		case "member": // struct members
749			factor = 7
750		case "const": // constants
751			factor = 6
752		case "var": // variables
753			factor = 5
754		}
755		// Could also rank on:
756		//
757		//   - anonMember  struct anonymous members
758		//   - packageName name for specifying imported package
759		//   - receiver    receivers
760		//   - package     packages
761		//   - type        types
762		//   - unknown     unknown
763	case "C++", "c++":
764		switch kind {
765		case "class": // classes
766			factor = 10
767		case "enum": // enumeration names
768			factor = 9
769		case "function": // function definitions
770			factor = 8
771		case "struct": // structure names
772			factor = 7
773		case "union": // union names
774			factor = 6
775		case "typdef": // typedefs
776			factor = 5
777		case "member": // class, struct, and union members
778			factor = 4
779		case "variable": // varialbe definitions
780			factor = 3
781		}
782	// Could also rank on:
783	// NAME        DESCRIPTION
784	// macro       macro definitions
785	// enumerator  enumerators (values inside an enumeration)
786	// header      included header files
787	// namespace   namespaces
788	// variable    variable definitions
789	case "Scala", "scala":
790		switch kind {
791		case "class":
792			factor = 10
793		case "interface":
794			factor = 9
795		case "object":
796			factor = 8
797		case "method":
798			factor = 7
799		case "type":
800			factor = 6
801		case "variable":
802			factor = 5
803		case "package":
804			factor = 4
805		}
806	case "Python", "python":
807		switch kind {
808		case "class": // classes
809			factor = 10
810		case "function": // function definitions
811			factor = 8
812		case "member": // class, struct, and union members
813			factor = 4
814		case "variable": // variable definitions
815			factor = 3
816		case "local": // local variables
817			factor = 2
818		}
819		// Could also rank on:
820		//
821		//   - namespace name referring a module defined in other file
822		//   - module    modules
823		//   - unknown   name referring a class/variable/function/module defined in other module
824		//   - parameter function parameters
825	case "Ruby", "ruby":
826		switch kind {
827		case "class":
828			factor = 10
829		case "method":
830			factor = 9
831		case "alias":
832			factor = 8
833		case "module":
834			factor = 7
835		case "singletonMethod":
836			factor = 6
837		case "constant":
838			factor = 5
839		case "accessor":
840			factor = 4
841		case "library":
842			factor = 3
843		}
844	case "PHP", "php":
845		switch kind {
846		case "class":
847			factor = 10
848		case "interface":
849			factor = 9
850		case "function":
851			factor = 8
852		case "trait":
853			factor = 7
854		case "define":
855			factor = 6
856		case "namespace":
857			factor = 5
858		case "alias":
859			factor = 4
860		case "variable":
861			factor = 3
862		case "local":
863			factor = 3
864		}
865	case "GraphQL", "graphql":
866		switch kind {
867		case "type":
868			factor = 10
869		}
870	case "Markdown", "markdown":
871		// Headers are good signal in docs, but do not rank as highly as code.
872		switch kind {
873		case "chapter": // #
874			factor = 4
875		case "section": // ##
876			factor = 3
877		case "subsection": // ###
878			factor = 2
879		}
880	}
881
882	return factor * scoreKindMatch
883}
884
885type matchScoreSlice []LineMatch
886
887func (m matchScoreSlice) Len() int           { return len(m) }
888func (m matchScoreSlice) Swap(i, j int)      { m[i], m[j] = m[j], m[i] }
889func (m matchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score }
890
891type chunkMatchScoreSlice []ChunkMatch
892
893func (m chunkMatchScoreSlice) Len() int           { return len(m) }
894func (m chunkMatchScoreSlice) Swap(i, j int)      { m[i], m[j] = m[j], m[i] }
895func (m chunkMatchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score }
896
897type fileMatchesByScore []FileMatch
898
899func (m fileMatchesByScore) Len() int           { return len(m) }
900func (m fileMatchesByScore) Swap(i, j int)      { m[i], m[j] = m[j], m[i] }
901func (m fileMatchesByScore) Less(i, j int) bool { return m[i].Score > m[j].Score }
902
903func sortMatchesByScore(ms []LineMatch) {
904	sort.Sort(matchScoreSlice(ms))
905}
906
907func sortChunkMatchesByScore(ms []ChunkMatch) {
908	sort.Sort(chunkMatchScoreSlice(ms))
909}
910
911// SortFiles sorts files matches. The order depends on the match score, which includes both
912// query-dependent signals like word overlap, and file-only signals like the file ranks (if
913// file ranks are enabled).
914func SortFiles(ms []FileMatch) {
915	sort.Sort(fileMatchesByScore(ms))
916}
Configure Feed

Configure Feed