contentprovider.go at 659eac980e6df6b11cdbd2f23b0fdb73ae5e58c4 · boltless.me/zoekt

fork of https://github.com/sourcegraph/zoekt
zoekt / contentprovider.go
at 659eac980e6df6b11cdbd2f23b0fdb73ae5e58c4 24 kB View raw
Julie Tibshirani Ranking: increase contribution of repo rank (#546) 3y ago
  1// Copyright 2016 Google Inc. All rights reserved.
  2//
  3// Licensed under the Apache License, Version 2.0 (the "License");
  4// you may not use this file except in compliance with the License.
  5// You may obtain a copy of the License at
  6//
  7//    http://www.apache.org/licenses/LICENSE-2.0
  8//
  9// Unless required by applicable law or agreed to in writing, software
 10// distributed under the License is distributed on an "AS IS" BASIS,
 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12// See the License for the specific language governing permissions and
 13// limitations under the License.
 14
 15package zoekt
 16
 17import (
 18	"bytes"
 19	"fmt"
 20	"log"
 21	"sort"
 22	"strings"
 23	"unicode/utf8"
 24)
 25
 26var _ = log.Println
 27
 28// contentProvider is an abstraction to treat matches for names and
 29// content with the same code.
 30type contentProvider struct {
 31	id    *indexData
 32	stats *Stats
 33
 34	// mutable
 35	err      error
 36	idx      uint32
 37	_data    []byte
 38	_nl      []uint32
 39	_nlBuf   []uint32
 40	_sects   []DocumentSection
 41	_sectBuf []DocumentSection
 42	fileSize uint32
 43}
 44
 45// setDocument skips to the given document.
 46func (p *contentProvider) setDocument(docID uint32) {
 47	fileStart := p.id.boundaries[docID]
 48
 49	p.idx = docID
 50	p.fileSize = p.id.boundaries[docID+1] - fileStart
 51
 52	p._nl = nil
 53	p._sects = nil
 54	p._data = nil
 55}
 56
 57func (p *contentProvider) docSections() []DocumentSection {
 58	if p._sects == nil {
 59		var sz uint32
 60		p._sects, sz, p.err = p.id.readDocSections(p.idx, p._sectBuf)
 61		p.stats.ContentBytesLoaded += int64(sz)
 62		p._sectBuf = p._sects
 63	}
 64	return p._sects
 65}
 66
 67func (p *contentProvider) newlines() newlines {
 68	if p._nl == nil {
 69		var sz uint32
 70		p._nl, sz, p.err = p.id.readNewlines(p.idx, p._nlBuf)
 71		p._nlBuf = p._nl
 72		p.stats.ContentBytesLoaded += int64(sz)
 73	}
 74	return newlines{locs: p._nl, fileSize: p.fileSize}
 75}
 76
 77func (p *contentProvider) data(fileName bool) []byte {
 78	if fileName {
 79		return p.id.fileNameContent[p.id.fileNameIndex[p.idx]:p.id.fileNameIndex[p.idx+1]]
 80	}
 81
 82	if p._data == nil {
 83		p._data, p.err = p.id.readContents(p.idx)
 84		p.stats.FilesLoaded++
 85		p.stats.ContentBytesLoaded += int64(len(p._data))
 86	}
 87	return p._data
 88}
 89
 90// Find offset in bytes (relative to corpus start) for an offset in
 91// runes (relative to document start). If filename is set, the corpus
 92// is the set of filenames, with the document being the name itself.
 93func (p *contentProvider) findOffset(filename bool, r uint32) uint32 {
 94	if p.id.metaData.PlainASCII {
 95		return r
 96	}
 97
 98	sample := p.id.runeOffsets
 99	runeEnds := p.id.fileEndRunes
100	fileStartByte := p.id.boundaries[p.idx]
101	if filename {
102		sample = p.id.fileNameRuneOffsets
103		runeEnds = p.id.fileNameEndRunes
104		fileStartByte = p.id.fileNameIndex[p.idx]
105	}
106
107	absR := r
108	if p.idx > 0 {
109		absR += runeEnds[p.idx-1]
110	}
111
112	byteOff, left := sample.lookup(absR)
113
114	var data []byte
115
116	if filename {
117		data = p.id.fileNameContent[byteOff:]
118	} else {
119		data, p.err = p.id.readContentSlice(byteOff, 3*runeOffsetFrequency)
120		if p.err != nil {
121			return 0
122		}
123	}
124	for left > 0 {
125		_, sz := utf8.DecodeRune(data)
126		byteOff += uint32(sz)
127		data = data[sz:]
128		left--
129	}
130
131	byteOff -= fileStartByte
132	return byteOff
133}
134
135func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []LineMatch {
136	var result []LineMatch
137	if ms[0].fileName {
138		// There is only "line" in a filename.
139		res := LineMatch{
140			Line:     p.id.fileName(p.idx),
141			FileName: true,
142		}
143
144		for _, m := range ms {
145			res.LineFragments = append(res.LineFragments, LineFragmentMatch{
146				LineOffset:  int(m.byteOffset),
147				MatchLength: int(m.byteMatchSz),
148				Offset:      m.byteOffset,
149			})
150
151			result = []LineMatch{res}
152		}
153	} else {
154		ms = breakMatchesOnNewlines(ms, p.data(false))
155		result = p.fillContentMatches(ms, numContextLines)
156	}
157
158	sects := p.docSections()
159	for i, m := range result {
160		result[i].Score, result[i].DebugScore = p.matchScore(sects, &m, language, debug)
161	}
162
163	return result
164}
165
166func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []ChunkMatch {
167	var result []ChunkMatch
168	if ms[0].fileName {
169		// If the first match is a filename match, there will only be
170		// one match and the matched content will be the filename.
171
172		fileName := p.id.fileName(p.idx)
173		ranges := make([]Range, 0, len(ms))
174		for _, m := range ms {
175			ranges = append(ranges, Range{
176				Start: Location{
177					ByteOffset: m.byteOffset,
178					LineNumber: 1,
179					Column:     uint32(utf8.RuneCount(fileName[:m.byteOffset]) + 1),
180				},
181				End: Location{
182					ByteOffset: m.byteOffset + m.byteMatchSz,
183					LineNumber: 1,
184					Column:     uint32(utf8.RuneCount(fileName[:m.byteOffset+m.byteMatchSz]) + 1),
185				},
186			})
187		}
188
189		result = []ChunkMatch{{
190			Content:      fileName,
191			ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
192			Ranges:       ranges,
193			FileName:     true,
194		}}
195	} else {
196		result = p.fillContentChunkMatches(ms, numContextLines)
197	}
198
199	sects := p.docSections()
200	for i, m := range result {
201		result[i].Score, result[i].DebugScore = p.chunkMatchScore(sects, &m, language, debug)
202	}
203
204	return result
205}
206
207func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLines int) []LineMatch {
208	var result []LineMatch
209	for len(ms) > 0 {
210		m := ms[0]
211		num, lineStart, lineEnd := p.newlines().atOffset(m.byteOffset)
212
213		var lineCands []*candidateMatch
214
215		endMatch := m.byteOffset + m.byteMatchSz
216
217		for len(ms) > 0 {
218			m := ms[0]
219			if int(m.byteOffset) <= lineEnd {
220				endMatch = m.byteOffset + m.byteMatchSz
221				lineCands = append(lineCands, m)
222				ms = ms[1:]
223			} else {
224				break
225			}
226		}
227
228		if len(lineCands) == 0 {
229			log.Panicf(
230				"%s %v infinite loop: num %d start,end %d,%d, offset %d",
231				p.id.fileName(p.idx), p.id.metaData,
232				num, lineStart, lineEnd,
233				m.byteOffset)
234		}
235
236		data := p.data(false)
237
238		// Due to merging matches, we may have a match that
239		// crosses a line boundary. Prevent confusion by
240		// taking lines until we pass the last match
241		for lineEnd < len(data) && endMatch > uint32(lineEnd) {
242			next := bytes.IndexByte(data[lineEnd+1:], '\n')
243			if next == -1 {
244				lineEnd = len(data)
245			} else {
246				// TODO(hanwen): test that checks "+1" part here.
247				lineEnd += next + 1
248			}
249		}
250
251		finalMatch := LineMatch{
252			LineStart:  lineStart,
253			LineEnd:    lineEnd,
254			LineNumber: num,
255		}
256		finalMatch.Line = data[lineStart:lineEnd]
257
258		if numContextLines > 0 {
259			finalMatch.Before = p.newlines().getLines(data, num-numContextLines, num)
260			finalMatch.After = p.newlines().getLines(data, num+1, num+1+numContextLines)
261		}
262
263		for _, m := range lineCands {
264			fragment := LineFragmentMatch{
265				Offset:      m.byteOffset,
266				LineOffset:  int(m.byteOffset) - lineStart,
267				MatchLength: int(m.byteMatchSz),
268			}
269			if m.symbol {
270				start := p.id.fileEndSymbol[p.idx]
271				fragment.SymbolInfo = p.id.symbols.data(start + m.symbolIdx)
272				if fragment.SymbolInfo != nil {
273					sec := p.docSections()[m.symbolIdx]
274					fragment.SymbolInfo.Sym = string(data[sec.Start:sec.End])
275				}
276			}
277
278			finalMatch.LineFragments = append(finalMatch.LineFragments, fragment)
279		}
280		result = append(result, finalMatch)
281	}
282	return result
283}
284
285func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numContextLines int) []ChunkMatch {
286	newlines := p.newlines()
287	chunks := chunkCandidates(ms, newlines, numContextLines)
288	data := p.data(false)
289	chunkMatches := make([]ChunkMatch, 0, len(chunks))
290	for _, chunk := range chunks {
291		ranges := make([]Range, 0, len(chunk.candidates))
292		var symbolInfo []*Symbol
293		for i, cm := range chunk.candidates {
294			startOffset := cm.byteOffset
295			endOffset := cm.byteOffset + cm.byteMatchSz
296			startLine, startLineOffset, _ := newlines.atOffset(startOffset)
297			endLine, endLineOffset, _ := newlines.atOffset(endOffset)
298
299			ranges = append(ranges, Range{
300				Start: Location{
301					ByteOffset: startOffset,
302					LineNumber: uint32(startLine),
303					Column:     uint32(utf8.RuneCount(data[startLineOffset:startOffset]) + 1),
304				},
305				End: Location{
306					ByteOffset: endOffset,
307					LineNumber: uint32(endLine),
308					Column:     uint32(utf8.RuneCount(data[endLineOffset:endOffset]) + 1),
309				},
310			})
311
312			if cm.symbol {
313				if symbolInfo == nil {
314					symbolInfo = make([]*Symbol, len(chunk.candidates))
315				}
316				start := p.id.fileEndSymbol[p.idx]
317				si := p.id.symbols.data(start + cm.symbolIdx)
318				if si != nil {
319					sec := p.docSections()[cm.symbolIdx]
320					si.Sym = string(data[sec.Start:sec.End])
321				}
322				symbolInfo[i] = si
323			}
324		}
325
326		firstLineNumber := int(chunk.firstLine) - numContextLines
327		if firstLineNumber < 1 {
328			firstLineNumber = 1
329		}
330		firstLineStart, _ := newlines.lineBounds(firstLineNumber)
331
332		chunkMatches = append(chunkMatches, ChunkMatch{
333			Content: newlines.getLines(data, firstLineNumber, int(chunk.lastLine)+numContextLines+1),
334			ContentStart: Location{
335				ByteOffset: firstLineStart,
336				LineNumber: uint32(firstLineNumber),
337				Column:     1,
338			},
339			FileName:   false,
340			Ranges:     ranges,
341			SymbolInfo: symbolInfo,
342		})
343	}
344	return chunkMatches
345}
346
347type candidateChunk struct {
348	firstLine  uint32 // 1-based, inclusive
349	lastLine   uint32 // 1-based, inclusive
350	minOffset  uint32 // 0-based, inclusive
351	maxOffset  uint32 // 0-based, exclusive
352	candidates []*candidateMatch
353}
354
355// chunkCandidates groups a set of sorted, non-overlapping candidate matches by line number. Adjacent
356// chunks will be merged if adding `numContextLines` to the beginning and end of the chunk would cause
357// it to overlap with an adjacent chunk.
358func chunkCandidates(ms []*candidateMatch, newlines newlines, numContextLines int) []candidateChunk {
359	var chunks []candidateChunk
360	for _, m := range ms {
361		startOffset := m.byteOffset
362		endOffset := m.byteOffset + m.byteMatchSz
363		firstLine, _, _ := newlines.atOffset(startOffset)
364		lastLine, _, _ := newlines.atOffset(endOffset)
365
366		if len(chunks) > 0 && int(chunks[len(chunks)-1].lastLine)+numContextLines >= firstLine-numContextLines {
367			// If a new chunk created with the current candidateMatch would
368			// overlap with the previous chunk, instead add the candidateMatch
369			// to the last chunk and extend end of the last chunk.
370			last := &chunks[len(chunks)-1]
371			last.candidates = append(last.candidates, m)
372			if last.maxOffset < endOffset {
373				last.lastLine = uint32(lastLine)
374				last.maxOffset = uint32(endOffset)
375			}
376		} else {
377			chunks = append(chunks, candidateChunk{
378				firstLine:  uint32(firstLine),
379				lastLine:   uint32(lastLine),
380				minOffset:  startOffset,
381				maxOffset:  endOffset,
382				candidates: []*candidateMatch{m},
383			})
384		}
385	}
386	return chunks
387}
388
// newlines describes the line structure of one file.
type newlines struct {
	// locs holds the byte offsets of the file's newline characters in
	// ascending order.
	locs []uint32

	// fileSize is the file's length in bytes. Files need not end in a
	// newline, so it is required to know where the last line ends.
	fileSize uint32
}

// atOffset reports the line that contains byte offset offset. An offset that
// points at the newline terminating line M belongs to line M. The result is
// the 1-based line number together with the byte index of the line's start
// and of its end, where the end is the index of the terminating newline or,
// for the last line, the file size.
func (nls newlines) atOffset(offset uint32) (lineNumber, lineStart, lineEnd int) {
	// The number of newlines strictly before offset is the 0-based line index.
	preceding := sort.Search(len(nls.locs), func(i int) bool {
		return offset <= nls.locs[i]
	})
	lineNumber = preceding + 1

	from, to := nls.lineBounds(lineNumber)
	return lineNumber, int(from), int(to)
}

// lineBounds returns the byte offsets at which the 1-based line lineNumber
// starts and ends. The end is exclusive and leaves out the newline that
// terminates the line. Line numbers outside the file are clamped, so both
// values always lie within [0, fileSize].
func (nls newlines) lineBounds(lineNumber int) (start, end uint32) {
	// A line starts one byte past the newline that ends the previous line
	// and ends at its own newline.
	prevNewline, ownNewline := lineNumber-2, lineNumber-1

	switch {
	case prevNewline < 0:
		start = 0
	case prevNewline >= len(nls.locs):
		start = nls.fileSize
	default:
		start = nls.locs[prevNewline] + 1
	}

	switch {
	case ownNewline < 0:
		end = 0
	case ownNewline >= len(nls.locs):
		end = nls.fileSize
	default:
		end = nls.locs[ownNewline]
	}

	return start, end
}

// getLines returns the part of data that holds lines [low, high). Both are
// 1-based; low is inclusive and high is exclusive. An empty range yields nil.
func (nls newlines) getLines(data []byte, low, high int) []byte {
	if high <= low {
		return nil
	}

	from, _ := nls.lineBounds(low)
	_, to := nls.lineBounds(high - 1)

	return data[from:to]
}
453
// Query-dependent scoring signals. Taken together they are bounded at ~9000
// (scoreWordMatch + scoreSymbol + scoreKindMatch * 10 + scoreFactorAtomMatch).
const (
	scorePartialWordMatch = 50.0
	scoreWordMatch        = 500.0
	scoreBase             = 7000.0
	scorePartialBase      = 4000.0
	scoreSymbol           = 7000.0
	scorePartialSymbol    = 4000.0
	scoreKindMatch        = 100.0
	scoreRepetitionFactor = 1.0
	scoreFactorAtomMatch  = 400.0
)

// File-only scoring signals. For now these are bounded at ~9000 as well, so
// that they weigh the same as the query-dependent signals.
const (
	scoreFileRankFactor  = 9000.0
	scoreFileOrderFactor = 10.0
	scoreRepoRankFactor  = 20.0
)

// scoreLineOrderFactor is used for ordering line and chunk matches within a
// file.
const scoreLineOrderFactor = 1.0
476
477// findSection checks whether a section defined by offset and size lies within
478// one of the sections in secs.
479func findSection(secs []DocumentSection, off, sz uint32) (int, bool) {
480	j := sort.Search(len(secs), func(i int) bool {
481		return secs[i].End >= off+sz
482	})
483
484	if j == len(secs) {
485		return 0, false
486	}
487
488	if secs[j].Start <= off && off+sz <= secs[j].End {
489		return j, true
490	}
491	return 0, false
492}
493
494func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch, language string, debug bool) (float64, string) {
495	type debugScore struct {
496		score float64
497		what  string
498	}
499
500	score := &debugScore{}
501	maxScore := &debugScore{}
502
503	addScore := func(what string, s float64) {
504		if debug {
505			score.what += fmt.Sprintf("%s:%.2f, ", what, s)
506		}
507		score.score += s
508	}
509
510	for i, r := range m.Ranges {
511		// calculate the start and end offset relative to the start of the content
512		relStartOffset := int(r.Start.ByteOffset - m.ContentStart.ByteOffset)
513		relEndOffset := int(r.End.ByteOffset - m.ContentStart.ByteOffset)
514
515		startBoundary := relStartOffset < len(m.Content) && (relStartOffset == 0 || byteClass(m.Content[relStartOffset-1]) != byteClass(m.Content[relStartOffset]))
516		endBoundary := relEndOffset > 0 && (relEndOffset == len(m.Content) || byteClass(m.Content[relEndOffset-1]) != byteClass(m.Content[relEndOffset]))
517
518		score.score = 0
519		score.what = ""
520
521		if startBoundary && endBoundary {
522			addScore("WordMatch", scoreWordMatch)
523		} else if startBoundary || endBoundary {
524			addScore("PartialWordMatch", scorePartialWordMatch)
525		}
526
527		if m.FileName {
528			sep := bytes.LastIndexByte(m.Content, '/')
529			startMatch := relStartOffset == sep+1
530			endMatch := relEndOffset == len(m.Content)
531			if startMatch && endMatch {
532				addScore("Base", scoreBase)
533			} else if startMatch || endMatch {
534				addScore("EdgeBase", (scoreBase+scorePartialBase)/2)
535			} else if sep < relStartOffset {
536				addScore("InnerBase", scorePartialBase)
537			}
538		} else if secIdx, ok := findSection(secs, uint32(r.Start.ByteOffset), uint32(r.End.ByteOffset-r.Start.ByteOffset)); ok {
539			sec := secs[secIdx]
540			startMatch := sec.Start == uint32(r.Start.ByteOffset)
541			endMatch := sec.End == uint32(r.End.ByteOffset)
542			if startMatch && endMatch {
543				addScore("Symbol", scoreSymbol)
544			} else if startMatch || endMatch {
545				addScore("EdgeSymbol", (scoreSymbol+scorePartialSymbol)/2)
546			} else {
547				addScore("InnerSymbol", scorePartialSymbol)
548			}
549
550			var si *Symbol
551			if m.SymbolInfo != nil {
552				si = m.SymbolInfo[i]
553			}
554			if si == nil {
555				// for non-symbol queries, we need to hydrate in SymbolInfo.
556				start := p.id.fileEndSymbol[p.idx]
557				si = p.id.symbols.data(start + uint32(secIdx))
558			}
559			if si != nil {
560				addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreKind(language, si.Kind))
561			}
562		}
563
564		if score.score > maxScore.score {
565			maxScore.score = score.score
566			maxScore.what = score.what
567		}
568	}
569
570	if debug {
571		maxScore.what = fmt.Sprintf("score:%f <- %s", maxScore.score, strings.TrimRight(maxScore.what, ", "))
572	}
573
574	return maxScore.score, maxScore.what
575}
576
577func (p *contentProvider) matchScore(secs []DocumentSection, m *LineMatch, language string, debug bool) (float64, string) {
578	type debugScore struct {
579		score float64
580		what  string
581	}
582
583	score := &debugScore{}
584	maxScore := &debugScore{}
585
586	addScore := func(what string, s float64) {
587		if debug {
588			score.what += fmt.Sprintf("%s:%.2f, ", what, s)
589		}
590		score.score += s
591	}
592
593	for _, f := range m.LineFragments {
594		startBoundary := f.LineOffset < len(m.Line) && (f.LineOffset == 0 || byteClass(m.Line[f.LineOffset-1]) != byteClass(m.Line[f.LineOffset]))
595
596		end := int(f.LineOffset) + f.MatchLength
597		endBoundary := end > 0 && (end == len(m.Line) || byteClass(m.Line[end-1]) != byteClass(m.Line[end]))
598
599		score.score = 0
600		score.what = ""
601
602		if startBoundary && endBoundary {
603			addScore("WordMatch", scoreWordMatch)
604		} else if startBoundary || endBoundary {
605			addScore("PartialWordMatch", scorePartialWordMatch)
606		}
607
608		if m.FileName {
609			sep := bytes.LastIndexByte(m.Line, '/')
610			startMatch := sep+1 == f.LineOffset
611			endMatch := len(m.Line) == f.LineOffset+f.MatchLength
612			if startMatch && endMatch {
613				addScore("Base", scoreBase)
614			} else if startMatch || endMatch {
615				addScore("EdgeBase", (scoreBase+scorePartialBase)/2)
616			} else if sep < f.LineOffset {
617				addScore("InnerBase", scorePartialBase)
618			}
619		} else if secIdx, ok := findSection(secs, f.Offset, uint32(f.MatchLength)); ok {
620			sec := secs[secIdx]
621			startMatch := sec.Start == f.Offset
622			endMatch := sec.End == f.Offset+uint32(f.MatchLength)
623			if startMatch && endMatch {
624				addScore("Symbol", scoreSymbol)
625			} else if startMatch || endMatch {
626				addScore("EdgeSymbol", (scoreSymbol+scorePartialSymbol)/2)
627			} else {
628				addScore("InnerSymbol", scorePartialSymbol)
629			}
630
631			si := f.SymbolInfo
632			if si == nil {
633				// for non-symbol queries, we need to hydrate in SymbolInfo.
634				start := p.id.fileEndSymbol[p.idx]
635				si = p.id.symbols.data(start + uint32(secIdx))
636			}
637			if si != nil {
638				// the LineFragment may not be on a symbol, then si will be nil.
639				addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreKind(language, si.Kind))
640			}
641		}
642
643		if score.score > maxScore.score {
644			maxScore.score = score.score
645			maxScore.what = score.what
646		}
647	}
648
649	if debug {
650		maxScore.what = fmt.Sprintf("score:%.2f <- %s", maxScore.score, strings.TrimSuffix(maxScore.what, ", "))
651	}
652
653	return maxScore.score, maxScore.what
654}
655
656// scoreKind boosts a match based on the combination of language and kind. The
657// language string comes from go-enry, the kind string from ctags.
658func scoreKind(language string, kind string) float64 {
659	var factor float64
660
661	// Generic ranking which will be overriden by language specific ranking
662	switch kind {
663	case "class":
664		factor = 10
665	case "struct":
666		factor = 9.5
667	case "enum":
668		factor = 9
669	case "interface":
670		factor = 8
671	case "function", "func":
672		factor = 7
673	case "method":
674		factor = 6
675	case "member", "field":
676		factor = 5.5
677	case "constant", "const":
678		factor = 5
679	case "var", "variable":
680		factor = 4
681	}
682
683	// Refer to universal-ctags --list-kinds-full=<language> to learn about which
684	// kinds are detected for which language.
685	//
686	// Note that go-ctags uses universal-ctags's interactive mode and thus returns
687	// the full name for "kind" and not the one-letter abbreviation.
688	switch language {
689	case "Java", "java":
690		switch kind {
691		// 2022-03-30: go-ctags contains a regex rule for Java classes that sets "kind"
692		// to "classes" instead of "c". We have to cover both cases to support existing
693		// indexes.
694		case "class", "classes":
695			factor = 10
696		case "enum":
697			factor = 9
698		case "interface":
699			factor = 8
700		case "method":
701			factor = 7
702		case "field":
703			factor = 6
704		case "enumConstant":
705			factor = 5
706		}
707	case "Kotlin", "kotlin":
708		switch kind {
709		case "class":
710			factor = 10
711		case "interface":
712			factor = 9
713		case "method":
714			factor = 8
715		case "typealias":
716			factor = 7
717		case "constant":
718			factor = 6
719		case "variable":
720			factor = 5
721		}
722	case "Go", "go":
723		switch kind {
724		case "interface": // interfaces
725			factor = 10
726		case "struct": // structs
727			factor = 9
728		case "talias": // type aliases
729			factor = 9
730		case "methodSpec": // interface method specification
731			factor = 8.5
732		case "func": // functions
733			factor = 8
734		case "member": // struct members
735			factor = 7
736		case "const": // constants
737			factor = 6
738		case "var": // variables
739			factor = 5
740		}
741		// Could also rank on:
742		//
743		//   - anonMember  struct anonymous members
744		//   - packageName name for specifying imported package
745		//   - receiver    receivers
746		//   - package     packages
747		//   - type        types
748		//   - unknown     unknown
749	case "C++", "c++":
750		switch kind {
751		case "class": // classes
752			factor = 10
753		case "enum": // enumeration names
754			factor = 9
755		case "function": // function definitions
756			factor = 8
757		case "struct": // structure names
758			factor = 7
759		case "union": // union names
760			factor = 6
761		case "typdef": // typedefs
762			factor = 5
763		case "member": // class, struct, and union members
764			factor = 4
765		case "variable": // varialbe definitions
766			factor = 3
767		}
768	// Could also rank on:
769	// NAME        DESCRIPTION
770	// macro       macro definitions
771	// enumerator  enumerators (values inside an enumeration)
772	// header      included header files
773	// namespace   namespaces
774	// variable    variable definitions
775	case "Scala", "scala":
776		switch kind {
777		case "class":
778			factor = 10
779		case "interface":
780			factor = 9
781		case "object":
782			factor = 8
783		case "method":
784			factor = 7
785		case "type":
786			factor = 6
787		case "variable":
788			factor = 5
789		case "package":
790			factor = 4
791		}
792	case "Python", "python":
793		switch kind {
794		case "class": // classes
795			factor = 10
796		case "function": // function definitions
797			factor = 8
798		case "member": // class, struct, and union members
799			factor = 4
800		case "variable": // variable definitions
801			factor = 3
802		case "local": // local variables
803			factor = 2
804		}
805		// Could also rank on:
806		//
807		//   - namespace name referring a module defined in other file
808		//   - module    modules
809		//   - unknown   name referring a class/variable/function/module defined in other module
810		//   - parameter function parameters
811	case "Ruby", "ruby":
812		switch kind {
813		case "class":
814			factor = 10
815		case "method":
816			factor = 9
817		case "alias":
818			factor = 8
819		case "module":
820			factor = 7
821		case "singletonMethod":
822			factor = 6
823		case "constant":
824			factor = 5
825		case "accessor":
826			factor = 4
827		case "library":
828			factor = 3
829		}
830	case "PHP", "php":
831		switch kind {
832		case "class":
833			factor = 10
834		case "interface":
835			factor = 9
836		case "function":
837			factor = 8
838		case "trait":
839			factor = 7
840		case "define":
841			factor = 6
842		case "namespace":
843			factor = 5
844		case "alias":
845			factor = 4
846		case "variable":
847			factor = 3
848		case "local":
849			factor = 3
850		}
851	}
852
853	return factor * scoreKindMatch
854}
855
856type matchScoreSlice []LineMatch
857
858func (m matchScoreSlice) Len() int           { return len(m) }
859func (m matchScoreSlice) Swap(i, j int)      { m[i], m[j] = m[j], m[i] }
860func (m matchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score }
861
862type chunkMatchScoreSlice []ChunkMatch
863
864func (m chunkMatchScoreSlice) Len() int           { return len(m) }
865func (m chunkMatchScoreSlice) Swap(i, j int)      { m[i], m[j] = m[j], m[i] }
866func (m chunkMatchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score }
867
868type fileMatchesByScore []FileMatch
869
870func (m fileMatchesByScore) Len() int           { return len(m) }
871func (m fileMatchesByScore) Swap(i, j int)      { m[i], m[j] = m[j], m[i] }
872func (m fileMatchesByScore) Less(i, j int) bool { return m[i].Score > m[j].Score }
873
874func sortMatchesByScore(ms []LineMatch) {
875	sort.Sort(matchScoreSlice(ms))
876}
877
878func sortChunkMatchesByScore(ms []ChunkMatch) {
879	sort.Sort(chunkMatchScoreSlice(ms))
880}
881
882// SortFiles sorts files matches. The order depends on the match score, which includes both
883// query-dependent signals like word overlap, and file-only signals like the file ranks (if
884// file ranks are enabled).
885func SortFiles(ms []FileMatch) {
886	sort.Sort(fileMatchesByScore(ms))
887}
Configure Feed

Configure Feed