fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bytes" 19 "fmt" 20 "log" 21 "sort" 22 "strings" 23 "unicode/utf8" 24) 25 26var _ = log.Println 27 28// contentProvider is an abstraction to treat matches for names and 29// content with the same code. 30type contentProvider struct { 31 id *indexData 32 stats *Stats 33 34 // mutable 35 err error 36 idx uint32 37 _data []byte 38 _nl []uint32 39 _nlBuf []uint32 40 _sects []DocumentSection 41 _sectBuf []DocumentSection 42 fileSize uint32 43} 44 45// setDocument skips to the given document. 46func (p *contentProvider) setDocument(docID uint32) { 47 fileStart := p.id.boundaries[docID] 48 49 p.idx = docID 50 p.fileSize = p.id.boundaries[docID+1] - fileStart 51 52 p._nl = nil 53 p._sects = nil 54 p._data = nil 55} 56 57func (p *contentProvider) docSections() []DocumentSection { 58 if p._sects == nil { 59 var sz uint32 60 p._sects, sz, p.err = p.id.readDocSections(p.idx, p._sectBuf) 61 p.stats.ContentBytesLoaded += int64(sz) 62 p._sectBuf = p._sects 63 } 64 return p._sects 65} 66 67func (p *contentProvider) newlines() newlines { 68 if p._nl == nil { 69 var sz uint32 70 p._nl, sz, p.err = p.id.readNewlines(p.idx, p._nlBuf) 71 p._nlBuf = p._nl 72 p.stats.ContentBytesLoaded += int64(sz) 73 } 74 return newlines{locs: p._nl, fileSize: p.fileSize} 75} 76 77func (p *contentProvider) data(fileName bool) []byte { 78 if fileName { 79 return p.id.fileNameContent[p.id.fileNameIndex[p.idx]:p.id.fileNameIndex[p.idx+1]] 80 } 81 82 if p._data == nil { 83 p._data, p.err = p.id.readContents(p.idx) 84 p.stats.FilesLoaded++ 85 p.stats.ContentBytesLoaded += int64(len(p._data)) 86 } 87 return p._data 88} 89 90// Find offset in bytes (relative to corpus start) for an offset in 91// runes (relative to document start). If filename is set, the corpus 92// is the set of filenames, with the document being the name itself. 93func (p *contentProvider) findOffset(filename bool, r uint32) uint32 { 94 if p.id.metaData.PlainASCII { 95 return r 96 } 97 98 sample := p.id.runeOffsets 99 runeEnds := p.id.fileEndRunes 100 fileStartByte := p.id.boundaries[p.idx] 101 if filename { 102 sample = p.id.fileNameRuneOffsets 103 runeEnds = p.id.fileNameEndRunes 104 fileStartByte = p.id.fileNameIndex[p.idx] 105 } 106 107 absR := r 108 if p.idx > 0 { 109 absR += runeEnds[p.idx-1] 110 } 111 112 byteOff, left := sample.lookup(absR) 113 114 var data []byte 115 116 if filename { 117 data = p.id.fileNameContent[byteOff:] 118 } else { 119 data, p.err = p.id.readContentSlice(byteOff, 3*runeOffsetFrequency) 120 if p.err != nil { 121 return 0 122 } 123 } 124 for left > 0 { 125 _, sz := utf8.DecodeRune(data) 126 byteOff += uint32(sz) 127 data = data[sz:] 128 left-- 129 } 130 131 byteOff -= fileStartByte 132 return byteOff 133} 134 135func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []LineMatch { 136 var result []LineMatch 137 if ms[0].fileName { 138 // There is only "line" in a filename. 139 res := LineMatch{ 140 Line: p.id.fileName(p.idx), 141 FileName: true, 142 } 143 144 for _, m := range ms { 145 res.LineFragments = append(res.LineFragments, LineFragmentMatch{ 146 LineOffset: int(m.byteOffset), 147 MatchLength: int(m.byteMatchSz), 148 Offset: m.byteOffset, 149 }) 150 151 result = []LineMatch{res} 152 } 153 } else { 154 ms = breakMatchesOnNewlines(ms, p.data(false)) 155 result = p.fillContentMatches(ms, numContextLines) 156 } 157 158 sects := p.docSections() 159 for i, m := range result { 160 result[i].Score, result[i].DebugScore = p.matchScore(sects, &m, language, debug) 161 } 162 163 return result 164} 165 166func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []ChunkMatch { 167 var result []ChunkMatch 168 if ms[0].fileName { 169 // If the first match is a filename match, there will only be 170 // one match and the matched content will be the filename. 171 172 fileName := p.id.fileName(p.idx) 173 ranges := make([]Range, 0, len(ms)) 174 for _, m := range ms { 175 ranges = append(ranges, Range{ 176 Start: Location{ 177 ByteOffset: m.byteOffset, 178 LineNumber: 1, 179 Column: uint32(utf8.RuneCount(fileName[:m.byteOffset]) + 1), 180 }, 181 End: Location{ 182 ByteOffset: m.byteOffset + m.byteMatchSz, 183 LineNumber: 1, 184 Column: uint32(utf8.RuneCount(fileName[:m.byteOffset+m.byteMatchSz]) + 1), 185 }, 186 }) 187 } 188 189 result = []ChunkMatch{{ 190 Content: fileName, 191 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 192 Ranges: ranges, 193 FileName: true, 194 }} 195 } else { 196 result = p.fillContentChunkMatches(ms, numContextLines) 197 } 198 199 sects := p.docSections() 200 for i, m := range result { 201 result[i].Score, result[i].DebugScore = p.chunkMatchScore(sects, &m, language, debug) 202 } 203 204 return result 205} 206 207func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLines int) []LineMatch { 208 var result []LineMatch 209 for len(ms) > 0 { 210 m := ms[0] 211 num, lineStart, lineEnd := p.newlines().atOffset(m.byteOffset) 212 213 var lineCands []*candidateMatch 214 215 endMatch := m.byteOffset + m.byteMatchSz 216 217 for len(ms) > 0 { 218 m := ms[0] 219 if int(m.byteOffset) <= lineEnd { 220 endMatch = m.byteOffset + m.byteMatchSz 221 lineCands = append(lineCands, m) 222 ms = ms[1:] 223 } else { 224 break 225 } 226 } 227 228 if len(lineCands) == 0 { 229 log.Panicf( 230 "%s %v infinite loop: num %d start,end %d,%d, offset %d", 231 p.id.fileName(p.idx), p.id.metaData, 232 num, lineStart, lineEnd, 233 m.byteOffset) 234 } 235 236 data := p.data(false) 237 238 // Due to merging matches, we may have a match that 239 // crosses a line boundary. Prevent confusion by 240 // taking lines until we pass the last match 241 for lineEnd < len(data) && endMatch > uint32(lineEnd) { 242 next := bytes.IndexByte(data[lineEnd+1:], '\n') 243 if next == -1 { 244 lineEnd = len(data) 245 } else { 246 // TODO(hanwen): test that checks "+1" part here. 247 lineEnd += next + 1 248 } 249 } 250 251 finalMatch := LineMatch{ 252 LineStart: lineStart, 253 LineEnd: lineEnd, 254 LineNumber: num, 255 } 256 finalMatch.Line = data[lineStart:lineEnd] 257 258 if numContextLines > 0 { 259 finalMatch.Before = p.newlines().getLines(data, num-numContextLines, num) 260 finalMatch.After = p.newlines().getLines(data, num+1, num+1+numContextLines) 261 } 262 263 for _, m := range lineCands { 264 fragment := LineFragmentMatch{ 265 Offset: m.byteOffset, 266 LineOffset: int(m.byteOffset) - lineStart, 267 MatchLength: int(m.byteMatchSz), 268 } 269 if m.symbol { 270 start := p.id.fileEndSymbol[p.idx] 271 fragment.SymbolInfo = p.id.symbols.data(start + m.symbolIdx) 272 if fragment.SymbolInfo != nil { 273 sec := p.docSections()[m.symbolIdx] 274 fragment.SymbolInfo.Sym = string(data[sec.Start:sec.End]) 275 } 276 } 277 278 finalMatch.LineFragments = append(finalMatch.LineFragments, fragment) 279 } 280 result = append(result, finalMatch) 281 } 282 return result 283} 284 285func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numContextLines int) []ChunkMatch { 286 newlines := p.newlines() 287 chunks := chunkCandidates(ms, newlines, numContextLines) 288 data := p.data(false) 289 chunkMatches := make([]ChunkMatch, 0, len(chunks)) 290 for _, chunk := range chunks { 291 ranges := make([]Range, 0, len(chunk.candidates)) 292 var symbolInfo []*Symbol 293 for i, cm := range chunk.candidates { 294 startOffset := cm.byteOffset 295 endOffset := cm.byteOffset + cm.byteMatchSz 296 startLine, startLineOffset, _ := newlines.atOffset(startOffset) 297 endLine, endLineOffset, _ := newlines.atOffset(endOffset) 298 299 ranges = append(ranges, Range{ 300 Start: Location{ 301 ByteOffset: startOffset, 302 LineNumber: uint32(startLine), 303 Column: uint32(utf8.RuneCount(data[startLineOffset:startOffset]) + 1), 304 }, 305 End: Location{ 306 ByteOffset: endOffset, 307 LineNumber: uint32(endLine), 308 Column: uint32(utf8.RuneCount(data[endLineOffset:endOffset]) + 1), 309 }, 310 }) 311 312 if cm.symbol { 313 if symbolInfo == nil { 314 symbolInfo = make([]*Symbol, len(chunk.candidates)) 315 } 316 start := p.id.fileEndSymbol[p.idx] 317 si := p.id.symbols.data(start + cm.symbolIdx) 318 if si != nil { 319 sec := p.docSections()[cm.symbolIdx] 320 si.Sym = string(data[sec.Start:sec.End]) 321 } 322 symbolInfo[i] = si 323 } 324 } 325 326 firstLineNumber := int(chunk.firstLine) - numContextLines 327 if firstLineNumber < 1 { 328 firstLineNumber = 1 329 } 330 firstLineStart, _ := newlines.lineBounds(firstLineNumber) 331 332 chunkMatches = append(chunkMatches, ChunkMatch{ 333 Content: newlines.getLines(data, firstLineNumber, int(chunk.lastLine)+numContextLines+1), 334 ContentStart: Location{ 335 ByteOffset: firstLineStart, 336 LineNumber: uint32(firstLineNumber), 337 Column: 1, 338 }, 339 FileName: false, 340 Ranges: ranges, 341 SymbolInfo: symbolInfo, 342 }) 343 } 344 return chunkMatches 345} 346 347type candidateChunk struct { 348 firstLine uint32 // 1-based, inclusive 349 lastLine uint32 // 1-based, inclusive 350 minOffset uint32 // 0-based, inclusive 351 maxOffset uint32 // 0-based, exclusive 352 candidates []*candidateMatch 353} 354 355// chunkCandidates groups a set of sorted, non-overlapping candidate matches by line number. Adjacent 356// chunks will be merged if adding `numContextLines` to the beginning and end of the chunk would cause 357// it to overlap with an adjacent chunk. 358func chunkCandidates(ms []*candidateMatch, newlines newlines, numContextLines int) []candidateChunk { 359 var chunks []candidateChunk 360 for _, m := range ms { 361 startOffset := m.byteOffset 362 endOffset := m.byteOffset + m.byteMatchSz 363 firstLine, _, _ := newlines.atOffset(startOffset) 364 lastLine, _, _ := newlines.atOffset(endOffset) 365 366 if len(chunks) > 0 && int(chunks[len(chunks)-1].lastLine)+numContextLines >= firstLine-numContextLines { 367 // If a new chunk created with the current candidateMatch would 368 // overlap with the previous chunk, instead add the candidateMatch 369 // to the last chunk and extend end of the last chunk. 370 last := &chunks[len(chunks)-1] 371 last.candidates = append(last.candidates, m) 372 if last.maxOffset < endOffset { 373 last.lastLine = uint32(lastLine) 374 last.maxOffset = uint32(endOffset) 375 } 376 } else { 377 chunks = append(chunks, candidateChunk{ 378 firstLine: uint32(firstLine), 379 lastLine: uint32(lastLine), 380 minOffset: startOffset, 381 maxOffset: endOffset, 382 candidates: []*candidateMatch{m}, 383 }) 384 } 385 } 386 return chunks 387} 388 389type newlines struct { 390 // locs is the sorted set of byte offsets of the newlines in the file 391 locs []uint32 392 393 // fileSize is just the number of bytes in the file. It is stored 394 // on this struct so we can safely know the length of the last line 395 // in the file since not all files end in a newline. 396 fileSize uint32 397} 398 399// atOffset returns the line containing the offset. If the offset lands on 400// the newline ending line M, we return M. The line is characterized 401// by its linenumber (base-1, byte index of line start, byte index of 402// line end). The line end is the index of a newline, or the filesize 403// (if matching the last line of the file.) 404func (nls newlines) atOffset(offset uint32) (lineNumber, lineStart, lineEnd int) { 405 idx := sort.Search(len(nls.locs), func(n int) bool { 406 return nls.locs[n] >= offset 407 }) 408 409 start, end := nls.lineBounds(idx + 1) 410 return idx + 1, int(start), int(end) 411} 412 413// lineBounds returns the byte offsets of the start and end of the 1-based 414// lineNumber. The end offset is exclusive and will not contain the line-ending 415// newline. If the line number is out of range of the lines in the file, start 416// and end will be clamped to [0,fileSize]. 417func (nls newlines) lineBounds(lineNumber int) (start, end uint32) { 418 // nls.locs[0] + 1 is the start of the 2nd line of data. 419 startIdx := lineNumber - 2 420 endIdx := lineNumber - 1 421 422 if startIdx < 0 { 423 start = 0 424 } else if startIdx >= len(nls.locs) { 425 start = nls.fileSize 426 } else { 427 start = nls.locs[startIdx] + 1 428 } 429 430 if endIdx < 0 { 431 end = 0 432 } else if endIdx >= len(nls.locs) { 433 end = nls.fileSize 434 } else { 435 end = nls.locs[endIdx] 436 } 437 438 return start, end 439} 440 441// getLines returns a slice of data containing the lines [low, high). 442// low is 1-based and inclusive. high is 1-based and exclusive. 443func (nls newlines) getLines(data []byte, low, high int) []byte { 444 if low >= high { 445 return nil 446 } 447 448 lowStart, _ := nls.lineBounds(low) 449 _, highEnd := nls.lineBounds(high - 1) 450 451 return data[lowStart:highEnd] 452} 453 454const ( 455 // Query-dependent scoring signals. All of these together are bounded at ~9000 456 // (scoreWordMatch + scoreSymbol + scoreKindMatch * 10 + scoreFactorAtomMatch). 457 scorePartialWordMatch = 50.0 458 scoreWordMatch = 500.0 459 scoreBase = 7000.0 460 scorePartialBase = 4000.0 461 scoreSymbol = 7000.0 462 scorePartialSymbol = 4000.0 463 scoreKindMatch = 100.0 464 scoreRepetitionFactor = 1.0 465 scoreFactorAtomMatch = 400.0 466 467 // File-only scoring signals. For now these are also bounded ~9000 to give them 468 // equal weight with the query-dependent signals. 469 scoreFileRankFactor = 9000.0 470 scoreFileOrderFactor = 10.0 471 scoreRepoRankFactor = 20.0 472 473 // Used for ordering line and chunk matches within a file. 474 scoreLineOrderFactor = 1.0 475) 476 477// findSection checks whether a section defined by offset and size lies within 478// one of the sections in secs. 479func findSection(secs []DocumentSection, off, sz uint32) (int, bool) { 480 j := sort.Search(len(secs), func(i int) bool { 481 return secs[i].End >= off+sz 482 }) 483 484 if j == len(secs) { 485 return 0, false 486 } 487 488 if secs[j].Start <= off && off+sz <= secs[j].End { 489 return j, true 490 } 491 return 0, false 492} 493 494func (p *contentProvider) chunkMatchScore(secs []DocumentSection, m *ChunkMatch, language string, debug bool) (float64, string) { 495 type debugScore struct { 496 score float64 497 what string 498 } 499 500 score := &debugScore{} 501 maxScore := &debugScore{} 502 503 addScore := func(what string, s float64) { 504 if debug { 505 score.what += fmt.Sprintf("%s:%.2f, ", what, s) 506 } 507 score.score += s 508 } 509 510 for i, r := range m.Ranges { 511 // calculate the start and end offset relative to the start of the content 512 relStartOffset := int(r.Start.ByteOffset - m.ContentStart.ByteOffset) 513 relEndOffset := int(r.End.ByteOffset - m.ContentStart.ByteOffset) 514 515 startBoundary := relStartOffset < len(m.Content) && (relStartOffset == 0 || byteClass(m.Content[relStartOffset-1]) != byteClass(m.Content[relStartOffset])) 516 endBoundary := relEndOffset > 0 && (relEndOffset == len(m.Content) || byteClass(m.Content[relEndOffset-1]) != byteClass(m.Content[relEndOffset])) 517 518 score.score = 0 519 score.what = "" 520 521 if startBoundary && endBoundary { 522 addScore("WordMatch", scoreWordMatch) 523 } else if startBoundary || endBoundary { 524 addScore("PartialWordMatch", scorePartialWordMatch) 525 } 526 527 if m.FileName { 528 sep := bytes.LastIndexByte(m.Content, '/') 529 startMatch := relStartOffset == sep+1 530 endMatch := relEndOffset == len(m.Content) 531 if startMatch && endMatch { 532 addScore("Base", scoreBase) 533 } else if startMatch || endMatch { 534 addScore("EdgeBase", (scoreBase+scorePartialBase)/2) 535 } else if sep < relStartOffset { 536 addScore("InnerBase", scorePartialBase) 537 } 538 } else if secIdx, ok := findSection(secs, uint32(r.Start.ByteOffset), uint32(r.End.ByteOffset-r.Start.ByteOffset)); ok { 539 sec := secs[secIdx] 540 startMatch := sec.Start == uint32(r.Start.ByteOffset) 541 endMatch := sec.End == uint32(r.End.ByteOffset) 542 if startMatch && endMatch { 543 addScore("Symbol", scoreSymbol) 544 } else if startMatch || endMatch { 545 addScore("EdgeSymbol", (scoreSymbol+scorePartialSymbol)/2) 546 } else { 547 addScore("InnerSymbol", scorePartialSymbol) 548 } 549 550 var si *Symbol 551 if m.SymbolInfo != nil { 552 si = m.SymbolInfo[i] 553 } 554 if si == nil { 555 // for non-symbol queries, we need to hydrate in SymbolInfo. 556 start := p.id.fileEndSymbol[p.idx] 557 si = p.id.symbols.data(start + uint32(secIdx)) 558 } 559 if si != nil { 560 addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreKind(language, si.Kind)) 561 } 562 } 563 564 if score.score > maxScore.score { 565 maxScore.score = score.score 566 maxScore.what = score.what 567 } 568 } 569 570 if debug { 571 maxScore.what = fmt.Sprintf("score:%f <- %s", maxScore.score, strings.TrimRight(maxScore.what, ", ")) 572 } 573 574 return maxScore.score, maxScore.what 575} 576 577func (p *contentProvider) matchScore(secs []DocumentSection, m *LineMatch, language string, debug bool) (float64, string) { 578 type debugScore struct { 579 score float64 580 what string 581 } 582 583 score := &debugScore{} 584 maxScore := &debugScore{} 585 586 addScore := func(what string, s float64) { 587 if debug { 588 score.what += fmt.Sprintf("%s:%.2f, ", what, s) 589 } 590 score.score += s 591 } 592 593 for _, f := range m.LineFragments { 594 startBoundary := f.LineOffset < len(m.Line) && (f.LineOffset == 0 || byteClass(m.Line[f.LineOffset-1]) != byteClass(m.Line[f.LineOffset])) 595 596 end := int(f.LineOffset) + f.MatchLength 597 endBoundary := end > 0 && (end == len(m.Line) || byteClass(m.Line[end-1]) != byteClass(m.Line[end])) 598 599 score.score = 0 600 score.what = "" 601 602 if startBoundary && endBoundary { 603 addScore("WordMatch", scoreWordMatch) 604 } else if startBoundary || endBoundary { 605 addScore("PartialWordMatch", scorePartialWordMatch) 606 } 607 608 if m.FileName { 609 sep := bytes.LastIndexByte(m.Line, '/') 610 startMatch := sep+1 == f.LineOffset 611 endMatch := len(m.Line) == f.LineOffset+f.MatchLength 612 if startMatch && endMatch { 613 addScore("Base", scoreBase) 614 } else if startMatch || endMatch { 615 addScore("EdgeBase", (scoreBase+scorePartialBase)/2) 616 } else if sep < f.LineOffset { 617 addScore("InnerBase", scorePartialBase) 618 } 619 } else if secIdx, ok := findSection(secs, f.Offset, uint32(f.MatchLength)); ok { 620 sec := secs[secIdx] 621 startMatch := sec.Start == f.Offset 622 endMatch := sec.End == f.Offset+uint32(f.MatchLength) 623 if startMatch && endMatch { 624 addScore("Symbol", scoreSymbol) 625 } else if startMatch || endMatch { 626 addScore("EdgeSymbol", (scoreSymbol+scorePartialSymbol)/2) 627 } else { 628 addScore("InnerSymbol", scorePartialSymbol) 629 } 630 631 si := f.SymbolInfo 632 if si == nil { 633 // for non-symbol queries, we need to hydrate in SymbolInfo. 634 start := p.id.fileEndSymbol[p.idx] 635 si = p.id.symbols.data(start + uint32(secIdx)) 636 } 637 if si != nil { 638 // the LineFragment may not be on a symbol, then si will be nil. 639 addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreKind(language, si.Kind)) 640 } 641 } 642 643 if score.score > maxScore.score { 644 maxScore.score = score.score 645 maxScore.what = score.what 646 } 647 } 648 649 if debug { 650 maxScore.what = fmt.Sprintf("score:%.2f <- %s", maxScore.score, strings.TrimSuffix(maxScore.what, ", ")) 651 } 652 653 return maxScore.score, maxScore.what 654} 655 656// scoreKind boosts a match based on the combination of language and kind. The 657// language string comes from go-enry, the kind string from ctags. 658func scoreKind(language string, kind string) float64 { 659 var factor float64 660 661 // Generic ranking which will be overriden by language specific ranking 662 switch kind { 663 case "class": 664 factor = 10 665 case "struct": 666 factor = 9.5 667 case "enum": 668 factor = 9 669 case "interface": 670 factor = 8 671 case "function", "func": 672 factor = 7 673 case "method": 674 factor = 6 675 case "member", "field": 676 factor = 5.5 677 case "constant", "const": 678 factor = 5 679 case "var", "variable": 680 factor = 4 681 } 682 683 // Refer to universal-ctags --list-kinds-full=<language> to learn about which 684 // kinds are detected for which language. 685 // 686 // Note that go-ctags uses universal-ctags's interactive mode and thus returns 687 // the full name for "kind" and not the one-letter abbreviation. 688 switch language { 689 case "Java", "java": 690 switch kind { 691 // 2022-03-30: go-ctags contains a regex rule for Java classes that sets "kind" 692 // to "classes" instead of "c". We have to cover both cases to support existing 693 // indexes. 694 case "class", "classes": 695 factor = 10 696 case "enum": 697 factor = 9 698 case "interface": 699 factor = 8 700 case "method": 701 factor = 7 702 case "field": 703 factor = 6 704 case "enumConstant": 705 factor = 5 706 } 707 case "Kotlin", "kotlin": 708 switch kind { 709 case "class": 710 factor = 10 711 case "interface": 712 factor = 9 713 case "method": 714 factor = 8 715 case "typealias": 716 factor = 7 717 case "constant": 718 factor = 6 719 case "variable": 720 factor = 5 721 } 722 case "Go", "go": 723 switch kind { 724 case "interface": // interfaces 725 factor = 10 726 case "struct": // structs 727 factor = 9 728 case "talias": // type aliases 729 factor = 9 730 case "methodSpec": // interface method specification 731 factor = 8.5 732 case "func": // functions 733 factor = 8 734 case "member": // struct members 735 factor = 7 736 case "const": // constants 737 factor = 6 738 case "var": // variables 739 factor = 5 740 } 741 // Could also rank on: 742 // 743 // - anonMember struct anonymous members 744 // - packageName name for specifying imported package 745 // - receiver receivers 746 // - package packages 747 // - type types 748 // - unknown unknown 749 case "C++", "c++": 750 switch kind { 751 case "class": // classes 752 factor = 10 753 case "enum": // enumeration names 754 factor = 9 755 case "function": // function definitions 756 factor = 8 757 case "struct": // structure names 758 factor = 7 759 case "union": // union names 760 factor = 6 761 case "typdef": // typedefs 762 factor = 5 763 case "member": // class, struct, and union members 764 factor = 4 765 case "variable": // varialbe definitions 766 factor = 3 767 } 768 // Could also rank on: 769 // NAME DESCRIPTION 770 // macro macro definitions 771 // enumerator enumerators (values inside an enumeration) 772 // header included header files 773 // namespace namespaces 774 // variable variable definitions 775 case "Scala", "scala": 776 switch kind { 777 case "class": 778 factor = 10 779 case "interface": 780 factor = 9 781 case "object": 782 factor = 8 783 case "method": 784 factor = 7 785 case "type": 786 factor = 6 787 case "variable": 788 factor = 5 789 case "package": 790 factor = 4 791 } 792 case "Python", "python": 793 switch kind { 794 case "class": // classes 795 factor = 10 796 case "function": // function definitions 797 factor = 8 798 case "member": // class, struct, and union members 799 factor = 4 800 case "variable": // variable definitions 801 factor = 3 802 case "local": // local variables 803 factor = 2 804 } 805 // Could also rank on: 806 // 807 // - namespace name referring a module defined in other file 808 // - module modules 809 // - unknown name referring a class/variable/function/module defined in other module 810 // - parameter function parameters 811 case "Ruby", "ruby": 812 switch kind { 813 case "class": 814 factor = 10 815 case "method": 816 factor = 9 817 case "alias": 818 factor = 8 819 case "module": 820 factor = 7 821 case "singletonMethod": 822 factor = 6 823 case "constant": 824 factor = 5 825 case "accessor": 826 factor = 4 827 case "library": 828 factor = 3 829 } 830 case "PHP", "php": 831 switch kind { 832 case "class": 833 factor = 10 834 case "interface": 835 factor = 9 836 case "function": 837 factor = 8 838 case "trait": 839 factor = 7 840 case "define": 841 factor = 6 842 case "namespace": 843 factor = 5 844 case "alias": 845 factor = 4 846 case "variable": 847 factor = 3 848 case "local": 849 factor = 3 850 } 851 } 852 853 return factor * scoreKindMatch 854} 855 856type matchScoreSlice []LineMatch 857 858func (m matchScoreSlice) Len() int { return len(m) } 859func (m matchScoreSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] } 860func (m matchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score } 861 862type chunkMatchScoreSlice []ChunkMatch 863 864func (m chunkMatchScoreSlice) Len() int { return len(m) } 865func (m chunkMatchScoreSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] } 866func (m chunkMatchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score } 867 868type fileMatchesByScore []FileMatch 869 870func (m fileMatchesByScore) Len() int { return len(m) } 871func (m fileMatchesByScore) Swap(i, j int) { m[i], m[j] = m[j], m[i] } 872func (m fileMatchesByScore) Less(i, j int) bool { return m[i].Score > m[j].Score } 873 874func sortMatchesByScore(ms []LineMatch) { 875 sort.Sort(matchScoreSlice(ms)) 876} 877 878func sortChunkMatchesByScore(ms []ChunkMatch) { 879 sort.Sort(chunkMatchScoreSlice(ms)) 880} 881 882// SortFiles sorts files matches. The order depends on the match score, which includes both 883// query-dependent signals like word overlap, and file-only signals like the file ranks (if 884// file ranks are enabled). 885func SortFiles(ms []FileMatch) { 886 sort.Sort(fileMatchesByScore(ms)) 887}