fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

Short-circuit content evaluations for short substrings.

+86 -51
+17 -12
dociter.go
··· 34 34 35 35 fileIdx int 36 36 ends []uint32 37 + 38 + // The ngram matches cover the pattern, so no need to check 39 + // contents. 40 + coversContent bool 37 41 } 38 42 39 43 type candidateMatch struct { 40 44 query *SubstringQuery 41 45 42 - substrBytes []byte 43 - lowered []byte 44 - caseMask [][]byte 45 - caseBits [][]byte 46 + substrBytes []byte 47 + substrLowered []byte 48 + 49 + caseMask [][]byte 50 + caseBits [][]byte 46 51 47 52 file uint32 48 53 offset uint32 ··· 78 83 } 79 84 80 85 func (m *candidateMatch) matchContent(content []byte) bool { 81 - return bytes.Compare(content[m.offset:m.offset+uint32(len(m.lowered))], m.lowered) == 0 86 + return bytes.Compare(content[m.offset:m.offset+uint32(len(m.substrLowered))], m.substrLowered) == 0 82 87 } 83 88 84 89 func (m *candidateMatch) line(newlines []uint32, content []byte, caseBits []byte) (lineNum, lineOff int, lineContent []byte) { ··· 134 139 135 140 candidates = append(candidates, 136 141 &candidateMatch{ 137 - caseMask: caseMasks, 138 - caseBits: caseBits, 139 - query: s.query, 140 - substrBytes: patBytes, 141 - lowered: lowerPatBytes, 142 - file: uint32(s.fileIdx), 143 - offset: p1 - fileStart - s.leftPad, 142 + caseMask: caseMasks, 143 + caseBits: caseBits, 144 + query: s.query, 145 + substrBytes: patBytes, 146 + substrLowered: lowerPatBytes, 147 + file: uint32(s.fileIdx), 148 + offset: p1 - fileStart - s.leftPad, 144 149 }) 145 150 } 146 151 }
+29 -36
eval.go
··· 27 27 } 28 28 29 29 type substrMatchTree struct { 30 - query *SubstringQuery 31 - current []*candidateMatch 32 - caseMatch *bool 33 - contMatch *bool 34 - cands []*candidateMatch 30 + query *SubstringQuery 31 + current []*candidateMatch 32 + caseEvaluated bool 33 + contEvaluated bool 34 + cands []*candidateMatch 35 + coversContent bool 35 36 } 36 37 37 38 type branchQueryMatchTree struct { ··· 106 107 } 107 108 108 109 func (p *contentProvider) evalContentMatches(s *substrMatchTree) { 109 - pruned := s.current[:0] 110 - for _, m := range s.current { 111 - if p.matchContent(m) { 112 - pruned = append(pruned, m) 110 + if !s.coversContent { 111 + pruned := s.current[:0] 112 + for _, m := range s.current { 113 + if p.matchContent(m) { 114 + pruned = append(pruned, m) 115 + } 113 116 } 117 + s.current = pruned 114 118 } 115 - s.current = pruned 116 - s.contMatch = new(bool) 117 - *s.contMatch = (len(pruned) > 0) 119 + s.contEvaluated = true 118 120 } 119 121 120 122 func (p *contentProvider) evalCaseMatches(s *substrMatchTree) { 121 - pruned := s.current[:0] 122 - for _, m := range s.current { 123 - if p.caseMatches(m) { 124 - pruned = append(pruned, m) 123 + if s.query.CaseSensitive { 124 + pruned := s.current[:0] 125 + for _, m := range s.current { 126 + if p.caseMatches(m) { 127 + pruned = append(pruned, m) 128 + } 125 129 } 130 + s.current = pruned 126 131 } 127 - s.current = pruned 128 - s.caseMatch = new(bool) 129 - *s.caseMatch = len(pruned) > 0 132 + s.caseEvaluated = true 130 133 } 131 134 132 135 func (t *andMatchTree) matches(known map[matchTree]bool, docID uint32) (bool, bool) { ··· 185 188 if len(t.current) == 0 { 186 189 return false, true 187 190 } 188 - sure := true 189 - val := true 190 - if t.caseMatch != nil { 191 - val = *t.caseMatch && val 192 - } else { 193 - sure = false 194 - } 195 - if t.contMatch != nil { 196 - val = *t.contMatch && val 197 - } else { 198 - sure = false 199 - } 200 191 201 - return val, sure 192 + sure := (!t.query.CaseSensitive || t.caseEvaluated) && (t.coversContent || t.contEvaluated) 193 + return true, sure 202 194 } 203 195 204 196 func (d *indexData) newMatchTree(q Query, sq map[*SubstringQuery]*substrMatchTree) (matchTree, error) { ··· 234 226 return nil, err 235 227 } 236 228 st := &substrMatchTree{ 237 - query: s, 238 - cands: iter.next(), 229 + query: s, 230 + coversContent: iter.coversContent, 231 + cands: iter.next(), 239 232 } 240 233 sq[s] = st 241 234 return st, nil ··· 298 291 } 299 292 st.current = st.cands[:i] 300 293 st.cands = st.cands[i:] 301 - st.contMatch = nil 302 - st.caseMatch = nil 294 + st.contEvaluated = false 295 + st.caseEvaluated = false 303 296 } 304 297 305 298 cp := contentProvider{
+12 -3
index.go
··· 120 120 if data.reader.err != nil { 121 121 return nil, data.reader.err 122 122 } 123 - input.last = fromDeltas(data.reader.readSectionBlob(last)) 124 - if data.reader.err != nil { 125 - return nil, data.reader.err 123 + 124 + if firstI != lastI { 125 + input.last = fromDeltas(data.reader.readSectionBlob(last)) 126 + if data.reader.err != nil { 127 + return nil, data.reader.err 128 + } 129 + } else { 130 + input.last = input.first 131 + } 132 + 133 + if lastI-firstI <= ngramSize && input.leftPad == 0 && input.rightPad == 0 { 134 + input.coversContent = true 126 135 } 127 136 return input, nil 128 137 }
+28
index_test.go
··· 586 586 t.Fatalf("got branches %q, want %q", f.Branches, branches) 587 587 } 588 588 } 589 + 590 + func TestCoversContent(t *testing.T) { 591 + b := NewIndexBuilder() 592 + 593 + branches := []string{"stable", "master"} 594 + b.AddFileBranches("f1", []byte("needle the bla"), branches) 595 + 596 + searcher := searcherForTest(t, b) 597 + sres, err := searcher.Search( 598 + &AndQuery{ 599 + Children: []Query{ 600 + &SubstringQuery{ 601 + Pattern: "needle", 602 + }, 603 + &NotQuery{&SubstringQuery{ 604 + Pattern: "the", 605 + }}, 606 + }, 607 + }) 608 + 609 + if err != nil || len(sres.Files) > 0 { 610 + t.Fatalf("got %v, %v, want success without results", sres.Files, err) 611 + } 612 + 613 + if sres.Stats.FilesLoaded > 0 { 614 + t.Errorf("got %#v, want no FilesLoaded", sres.Stats) 615 + } 616 + }