fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bytes" 19 "fmt" 20) 21 22// candidateMatch is a candidate match for a substring. 23type candidateMatch struct { 24 caseSensitive bool 25 fileName bool 26 symbol bool 27 symbolIdx uint32 28 29 substrBytes []byte 30 substrLowered []byte 31 32 file uint32 33 34 // Offsets are relative to the start of the filename or file contents. 35 runeOffset uint32 36 byteOffset uint32 37 byteMatchSz uint32 38} 39 40// Matches content against the substring, and populates byteMatchSz on success 41func (m *candidateMatch) matchContent(content []byte) bool { 42 if m.caseSensitive { 43 comp := bytes.Equal(m.substrBytes, content[m.byteOffset:m.byteOffset+uint32(len(m.substrBytes))]) 44 45 m.byteMatchSz = uint32(len(m.substrBytes)) 46 return comp 47 } else { 48 // It is tempting to try a simple ASCII based 49 // comparison if possible, but we need more 50 // information. Simple ASCII chars have unicode upper 51 // case variants (the ASCII 'k' has the Kelvin symbol 52 // as upper case variant). We can only degrade to 53 // ASCII if we are sure that both the corpus and the 54 // query is ASCII only 55 sz, ok := caseFoldingEqualsRunes(m.substrLowered, content[m.byteOffset:]) 56 m.byteMatchSz = uint32(sz) 57 return ok 58 } 59} 60 61// matchIterator is a docIterator that produces candidateMatches for a given document 62type matchIterator interface { 63 docIterator 64 65 candidates() []*candidateMatch 66 67 // updateStats is called twice. After matchtree construction and after 68 // searching is done. Implementations must take care to not report 69 // statistics twice. 70 updateStats(*Stats) 71} 72 73// noMatchTree is both matchIterator and matchTree that matches nothing. 74type noMatchTree struct { 75 Why string 76 77 // Stats captures the work done to create the noMatchTree. 78 Stats Stats 79} 80 81func (t *noMatchTree) String() string { 82 return fmt.Sprintf("not(%q)", t.Why) 83} 84 85func (t *noMatchTree) candidates() []*candidateMatch { 86 return nil 87} 88 89func (t *noMatchTree) nextDoc() uint32 { 90 return maxUInt32 91} 92 93func (t *noMatchTree) prepare(uint32) {} 94 95func (t *noMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) { 96 return false, true 97} 98 99func (t *noMatchTree) updateStats(s *Stats) { 100 s.Add(t.Stats) 101 t.Stats = Stats{} 102} 103 104func (m *candidateMatch) String() string { 105 return fmt.Sprintf("%d:%d", m.file, m.runeOffset) 106} 107 108type ngramDocIterator struct { 109 leftPad uint32 110 rightPad uint32 111 112 iter hitIterator 113 ends []uint32 114 115 // ngramLookups is how many lookups we did to create this iterator. 116 ngramLookups int 117 118 // mutable 119 fileIdx uint32 120 matchCount int 121} 122 123// nextFileIndex returns the smallest index j of ends such that 124// ends[j] > offset, assuming ends[f] <= offset. 125func nextFileIndex(offset, f uint32, ends []uint32) uint32 { 126 d := uint32(1) 127 for f < uint32(len(ends)) && ends[f] <= offset { 128 if f+d < uint32(len(ends)) && ends[f+d] <= offset { 129 f += d 130 d *= 2 131 } else if d > 1 { 132 d = d/4 + 1 133 } else { 134 f++ 135 } 136 } 137 return f 138} 139 140func (i *ngramDocIterator) nextDoc() uint32 { 141 i.fileIdx = nextFileIndex(i.iter.first(), i.fileIdx, i.ends) 142 if i.fileIdx >= uint32(len(i.ends)) { 143 return maxUInt32 144 } 145 return i.fileIdx 146} 147 148func (i *ngramDocIterator) String() string { 149 return fmt.Sprintf("ngram(L=%d,R=%d,%v)", i.leftPad, i.rightPad, i.iter) 150} 151 152func (i *ngramDocIterator) prepare(nextDoc uint32) { 153 var start uint32 154 if nextDoc > 0 { 155 start = i.ends[nextDoc-1] 156 } 157 if start > 0 { 158 i.iter.next(start + i.leftPad - 1) 159 } 160 i.fileIdx = nextDoc 161} 162 163func (i *ngramDocIterator) updateStats(s *Stats) { 164 i.iter.updateStats(s) 165 s.NgramMatches += i.matchCount 166 s.NgramLookups += i.ngramLookups 167 i.matchCount = 0 168 i.ngramLookups = 0 169} 170 171func (i *ngramDocIterator) candidates() []*candidateMatch { 172 if i.fileIdx >= uint32(len(i.ends)) { 173 return nil 174 } 175 176 var fileStart uint32 177 if i.fileIdx > 0 { 178 fileStart = i.ends[i.fileIdx-1] 179 } 180 fileEnd := i.ends[i.fileIdx] 181 182 var candidates []*candidateMatch 183 for { 184 p1 := i.iter.first() 185 if p1 == maxUInt32 || p1 >= i.ends[i.fileIdx] { 186 break 187 } 188 i.iter.next(p1) 189 190 if p1 < i.leftPad+fileStart || p1+i.rightPad > fileEnd { 191 continue 192 } 193 194 candidates = append(candidates, &candidateMatch{ 195 file: uint32(i.fileIdx), 196 runeOffset: p1 - fileStart - i.leftPad, 197 }) 198 } 199 i.matchCount += len(candidates) 200 return candidates 201}