fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bytes" 19 "fmt" 20) 21 22// candidateMatch is a candidate match for a substring. 23// 24// Note: a lot of these can be in memory, so think about fieldalignment when 25// modify the fields of this structure. 26type candidateMatch struct { 27 substrBytes []byte 28 substrLowered []byte 29 30 scoreWeight float64 31 32 file uint32 33 symbolIdx uint32 34 35 // Offsets are relative to the start of the filename or file contents. 36 runeOffset uint32 37 byteOffset uint32 38 byteMatchSz uint32 39 40 // bools at end for struct field alignment 41 caseSensitive bool 42 fileName bool 43 symbol bool 44} 45 46// Matches content against the substring, and populates byteMatchSz on success 47func (m *candidateMatch) matchContent(content []byte) bool { 48 if m.caseSensitive { 49 comp := bytes.Equal(m.substrBytes, content[m.byteOffset:m.byteOffset+uint32(len(m.substrBytes))]) 50 51 m.byteMatchSz = uint32(len(m.substrBytes)) 52 return comp 53 } else { 54 // It is tempting to try a simple ASCII based 55 // comparison if possible, but we need more 56 // information. Simple ASCII chars have unicode upper 57 // case variants (the ASCII 'k' has the Kelvin symbol 58 // as upper case variant). We can only degrade to 59 // ASCII if we are sure that both the corpus and the 60 // query is ASCII only 61 sz, ok := caseFoldingEqualsRunes(m.substrLowered, content[m.byteOffset:]) 62 m.byteMatchSz = uint32(sz) 63 return ok 64 } 65} 66 67// matchIterator is a docIterator that produces candidateMatches for a given document 68type matchIterator interface { 69 docIterator 70 71 candidates() []*candidateMatch 72 73 // updateStats is called twice. After matchtree construction and after 74 // searching is done. Implementations must take care to not report 75 // statistics twice. 76 updateStats(*Stats) 77} 78 79// noMatchTree is both matchIterator and matchTree that matches nothing. 80type noMatchTree struct { 81 Why string 82 83 // Stats captures the work done to create the noMatchTree. 84 Stats Stats 85} 86 87func (t *noMatchTree) String() string { 88 return fmt.Sprintf("not(%q)", t.Why) 89} 90 91func (t *noMatchTree) candidates() []*candidateMatch { 92 return nil 93} 94 95func (t *noMatchTree) nextDoc() uint32 { 96 return maxUInt32 97} 98 99func (t *noMatchTree) prepare(uint32) {} 100 101func (t *noMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState { 102 return matchesNone 103} 104 105func (t *noMatchTree) updateStats(s *Stats) { 106 s.Add(t.Stats) 107 t.Stats = Stats{} 108} 109 110func (m *candidateMatch) String() string { 111 return fmt.Sprintf("%d:%d", m.file, m.runeOffset) 112} 113 114type ngramDocIterator struct { 115 leftPad uint32 116 rightPad uint32 117 118 iter hitIterator 119 ends []uint32 120 121 // ngramLookups is how many lookups we did to create this iterator. 122 ngramLookups int 123 124 // mutable 125 fileIdx uint32 126 matchCount int 127} 128 129// nextFileIndex returns the smallest index j of ends such that 130// ends[j] > offset, assuming ends[f] <= offset. 131func nextFileIndex(offset, f uint32, ends []uint32) uint32 { 132 d := uint32(1) 133 for f < uint32(len(ends)) && ends[f] <= offset { 134 if f+d < uint32(len(ends)) && ends[f+d] <= offset { 135 f += d 136 d *= 2 137 } else if d > 1 { 138 d = d/4 + 1 139 } else { 140 f++ 141 } 142 } 143 return f 144} 145 146func (i *ngramDocIterator) nextDoc() uint32 { 147 i.fileIdx = nextFileIndex(i.iter.first(), i.fileIdx, i.ends) 148 if i.fileIdx >= uint32(len(i.ends)) { 149 return maxUInt32 150 } 151 return i.fileIdx 152} 153 154func (i *ngramDocIterator) String() string { 155 return fmt.Sprintf("ngram(L=%d,R=%d,%v)", i.leftPad, i.rightPad, i.iter) 156} 157 158func (i *ngramDocIterator) prepare(nextDoc uint32) { 159 var start uint32 160 if nextDoc > 0 { 161 start = i.ends[nextDoc-1] 162 } 163 if start > 0 { 164 i.iter.next(start + i.leftPad - 1) 165 } 166 i.fileIdx = nextDoc 167} 168 169func (i *ngramDocIterator) updateStats(s *Stats) { 170 i.iter.updateStats(s) 171 s.NgramMatches += i.matchCount 172 s.NgramLookups += i.ngramLookups 173 i.matchCount = 0 174 i.ngramLookups = 0 175} 176 177func (i *ngramDocIterator) candidates() []*candidateMatch { 178 if i.fileIdx >= uint32(len(i.ends)) { 179 return nil 180 } 181 182 var fileStart uint32 183 if i.fileIdx > 0 { 184 fileStart = i.ends[i.fileIdx-1] 185 } 186 fileEnd := i.ends[i.fileIdx] 187 188 var candidates []*candidateMatch 189 for { 190 p1 := i.iter.first() 191 if p1 == maxUInt32 || p1 >= i.ends[i.fileIdx] { 192 break 193 } 194 i.iter.next(p1) 195 196 if p1 < i.leftPad+fileStart || p1+i.rightPad > fileEnd { 197 continue 198 } 199 200 candidates = append(candidates, &candidateMatch{ 201 file: uint32(i.fileIdx), 202 runeOffset: p1 - fileStart - i.leftPad, 203 }) 204 } 205 i.matchCount += len(candidates) 206 return candidates 207}