fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bytes" 19 "fmt" 20) 21 22// candidateMatch is a candidate match for a substring. 23// 24// Note: a lot of these can be in memory, so think about fieldalignment when 25// modify the fields of this structure. 26type candidateMatch struct { 27 substrBytes []byte 28 substrLowered []byte 29 30 file uint32 31 symbolIdx uint32 32 33 // Offsets are relative to the start of the filename or file contents. 34 runeOffset uint32 35 byteOffset uint32 36 byteMatchSz uint32 37 38 // bools at end for struct field alignment 39 caseSensitive bool 40 fileName bool 41 symbol bool 42} 43 44// Matches content against the substring, and populates byteMatchSz on success 45func (m *candidateMatch) matchContent(content []byte) bool { 46 if m.caseSensitive { 47 comp := bytes.Equal(m.substrBytes, content[m.byteOffset:m.byteOffset+uint32(len(m.substrBytes))]) 48 49 m.byteMatchSz = uint32(len(m.substrBytes)) 50 return comp 51 } else { 52 // It is tempting to try a simple ASCII based 53 // comparison if possible, but we need more 54 // information. Simple ASCII chars have unicode upper 55 // case variants (the ASCII 'k' has the Kelvin symbol 56 // as upper case variant). We can only degrade to 57 // ASCII if we are sure that both the corpus and the 58 // query is ASCII only 59 sz, ok := caseFoldingEqualsRunes(m.substrLowered, content[m.byteOffset:]) 60 m.byteMatchSz = uint32(sz) 61 return ok 62 } 63} 64 65// matchIterator is a docIterator that produces candidateMatches for a given document 66type matchIterator interface { 67 docIterator 68 69 candidates() []*candidateMatch 70 71 // updateStats is called twice. After matchtree construction and after 72 // searching is done. Implementations must take care to not report 73 // statistics twice. 74 updateStats(*Stats) 75} 76 77// noMatchTree is both matchIterator and matchTree that matches nothing. 78type noMatchTree struct { 79 Why string 80 81 // Stats captures the work done to create the noMatchTree. 82 Stats Stats 83} 84 85func (t *noMatchTree) String() string { 86 return fmt.Sprintf("not(%q)", t.Why) 87} 88 89func (t *noMatchTree) candidates() []*candidateMatch { 90 return nil 91} 92 93func (t *noMatchTree) nextDoc() uint32 { 94 return maxUInt32 95} 96 97func (t *noMatchTree) prepare(uint32) {} 98 99func (t *noMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState { 100 return matchesNone 101} 102 103func (t *noMatchTree) updateStats(s *Stats) { 104 s.Add(t.Stats) 105 t.Stats = Stats{} 106} 107 108func (m *candidateMatch) String() string { 109 return fmt.Sprintf("%d:%d", m.file, m.runeOffset) 110} 111 112type ngramDocIterator struct { 113 leftPad uint32 114 rightPad uint32 115 116 iter hitIterator 117 ends []uint32 118 119 // ngramLookups is how many lookups we did to create this iterator. 120 ngramLookups int 121 122 // mutable 123 fileIdx uint32 124 matchCount int 125} 126 127// nextFileIndex returns the smallest index j of ends such that 128// ends[j] > offset, assuming ends[f] <= offset. 129func nextFileIndex(offset, f uint32, ends []uint32) uint32 { 130 d := uint32(1) 131 for f < uint32(len(ends)) && ends[f] <= offset { 132 if f+d < uint32(len(ends)) && ends[f+d] <= offset { 133 f += d 134 d *= 2 135 } else if d > 1 { 136 d = d/4 + 1 137 } else { 138 f++ 139 } 140 } 141 return f 142} 143 144func (i *ngramDocIterator) nextDoc() uint32 { 145 i.fileIdx = nextFileIndex(i.iter.first(), i.fileIdx, i.ends) 146 if i.fileIdx >= uint32(len(i.ends)) { 147 return maxUInt32 148 } 149 return i.fileIdx 150} 151 152func (i *ngramDocIterator) String() string { 153 return fmt.Sprintf("ngram(L=%d,R=%d,%v)", i.leftPad, i.rightPad, i.iter) 154} 155 156func (i *ngramDocIterator) prepare(nextDoc uint32) { 157 var start uint32 158 if nextDoc > 0 { 159 start = i.ends[nextDoc-1] 160 } 161 if start > 0 { 162 i.iter.next(start + i.leftPad - 1) 163 } 164 i.fileIdx = nextDoc 165} 166 167func (i *ngramDocIterator) updateStats(s *Stats) { 168 i.iter.updateStats(s) 169 s.NgramMatches += i.matchCount 170 s.NgramLookups += i.ngramLookups 171 i.matchCount = 0 172 i.ngramLookups = 0 173} 174 175func (i *ngramDocIterator) candidates() []*candidateMatch { 176 if i.fileIdx >= uint32(len(i.ends)) { 177 return nil 178 } 179 180 var fileStart uint32 181 if i.fileIdx > 0 { 182 fileStart = i.ends[i.fileIdx-1] 183 } 184 fileEnd := i.ends[i.fileIdx] 185 186 var candidates []*candidateMatch 187 for { 188 p1 := i.iter.first() 189 if p1 == maxUInt32 || p1 >= i.ends[i.fileIdx] { 190 break 191 } 192 i.iter.next(p1) 193 194 if p1 < i.leftPad+fileStart || p1+i.rightPad > fileEnd { 195 continue 196 } 197 198 candidates = append(candidates, &candidateMatch{ 199 file: uint32(i.fileIdx), 200 runeOffset: p1 - fileStart - i.leftPad, 201 }) 202 } 203 i.matchCount += len(candidates) 204 return candidates 205}