fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2018 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package index 16 17import ( 18 "encoding/binary" 19 "fmt" 20 "math" 21 22 "github.com/sourcegraph/zoekt" 23) 24 25// hitIterator finds potential search matches, measured in offsets of 26// the concatenation of all documents. 27type hitIterator interface { 28 // Return the first hit, or math.MaxUint32 if none. 29 first() uint32 30 31 // Skip until past limit. The argument math.MaxUint32 should be 32 // treated specially. 33 next(limit uint32) 34 35 // Return how many bytes were read. 36 updateStats(s *zoekt.Stats) 37} 38 39// distanceHitIterator looks for hits at a fixed distance apart. 40type distanceHitIterator struct { 41 i1 hitIterator 42 i2 hitIterator 43 distance uint32 44 started bool 45} 46 47func (i *distanceHitIterator) String() string { 48 return fmt.Sprintf("dist(%d, %v, %v)", i.distance, i.i1, i.i2) 49} 50 51func (i *distanceHitIterator) findNext() { 52 for { 53 var p1, p2 uint32 54 p1 = i.i1.first() 55 p2 = i.i2.first() 56 if p1 == math.MaxUint32 || p2 == math.MaxUint32 { 57 i.i1.next(math.MaxUint32) 58 break 59 } 60 61 if p1+i.distance < p2 { 62 i.i1.next(p2 - i.distance - 1) 63 } else if p1+i.distance > p2 { 64 i.i2.next(p1 + i.distance - 1) 65 } else { 66 break 67 } 68 } 69} 70 71func (i *distanceHitIterator) first() uint32 { 72 if !i.started { 73 i.findNext() 74 i.started = true 75 } 76 return i.i1.first() 77} 78 79func (i *distanceHitIterator) updateStats(s *zoekt.Stats) { 80 i.i1.updateStats(s) 81 i.i2.updateStats(s) 82} 83 84func (i *distanceHitIterator) next(limit uint32) { 85 i.i1.next(limit) 86 l2 := limit + i.distance 87 88 if l2 < limit { // overflow. 89 l2 = math.MaxUint32 90 } 91 i.i2.next(l2) 92 i.findNext() 93} 94 95func (d *indexData) newDistanceTrigramIter(ng1, ng2 ngram, dist uint32, caseSensitive, fileName bool) (hitIterator, error) { 96 if dist == 0 { 97 return nil, fmt.Errorf("d == 0") 98 } 99 100 i1, err := d.trigramHitIterator(ng1, caseSensitive, fileName) 101 if err != nil { 102 return nil, err 103 } 104 i2, err := d.trigramHitIterator(ng2, caseSensitive, fileName) 105 if err != nil { 106 return nil, err 107 } 108 return &distanceHitIterator{ 109 i1: i1, 110 i2: i2, 111 distance: dist, 112 }, nil 113} 114 115func (d *indexData) trigramHitIterator(ng ngram, caseSensitive, fileName bool) (hitIterator, error) { 116 variants := []ngram{ng} 117 if !caseSensitive { 118 variants = generateCaseNgrams(ng) 119 } 120 121 iters := make([]hitIterator, 0, len(variants)) 122 ngramLookups := 0 123 ngrams := d.ngrams(fileName) 124 for _, v := range variants { 125 sec := ngrams.Get(v) 126 ngramLookups++ 127 blob, err := d.readSectionBlob(sec) 128 if err != nil { 129 return nil, err 130 } 131 if len(blob) > 0 { 132 iters = append(iters, newCompressedPostingIterator(blob, v)) 133 } 134 } 135 136 if len(iters) == 1 { 137 // if we only return 1 then we need to include our ngramLookups stats 138 iter := (iters[0]).(*compressedPostingIterator) 139 iter.ngramLookups = ngramLookups 140 return iter, nil 141 } 142 return &mergingIterator{ 143 ngramLookups: ngramLookups, 144 iters: iters, 145 }, nil 146} 147 148// inMemoryIterator is hitIterator that goes over an in-memory uint32 posting list. 149type inMemoryIterator struct { 150 postings []uint32 151 what ngram 152} 153 154func (i *inMemoryIterator) String() string { 155 return fmt.Sprintf("mem(%s):%v", i.what, i.postings) 156} 157 158func (i *inMemoryIterator) first() uint32 { 159 if len(i.postings) > 0 { 160 return i.postings[0] 161 } 162 return math.MaxUint32 163} 164 165func (i *inMemoryIterator) updateStats(_ *zoekt.Stats) { 166} 167 168func (i *inMemoryIterator) next(limit uint32) { 169 if limit == math.MaxUint32 { 170 i.postings = nil 171 } 172 173 for len(i.postings) > 0 && i.postings[0] <= limit { 174 i.postings = i.postings[1:] 175 } 176} 177 178// compressedPostingIterator goes over a delta varint encoded posting 179// list. 180type compressedPostingIterator struct { 181 blob []byte 182 indexBytesLoaded int 183 ngramLookups int 184 _first uint32 185 what ngram 186} 187 188func newCompressedPostingIterator(b []byte, w ngram) *compressedPostingIterator { 189 d, sz := binary.Uvarint(b) 190 return &compressedPostingIterator{ 191 _first: uint32(d), 192 blob: b[sz:], 193 indexBytesLoaded: sz, 194 what: w, 195 } 196} 197 198func (i *compressedPostingIterator) String() string { 199 return fmt.Sprintf("compressed(%s, %d, [%d bytes])", i.what, i._first, len(i.blob)) 200} 201 202func (i *compressedPostingIterator) first() uint32 { 203 return i._first 204} 205 206func (i *compressedPostingIterator) next(limit uint32) { 207 if limit == math.MaxUint32 { 208 i.blob = nil 209 i._first = math.MaxUint32 210 return 211 } 212 213 for i._first <= limit && len(i.blob) > 0 { 214 delta, sz := binary.Uvarint(i.blob) 215 i._first += uint32(delta) 216 i.indexBytesLoaded += sz 217 i.blob = i.blob[sz:] 218 } 219 220 if i._first <= limit && len(i.blob) == 0 { 221 i._first = math.MaxUint32 222 } 223} 224 225func (i *compressedPostingIterator) updateStats(s *zoekt.Stats) { 226 s.IndexBytesLoaded += int64(i.indexBytesLoaded) 227 s.NgramLookups += i.ngramLookups 228 i.indexBytesLoaded = 0 229 i.ngramLookups = 0 230} 231 232// mergingIterator forms the merge of a set of hitIterators, to 233// implement an OR operation at the hit level. 234type mergingIterator struct { 235 iters []hitIterator 236 ngramLookups int 237} 238 239func (i *mergingIterator) String() string { 240 return fmt.Sprintf("merge:%v", i.iters) 241} 242 243func (i *mergingIterator) updateStats(s *zoekt.Stats) { 244 s.NgramLookups += i.ngramLookups 245 i.ngramLookups = 0 246 for _, j := range i.iters { 247 j.updateStats(s) 248 } 249} 250 251func (i *mergingIterator) first() uint32 { 252 r := uint32(math.MaxUint32) 253 for _, j := range i.iters { 254 f := j.first() 255 if f < r { 256 r = f 257 } 258 } 259 260 return r 261} 262 263func (i *mergingIterator) next(limit uint32) { 264 for _, j := range i.iters { 265 j.next(limit) 266 } 267}