fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

indexdata: add interface for inverted index (#520)

We already have two implementations, combinedNgramOffset and
binarySearchNgram, for the inverted index from ngrams to posting lists.
With the btree I am working on, a third implementation will follow soon.

The current approach guarantees a zero value that won't panic in the
tests. However, I don't think we should pile on top of that for the
third implementation; we should rather handle nil in the code and
return an error where it makes sense.

+26 -33
+4
hititer.go
··· 110 110 } 111 111 112 112 func (d *indexData) trigramHitIterator(ng ngram, caseSensitive, fileName bool) (hitIterator, error) { 113 + if d.ngrams == nil { 114 + return nil, fmt.Errorf("trigramHitIterator: ngrams=nil") 115 + } 116 + 113 117 variants := []ngram{ng} 114 118 if !caseSensitive { 115 119 variants = generateCaseNgrams(ng)
+8 -2
indexdata.go
··· 33 33 34 34 file IndexFile 35 35 36 - ngrams ngramMap 36 + ngrams ngramIndex 37 37 38 38 newlinesStart uint32 39 39 newlinesIndex []uint32 ··· 314 314 } 315 315 sz += 8 * len(d.runeDocSections) 316 316 sz += 8 * len(d.fileBranchMasks) 317 - sz += d.ngrams.SizeBytes() 317 + if d.ngrams != nil { 318 + sz += d.ngrams.SizeBytes() 319 + } 318 320 sz += 12 * len(d.fileNameNgrams) // these slices reference mmap-ed memory 319 321 return sz 320 322 } ··· 348 350 func (data *indexData) ngramFrequency(ng ngram, filename bool) uint32 { 349 351 if filename { 350 352 return uint32(len(data.fileNameNgrams[ng])) 353 + } 354 + 355 + if data.ngrams == nil { 356 + return 0 351 357 } 352 358 353 359 return data.ngrams.Get(ng).sz
+8 -29
ngramoffset.go
··· 358 358 return 4*len(a.entries) + 4*len(a.chunkOffsets) 359 359 } 360 360 361 - // ngramMap is an transient type while we investigate the performance of 362 - // combinedNgramOffset (established) vs binarySearch (new). 363 - // 364 - // It is like an interface, but we do the drudgery so we still get a useful 365 - // zero value (instead of nil panics in tests). 366 - type ngramMap struct { 367 - offsetMap combinedNgramOffset 368 - bsMap binarySearchNgram 369 - } 370 - 371 - func (m ngramMap) Get(gram ngram) simpleSection { 372 - if m.offsetMap.asc != nil { 373 - return m.offsetMap.Get(gram) 374 - } 375 - return m.bsMap.Get(gram) 376 - } 377 - 378 - func (m ngramMap) DumpMap() map[ngram]simpleSection { 379 - if m.offsetMap.asc != nil { 380 - return m.offsetMap.DumpMap() 381 - } 382 - return m.bsMap.DumpMap() 383 - } 384 - 385 - func (m ngramMap) SizeBytes() int { 386 - if m.offsetMap.asc != nil { 387 - return m.offsetMap.SizeBytes() 388 - } 389 - return 0 // binarySearch only uses mmaped data. 361 + type ngramIndex interface { 362 + Get(gram ngram) simpleSection 363 + DumpMap() map[ngram]simpleSection 364 + SizeBytes() int 390 365 } 391 366 392 367 type binarySearchNgram struct { ··· 443 418 } 444 419 return m 445 420 } 421 + 422 + func (b binarySearchNgram) SizeBytes() int { 423 + return 0 // binarySearch only uses mmaped data. 424 + }
+6 -2
read.go
··· 294 294 if err != nil { 295 295 return nil, err 296 296 } 297 - d.ngrams = ngramMap{bsMap: bsMap} 297 + d.ngrams = bsMap 298 298 } else { 299 299 offsetMap, err := d.readNgrams(toc) 300 300 if err != nil { 301 301 return nil, err 302 302 } 303 - d.ngrams = ngramMap{offsetMap: offsetMap} 303 + d.ngrams = offsetMap 304 304 } 305 305 306 306 d.fileBranchMasks, err = readSectionU64(d.file, toc.branchMasks) ··· 694 694 id, err := loadIndexData(r) 695 695 if err != nil { 696 696 return err 697 + } 698 + 699 + if id.ngrams == nil { 700 + return fmt.Errorf("PrintNgramStats: ngrams=nil") 697 701 } 698 702 var rNgram [3]rune 699 703 for ngram, ss := range id.ngrams.DumpMap() {