fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

rename ngrams to contentNgrams (#623)

The name always bothered me since we had fileNameNgrams as well. Now
that they are both accessed via a btreeIndex, I also took the opportunity
to introduce a helper "ngrams" which returns the correct index depending
on whether you want filenames or contents.

Test Plan: go test

+25 -41
-8
btree.go
··· 431 431 432 432 return m 433 433 } 434 - 435 - // GetBlob returns the raw encoded offset list for ng. 436 - // 437 - // Note: the returned byte slice is mmap backed normally. 438 - func (b btreeIndex) GetBlob(ng ngram) ([]byte, error) { 439 - sec := b.Get(ng) 440 - return b.file.Read(sec.off, sec.sz) 441 - }
+2 -13
hititer.go
··· 117 117 118 118 iters := make([]hitIterator, 0, len(variants)) 119 119 ngramLookups := 0 120 + ngrams := d.ngrams(fileName) 120 121 for _, v := range variants { 121 - if fileName { 122 - blob, err := d.fileNameNgrams.GetBlob(v) 123 - ngramLookups++ 124 - if err != nil { 125 - return nil, err 126 - } 127 - if len(blob) > 0 { 128 - iters = append(iters, newCompressedPostingIterator(blob, v)) 129 - } 130 - continue 131 - } 132 - 133 - sec := d.ngrams.Get(v) 122 + sec := ngrams.Get(v) 134 123 ngramLookups++ 135 124 blob, err := d.readSectionBlob(sec) 136 125 if err != nil {
+8 -7
indexdata.go
··· 35 35 36 36 file IndexFile 37 37 38 - ngrams btreeIndex 38 + contentNgrams btreeIndex 39 39 40 40 newlinesStart uint32 41 41 newlinesIndex []uint32 ··· 315 315 } 316 316 sz += 8 * len(d.runeDocSections) 317 317 sz += 8 * len(d.fileBranchMasks) 318 - sz += d.ngrams.SizeBytes() 318 + sz += d.contentNgrams.SizeBytes() 319 319 sz += d.fileNameNgrams.SizeBytes() 320 320 return sz 321 321 } ··· 366 366 return first, last 367 367 } 368 368 369 - func (data *indexData) ngramFrequency(ng ngram, filename bool) uint32 { 369 + func (data *indexData) ngrams(filename bool) btreeIndex { 370 370 if filename { 371 - return data.fileNameNgrams.Get(ng).sz 371 + return data.fileNameNgrams 372 372 } 373 - return data.ngrams.Get(ng).sz 373 + return data.contentNgrams 374 374 } 375 375 376 376 type ngramIterationResults struct { ··· 415 415 }) 416 416 frequencies := make([]uint32, 0, len(ngramOffs)) 417 417 ngramLookups := 0 418 + ngrams := d.ngrams(query.FileName) 418 419 for _, o := range ngramOffs { 419 420 var freq uint32 420 421 if query.CaseSensitive { 421 - freq = d.ngramFrequency(o.ngram, query.FileName) 422 + freq = ngrams.Get(o.ngram).sz 422 423 ngramLookups++ 423 424 } else { 424 425 for _, v := range generateCaseNgrams(o.ngram) { 425 - freq += d.ngramFrequency(v, query.FileName) 426 + freq += ngrams.Get(v).sz 426 427 ngramLookups++ 427 428 } 428 429 }
+2 -2
read.go
··· 288 288 return nil, err 289 289 } 290 290 291 - d.ngrams, err = d.newBtreeIndex(toc.ngramText, toc.postings) 291 + d.contentNgrams, err = d.newBtreeIndex(toc.ngramText, toc.postings) 292 292 if err != nil { 293 293 return nil, err 294 294 } ··· 652 652 } 653 653 654 654 var rNgram [3]rune 655 - for ngram, ss := range id.ngrams.DumpMap() { 655 + for ngram, ss := range id.contentNgrams.DumpMap() { 656 656 rNgram = ngramToRunes(ngram) 657 657 fmt.Printf("%d\t%q\n", ss.sz, string(rNgram[:])) 658 658 }
+13 -11
read_test.go
··· 74 74 t.Errorf("got filename %q, want %q", got, "filename") 75 75 } 76 76 77 - if len(data.ngrams.DumpMap()) != 3 { 78 - t.Fatalf("got ngrams %v, want 3 ngrams", data.ngrams) 77 + contentNgrams := data.contentNgrams.DumpMap() 78 + if len(contentNgrams) != 3 { 79 + t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams) 79 80 } 80 81 81 - if sec := data.ngrams.Get(stringToNGram("bcq")); sec.sz > 0 { 82 - t.Errorf("found ngram bcq (%v) in %v", uint64(stringToNGram("bcq")), data.ngrams) 82 + if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 { 83 + t.Errorf("found ngram bcq (%v) in %v", uint64(stringToNGram("bcq")), contentNgrams) 83 84 } 84 85 } 85 86 ··· 117 118 t.Errorf("got index %v, want {0,4}", data.fileNameIndex) 118 119 } 119 120 120 - gotBlob, err := data.fileNameNgrams.GetBlob(stringToNGram("bCd")) 121 + gotSec := data.fileNameNgrams.Get(stringToNGram("bCd")) 121 122 if err != nil { 122 123 t.Fatalf("fileNameNgrams.GetBlob: %v", err) 123 124 } 124 125 125 - if !reflect.DeepEqual(gotBlob, []byte{1}) { 126 + if !reflect.DeepEqual(buf.Bytes()[gotSec.off:gotSec.off+gotSec.sz], []byte{1}) { 126 127 t.Errorf("got trigram bcd at bits %v, want sz 2", data.fileNameNgrams) 127 128 } 128 129 } ··· 193 194 194 195 for _, tt := range cases { 195 196 t.Run(tt.ng, func(t *testing.T) { 196 - havePostingList := id.ngrams.Get(stringToNGram(tt.ng)) 197 + havePostingList := id.contentNgrams.Get(stringToNGram(tt.ng)) 197 198 if !reflect.DeepEqual(tt.wantPostingList, havePostingList) { 198 199 t.Fatalf("\nwant:%+v\ngot: %+v", tt.wantPostingList, havePostingList) 199 200 } ··· 417 418 t.Errorf("got filename %q, want %q", got, "filename") 418 419 } 419 420 420 - if len(data.ngrams.DumpMap()) != 3 { 421 - t.Fatalf("got ngrams %v, want 3 ngrams", data.ngrams) 421 + contentNgrams := data.contentNgrams.DumpMap() 422 + if len(data.contentNgrams.DumpMap()) != 3 { 423 + t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams) 422 424 } 423 425 424 - if sec := 
data.ngrams.Get(stringToNGram("bcq")); sec.sz > 0 { 425 - t.Errorf("found ngram bcd in %v", data.ngrams) 426 + if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 { 427 + t.Errorf("found ngram bcd in %v", contentNgrams) 426 428 } 427 429 }, 428 430 )