···11+package index
22+33+import (
44+ "os"
55+ "strconv"
66+ "sync"
77+)
88+99+// docMatchTreeCache is a cache for docMatchTrees with random eviction.
1010+type docMatchTreeCache struct {
1111+ maxEntries int
1212+ cache map[docMatchTreeCacheKey]*docMatchTree
1313+ mu sync.RWMutex
1414+}
// docMatchTreeCacheKey is the map key under which a docMatchTree is cached.
// Both parts are plain strings, so keys compare by value.
type docMatchTreeCacheKey struct {
	field, value string
}
2020+2121+// newDocMatchTreeCache creates a new docMatchTreeCache.
2222+// If cacheSize is 0, the value from the ZOEKT_DOCMATCHTREE_CACHE environment
2323+// variable will be used if it is present.
2424+func newDocMatchTreeCache(cacheSize int) *docMatchTreeCache {
2525+ if v := os.Getenv("ZOEKT_DOCMATCHTREE_CACHE"); cacheSize == 0 && v != "" {
2626+ var err error
2727+ cacheSize, err = strconv.Atoi(v)
2828+ if err != nil {
2929+ cacheSize = 0
3030+ }
3131+ }
3232+ return &docMatchTreeCache{
3333+ maxEntries: cacheSize,
3434+ cache: make(map[docMatchTreeCacheKey]*docMatchTree),
3535+ }
3636+}
3737+3838+func (c *docMatchTreeCache) Get(field, value string) (*docMatchTree, bool) {
3939+ c.mu.RLock()
4040+ defer c.mu.RUnlock()
4141+ k := docMatchTreeCacheKey{field, value}
4242+ mt, ok := c.cache[k]
4343+ return mt, ok
4444+}
4545+4646+func (c *docMatchTreeCache) Add(field, value string, mt *docMatchTree) {
4747+ if c.maxEntries == 0 {
4848+ return
4949+ }
5050+ c.mu.Lock()
5151+ defer c.mu.Unlock()
5252+ k := docMatchTreeCacheKey{field, value}
5353+ c.cache[k] = mt
5454+ if len(c.cache) > c.maxEntries {
5555+ c.evictRandom()
5656+ }
5757+}
5858+5959+func (c *docMatchTreeCache) evictRandom() {
6060+ for k := range c.cache {
6161+ delete(c.cache, k)
6262+ break
6363+ }
6464+}
+98
index/docmatchtreecache_test.go
···11+package index
22+33+import (
44+ "strconv"
55+ "testing"
66+)
77+88+func TestDocMatchTreeCache_Basic(t *testing.T) {
99+ cache := newDocMatchTreeCache(2)
1010+1111+ mt1 := &docMatchTree{}
1212+ mt2 := &docMatchTree{}
1313+ mt3 := &docMatchTree{}
1414+1515+ // Add and Get
1616+ cache.Add("f1", "v1", mt1)
1717+ cache.Add("f2", "v2", mt2)
1818+ if v, ok := cache.Get("f1", "v1"); !ok || v != mt1 {
1919+ t.Errorf("expected mt1, got %v", v)
2020+ }
2121+ if v, ok := cache.Get("f2", "v2"); !ok || v != mt2 {
2222+ t.Errorf("expected mt2, got %v", v)
2323+ }
2424+2525+ // Add triggers eviction (random, so one of the two should be evicted)
2626+ cache.Add("f3", "v3", mt3)
2727+ v1, ok1 := cache.Get("f1", "v1")
2828+ v2, ok2 := cache.Get("f2", "v2")
2929+ v3, ok3 := cache.Get("f3", "v3")
3030+3131+ // Should have exactly 2 items
3232+ present := 0
3333+ if ok1 {
3434+ present++
3535+ if v1 != mt1 {
3636+ t.Errorf("expected mt1, got %v", v1)
3737+ }
3838+ }
3939+ if ok2 {
4040+ present++
4141+ if v2 != mt2 {
4242+ t.Errorf("expected mt2, got %v", v2)
4343+ }
4444+ }
4545+ if ok3 {
4646+ present++
4747+ if v3 != mt3 {
4848+ t.Errorf("expected mt3, got %v", v3)
4949+ }
5050+ }
5151+ if present != 2 {
5252+ t.Errorf("expected exactly 2 items in cache, got %d", present)
5353+ }
5454+}
5555+5656+func TestDocMatchTreeCache_Concurrent(t *testing.T) {
5757+ cache := newDocMatchTreeCache(100)
5858+5959+ // Create some test data
6060+ trees := make([]*docMatchTree, 50)
6161+ for i := range trees {
6262+ trees[i] = &docMatchTree{}
6363+ }
6464+6565+ // Start multiple goroutines doing concurrent reads and writes
6666+ const numGoroutines = 10
6767+ const numOperations = 1000
6868+6969+ done := make(chan bool, numGoroutines)
7070+7171+ // Reader goroutines (should be majority of operations)
7272+ for i := 0; i < numGoroutines-1; i++ {
7373+ go func(id int) {
7474+ for j := 0; j < numOperations; j++ {
7575+ field := "field" + strconv.Itoa(j%10)
7676+ value := "value" + strconv.Itoa(j%20)
7777+ cache.Get(field, value)
7878+ }
7979+ done <- true
8080+ }(i)
8181+ }
8282+8383+ // Writer goroutine (fewer write operations)
8484+ go func() {
8585+ for j := 0; j < numOperations/10; j++ {
8686+ field := "field" + strconv.Itoa(j%10)
8787+ value := "value" + strconv.Itoa(j%20)
8888+ tree := trees[j%len(trees)]
8989+ cache.Add(field, value, tree)
9090+ }
9191+ done <- true
9292+ }()
9393+9494+ // Wait for all goroutines to complete
9595+ for i := 0; i < numGoroutines; i++ {
9696+ <-done
9797+ }
9898+}
+3
index/indexdata.go
···103103104104 // rawConfigMasks contains the encoded RawConfig for each repository
105105 rawConfigMasks []uint8
106106+107107+ // Cache for docMatchTree objects
108108+ docMatchTreeCache *docMatchTreeCache
106109}
107110108111type symbolData struct {
+20-2
index/matchtree.go
···2323 "strings"
2424 "unicode/utf8"
25252626+ "github.com/cespare/xxhash/v2"
2627 "github.com/grafana/regexp"
27282829 "github.com/sourcegraph/zoekt"
···971972 if q == nil {
972973 return nil, fmt.Errorf("got nil (sub)query")
973974 }
975975+974976 switch s := q.(type) {
975977 case *query.Regexp:
976978 // RegexpToMatchTreeRecursive tries to distill a matchTree that matches a
···10541056 }, nil
1055105710561058 case *query.Meta:
10591059+ checksum := queryMetaChecksum(s.Field, s.Value)
10601060+ cacheKeyField := "Meta"
10611061+ if cached, ok := d.docMatchTreeCache.Get(cacheKeyField, checksum); ok {
10621062+ return cached, nil
10631063+ }
10641064+10571065 reposWant := make([]bool, len(d.repoMetaData))
10581066 for repoIdx, r := range d.repoMetaData {
10591067 if r.Metadata != nil {
···10631071 }
10641072 }
1065107310661066- return &docMatchTree{
10741074+ mt := &docMatchTree{
10671075 reason: "Meta",
10681076 numDocs: d.numDocs(),
10691077 predicate: func(docID uint32) bool {
···10731081 }
10741082 return reposWant[repoIdx]
10751083 },
10761076- }, nil
10841084+ }
10851085+ d.docMatchTreeCache.Add(cacheKeyField, checksum, mt)
10861086+ return mt, nil
1077108710781088 case *query.Substring:
10791089 return d.newSubstringMatchTree(s)
···14351445 return false
14361446 }
14371447}
14481448+14491449+func queryMetaChecksum(field string, value *regexp.Regexp) string {
14501450+ h := xxhash.New()
14511451+ h.Write([]byte(field))
14521452+ h.Write([]byte{':'})
14531453+ h.Write([]byte(value.String()))
14541454+ return fmt.Sprintf("%x", h.Sum64())
14551455+}
+30-2
index/matchtree_test.go
···376376 if err != nil {
377377 t.Fatal(err)
378378 }
379379+379380 want := []uint32{2, 4, 5}
380381 for i := range want {
381382 nextDoc := mt.nextDoc()
···433434 {Name: "r3", Metadata: map[string]string{"haystack": "needle"}},
434435 {Name: "r4", Metadata: map[string]string{"note": "test"}},
435436 },
436436- fileBranchMasks: []uint64{1, 1, 1, 1, 1}, // 5 docs
437437- repos: []uint16{0, 1, 2, 3, 4}, // map docIDs to repos
437437+ fileBranchMasks: []uint64{1, 1, 1, 1, 1}, // 5 docs
438438+ repos: []uint16{0, 1, 2, 3, 4}, // map docIDs to repos
439439+ docMatchTreeCache: newDocMatchTreeCache(1), // small cache to test eviction
438440 }
439441440442 q := &query.Meta{
···447449 t.Fatalf("failed to build matchTree: %v", err)
448450 }
449451452452+ // Check that the docMatchTree cache is populated correctly
453453+ checksum := queryMetaChecksum("license", regexp.MustCompile("M.T"))
454454+ cacheKeyField := "Meta"
455455+ if _, ok := d.docMatchTreeCache.Get(cacheKeyField, checksum); !ok {
456456+ t.Errorf("expected docMatchTreeCache to be populated for key (%q, %q)", cacheKeyField, checksum)
457457+ }
458458+450459 var matched []uint32
451460 for {
452461 doc := mt.nextDoc()
···462471 t.Errorf("meta match failed: got %v, want %v", matched, want)
463472 }
464473}
474474+475475+func Test_queryMetaCacheKey(t *testing.T) {
476476+ cases := []struct {
477477+ field string
478478+ pattern string
479479+ wantKey string
480480+ }{
481481+ {"metaField", "foo.*bar", "24e88a5ffec04af0"},
482482+ {"metaField", "foo.*baz", "d8d6f6a7f0725b61"},
483483+ {"otherField", "foo.*bar", "c9d07e17c028364"},
484484+ }
485485+ for _, tc := range cases {
486486+ re := regexp.MustCompile(tc.pattern)
487487+ key := queryMetaChecksum(tc.field, re)
488488+ if key != tc.wantKey {
489489+ t.Errorf("unexpected key for field=%q pattern=%q: got %q, want %q", tc.field, tc.pattern, key, tc.wantKey)
490490+ }
491491+ }
492492+}
+4
index/read.go
···257257 file: r.r,
258258 branchIDs: []map[string]uint{},
259259 branchNames: []map[uint]string{},
260260+261261+ // docMatchTreeCache is disabled by default.
262262+ // The number of max entries can be set with environment variable ZOEKT_DOCMATCHTREE_CACHE
263263+ docMatchTreeCache: newDocMatchTreeCache(0),
260264 }
261265262266 repos, md, err := r.parseMetadata(toc.metaData, toc.repoMetaData)