fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

Cache docMatchTree results for Meta conditions (#982)

author
John Mason
committer
GitHub
date (Sep 22, 2025, 2:53 PM +0200) commit 98307ca8 parent 4e4a529c
+219 -4
+64
index/docmatchtreecache.go
··· 1 + package index 2 + 3 + import ( 4 + "os" 5 + "strconv" 6 + "sync" 7 + ) 8 + 9 + // docMatchTreeCache is a cache for docMatchTrees with random eviction. 10 + type docMatchTreeCache struct { 11 + maxEntries int 12 + cache map[docMatchTreeCacheKey]*docMatchTree 13 + mu sync.RWMutex 14 + } 15 + 16 + type docMatchTreeCacheKey struct { 17 + field string 18 + value string 19 + } 20 + 21 + // newDocMatchTreeCache creates a new docMatchTreeCache. 22 + // If cacheSize is 0, the value from the ZOEKT_DOCMATCHTREE_CACHE environment 23 + // variable will be used if it is present. 24 + func newDocMatchTreeCache(cacheSize int) *docMatchTreeCache { 25 + if v := os.Getenv("ZOEKT_DOCMATCHTREE_CACHE"); cacheSize == 0 && v != "" { 26 + var err error 27 + cacheSize, err = strconv.Atoi(v) 28 + if err != nil { 29 + cacheSize = 0 30 + } 31 + } 32 + return &docMatchTreeCache{ 33 + maxEntries: cacheSize, 34 + cache: make(map[docMatchTreeCacheKey]*docMatchTree), 35 + } 36 + } 37 + 38 + func (c *docMatchTreeCache) Get(field, value string) (*docMatchTree, bool) { 39 + c.mu.RLock() 40 + defer c.mu.RUnlock() 41 + k := docMatchTreeCacheKey{field, value} 42 + mt, ok := c.cache[k] 43 + return mt, ok 44 + } 45 + 46 + func (c *docMatchTreeCache) Add(field, value string, mt *docMatchTree) { 47 + if c.maxEntries == 0 { 48 + return 49 + } 50 + c.mu.Lock() 51 + defer c.mu.Unlock() 52 + k := docMatchTreeCacheKey{field, value} 53 + c.cache[k] = mt 54 + if len(c.cache) > c.maxEntries { 55 + c.evictRandom() 56 + } 57 + } 58 + 59 + func (c *docMatchTreeCache) evictRandom() { 60 + for k := range c.cache { 61 + delete(c.cache, k) 62 + break 63 + } 64 + }
+98
index/docmatchtreecache_test.go
··· 1 + package index 2 + 3 + import ( 4 + "strconv" 5 + "testing" 6 + ) 7 + 8 + func TestDocMatchTreeCache_Basic(t *testing.T) { 9 + cache := newDocMatchTreeCache(2) 10 + 11 + mt1 := &docMatchTree{} 12 + mt2 := &docMatchTree{} 13 + mt3 := &docMatchTree{} 14 + 15 + // Add and Get 16 + cache.Add("f1", "v1", mt1) 17 + cache.Add("f2", "v2", mt2) 18 + if v, ok := cache.Get("f1", "v1"); !ok || v != mt1 { 19 + t.Errorf("expected mt1, got %v", v) 20 + } 21 + if v, ok := cache.Get("f2", "v2"); !ok || v != mt2 { 22 + t.Errorf("expected mt2, got %v", v) 23 + } 24 + 25 + // Add triggers eviction (random, so one of the two should be evicted) 26 + cache.Add("f3", "v3", mt3) 27 + v1, ok1 := cache.Get("f1", "v1") 28 + v2, ok2 := cache.Get("f2", "v2") 29 + v3, ok3 := cache.Get("f3", "v3") 30 + 31 + // Should have exactly 2 items 32 + present := 0 33 + if ok1 { 34 + present++ 35 + if v1 != mt1 { 36 + t.Errorf("expected mt1, got %v", v1) 37 + } 38 + } 39 + if ok2 { 40 + present++ 41 + if v2 != mt2 { 42 + t.Errorf("expected mt2, got %v", v2) 43 + } 44 + } 45 + if ok3 { 46 + present++ 47 + if v3 != mt3 { 48 + t.Errorf("expected mt3, got %v", v3) 49 + } 50 + } 51 + if present != 2 { 52 + t.Errorf("expected exactly 2 items in cache, got %d", present) 53 + } 54 + } 55 + 56 + func TestDocMatchTreeCache_Concurrent(t *testing.T) { 57 + cache := newDocMatchTreeCache(100) 58 + 59 + // Create some test data 60 + trees := make([]*docMatchTree, 50) 61 + for i := range trees { 62 + trees[i] = &docMatchTree{} 63 + } 64 + 65 + // Start multiple goroutines doing concurrent reads and writes 66 + const numGoroutines = 10 67 + const numOperations = 1000 68 + 69 + done := make(chan bool, numGoroutines) 70 + 71 + // Reader goroutines (should be majority of operations) 72 + for i := 0; i < numGoroutines-1; i++ { 73 + go func(id int) { 74 + for j := 0; j < numOperations; j++ { 75 + field := "field" + strconv.Itoa(j%10) 76 + value := "value" + strconv.Itoa(j%20) 77 + cache.Get(field, value) 78 + } 79 + done <- true 80 + }(i) 81 + } 82 + 83 + // Writer goroutine (fewer write operations) 84 + go func() { 85 + for j := 0; j < numOperations/10; j++ { 86 + field := "field" + strconv.Itoa(j%10) 87 + value := "value" + strconv.Itoa(j%20) 88 + tree := trees[j%len(trees)] 89 + cache.Add(field, value, tree) 90 + } 91 + done <- true 92 + }() 93 + 94 + // Wait for all goroutines to complete 95 + for i := 0; i < numGoroutines; i++ { 96 + <-done 97 + } 98 + }
+3
index/indexdata.go
··· 103 103 104 104 // rawConfigMasks contains the encoded RawConfig for each repository 105 105 rawConfigMasks []uint8 106 + 107 + // Cache for docMatchTree objects 108 + docMatchTreeCache *docMatchTreeCache 106 109 } 107 110 108 111 type symbolData struct {
+20 -2
index/matchtree.go
··· 23 23 "strings" 24 24 "unicode/utf8" 25 25 26 + "github.com/cespare/xxhash/v2" 26 27 "github.com/grafana/regexp" 27 28 28 29 "github.com/sourcegraph/zoekt" ··· 971 972 if q == nil { 972 973 return nil, fmt.Errorf("got nil (sub)query") 973 974 } 975 + 974 976 switch s := q.(type) { 975 977 case *query.Regexp: 976 978 // RegexpToMatchTreeRecursive tries to distill a matchTree that matches a ··· 1054 1056 }, nil 1055 1057 1056 1058 case *query.Meta: 1059 + checksum := queryMetaChecksum(s.Field, s.Value) 1060 + cacheKeyField := "Meta" 1061 + if cached, ok := d.docMatchTreeCache.Get(cacheKeyField, checksum); ok { 1062 + return cached, nil 1063 + } 1064 + 1057 1065 reposWant := make([]bool, len(d.repoMetaData)) 1058 1066 for repoIdx, r := range d.repoMetaData { 1059 1067 if r.Metadata != nil { ··· 1063 1071 } 1064 1072 } 1065 1073 1066 - return &docMatchTree{ 1074 + mt := &docMatchTree{ 1067 1075 reason: "Meta", 1068 1076 numDocs: d.numDocs(), 1069 1077 predicate: func(docID uint32) bool { ··· 1073 1081 } 1074 1082 return reposWant[repoIdx] 1075 1083 }, 1076 - }, nil 1084 + } 1085 + d.docMatchTreeCache.Add(cacheKeyField, checksum, mt) 1086 + return mt, nil 1077 1087 1078 1088 case *query.Substring: 1079 1089 return d.newSubstringMatchTree(s) ··· 1435 1445 return false 1436 1446 } 1437 1447 } 1448 + 1449 + func queryMetaChecksum(field string, value *regexp.Regexp) string { 1450 + h := xxhash.New() 1451 + h.Write([]byte(field)) 1452 + h.Write([]byte{':'}) 1453 + h.Write([]byte(value.String())) 1454 + return fmt.Sprintf("%x", h.Sum64()) 1455 + }
+30 -2
index/matchtree_test.go
··· 376 376 if err != nil { 377 377 t.Fatal(err) 378 378 } 379 + 379 380 want := []uint32{2, 4, 5} 380 381 for i := range want { 381 382 nextDoc := mt.nextDoc() ··· 433 434 {Name: "r3", Metadata: map[string]string{"haystack": "needle"}}, 434 435 {Name: "r4", Metadata: map[string]string{"note": "test"}}, 435 436 }, 436 - fileBranchMasks: []uint64{1, 1, 1, 1, 1}, // 5 docs 437 - repos: []uint16{0, 1, 2, 3, 4}, // map docIDs to repos 437 + fileBranchMasks: []uint64{1, 1, 1, 1, 1}, // 5 docs 438 + repos: []uint16{0, 1, 2, 3, 4}, // map docIDs to repos 439 + docMatchTreeCache: newDocMatchTreeCache(1), // small cache to test eviction 438 440 } 439 441 440 442 q := &query.Meta{ ··· 447 449 t.Fatalf("failed to build matchTree: %v", err) 448 450 } 449 451 452 + // Check that the docMatchTree cache is populated correctly 453 + checksum := queryMetaChecksum("license", regexp.MustCompile("M.T")) 454 + cacheKeyField := "Meta" 455 + if _, ok := d.docMatchTreeCache.Get(cacheKeyField, checksum); !ok { 456 + t.Errorf("expected docMatchTreeCache to be populated for key (%q, %q)", cacheKeyField, checksum) 457 + } 458 + 450 459 var matched []uint32 451 460 for { 452 461 doc := mt.nextDoc() ··· 462 471 t.Errorf("meta match failed: got %v, want %v", matched, want) 463 472 } 464 473 } 474 + 475 + func Test_queryMetaCacheKey(t *testing.T) { 476 + cases := []struct { 477 + field string 478 + pattern string 479 + wantKey string 480 + }{ 481 + {"metaField", "foo.*bar", "24e88a5ffec04af0"}, 482 + {"metaField", "foo.*baz", "d8d6f6a7f0725b61"}, 483 + {"otherField", "foo.*bar", "c9d07e17c028364"}, 484 + } 485 + for _, tc := range cases { 486 + re := regexp.MustCompile(tc.pattern) 487 + key := queryMetaChecksum(tc.field, re) 488 + if key != tc.wantKey { 489 + t.Errorf("unexpected key for field=%q pattern=%q: got %q, want %q", tc.field, tc.pattern, key, tc.wantKey) 490 + } 491 + } 492 + }
+4
index/read.go
··· 257 257 file: r.r, 258 258 branchIDs: []map[string]uint{}, 259 259 branchNames: []map[uint]string{}, 260 + 261 + // docMatchTreeCache is disabled by default. 262 + // The number of max entries can be set with environment variable ZOEKT_DOCMATCHTREE_CACHE 263 + docMatchTreeCache: newDocMatchTreeCache(0), 260 264 } 261 265 262 266 repos, md, err := r.parseMetadata(toc.metaData, toc.repoMetaData)