fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at tngl 9.5 kB View raw
1// Copyright 2026 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package hybridre2 16 17import ( 18 "fmt" 19 "testing" 20 21 grafanaregexp "github.com/grafana/regexp" 22) 23 24// withThreshold overrides the effective threshold for the duration of the test 25// and registers a t.Cleanup to restore it afterwards. 26// 27// NOT safe for concurrent use: do not call t.Parallel() after withThreshold, 28// and do not use it from TestMain or init(). 29func withThreshold(tb testing.TB, thresh int64) { 30 tb.Helper() 31 old := threshold 32 threshold = func() int64 { return thresh } 33 tb.Cleanup(func() { threshold = old }) 34} 35 36// ---- unit tests ---- 37 38func TestCompileValid(t *testing.T) { 39 _, err := Compile(`foo.*bar`) 40 if err != nil { 41 t.Fatalf("unexpected error: %v", err) 42 } 43} 44 45func TestCompileInvalid(t *testing.T) { 46 _, err := Compile(`[invalid`) 47 if err == nil { 48 t.Fatal("expected error for invalid pattern, got nil") 49 } 50} 51 52func TestMustCompilePanics(t *testing.T) { 53 defer func() { 54 if r := recover(); r == nil { 55 t.Fatal("MustCompile should panic on invalid pattern") 56 } 57 }() 58 MustCompile(`[invalid`) 59} 60 61func TestString(t *testing.T) { 62 const pat = `foo.*bar` 63 re := MustCompile(pat) 64 if re.String() != pat { 65 t.Fatalf("String() = %q, want %q", re.String(), pat) 66 } 67} 68 69// TestFindAllIndexDisabled checks that with threshold=-1, we use grafana/regexp. 70func TestFindAllIndexDisabled(t *testing.T) { 71 corpus := []byte("func main() { fmt.Println(\"hello world\") }") 72 patterns := []string{`\w+`, `fmt\.\w+`, `(?i)MAIN`, `"[^"]*"`} 73 74 withThreshold(t, disabled) 75 for _, pat := range patterns { 76 hybrid := MustCompile(pat) 77 grafana := grafanaregexp.MustCompile(pat) 78 got := hybrid.FindAllIndex(corpus, -1) 79 want := grafana.FindAllIndex(corpus, -1) 80 if !equalIndexSlices(got, want) { 81 t.Errorf("disabled mode, pattern %q: hybrid=%v grafana=%v", pat, got, want) 82 } 83 } 84} 85 86// TestFindAllIndexForcedRE2 checks that with threshold=0, go-re2 is used and 87// produces identical results to grafana/regexp for standard patterns. 88func TestFindAllIndexForcedRE2(t *testing.T) { 89 corpus := []byte("func main() { fmt.Println(\"hello world\") }") 90 patterns := []string{`\w+`, `fmt\.\w+`, `(?i)MAIN`, `"[^"]*"`} 91 92 withThreshold(t, 0) 93 for _, pat := range patterns { 94 hybrid := MustCompile(pat) 95 grafana := grafanaregexp.MustCompile(pat) 96 got := hybrid.FindAllIndex(corpus, -1) 97 want := grafana.FindAllIndex(corpus, -1) 98 if !equalIndexSlices(got, want) { 99 t.Errorf("forced-re2 mode, pattern %q: hybrid=%v grafana=%v", pat, got, want) 100 } 101 } 102} 103 104// TestThresholdSwitching verifies the engine switches at the configured byte boundary. 105func TestThresholdSwitching(t *testing.T) { 106 const thresh = int64(512) 107 pattern := `func\s+\w+` 108 grafana := grafanaregexp.MustCompile(pattern) 109 110 smallCorpus := makeCorpus(300) // < 512 111 largeCorpus := makeCorpus(600) // >= 512 112 113 withThreshold(t, thresh) 114 hybrid := MustCompile(pattern) 115 116 for _, tc := range []struct { 117 name string 118 corpus []byte 119 }{ 120 {"small(<threshold)", smallCorpus}, 121 {"large(>=threshold)", largeCorpus}, 122 } { 123 got := hybrid.FindAllIndex(tc.corpus, -1) 124 want := grafana.FindAllIndex(tc.corpus, -1) 125 if !equalIndexSlices(got, want) { 126 t.Errorf("%s: hybrid=%v grafana=%v", tc.name, got, want) 127 } 128 } 129} 130 131// TestFindAllIndexIdenticalResults is a comprehensive correctness sweep across 132// pattern types and input sizes, asserting identical match positions. 133func TestFindAllIndexIdenticalResults(t *testing.T) { 134 patterns := []struct { 135 name string 136 pattern string 137 }{ 138 {"literal", `hello`}, 139 {"case-insensitive", `(?i)Hello`}, 140 {"word-boundary", `\bfunc\b`}, 141 {"alternation", `foo|bar|baz`}, 142 {"char-class", `[a-zA-Z_]\w*`}, 143 {"complex", `(func|var|const)\s+[A-Z]\w*`}, 144 {"dot-plus", `.+`}, 145 {"anchored-line", `(?m)^package\s+\w+`}, 146 {"no-match", `XYZZY_NEVER_MATCHES`}, 147 } 148 149 sizes := []struct { 150 name string 151 size int 152 }{ 153 {"64B", 64}, 154 {"512B", 512}, 155 {"4KB", 4 * 1024}, 156 {"64KB", 64 * 1024}, 157 {"256KB", 256 * 1024}, 158 } 159 160 // Force re2 path to test its correctness across all sizes. 161 withThreshold(t, 0) 162 for _, sz := range sizes { 163 corpus := makeCorpus(sz.size) 164 for _, pat := range patterns { 165 name := sz.name + "/" + pat.name 166 t.Run(name, func(t *testing.T) { 167 hybrid := MustCompile(pat.pattern) 168 grafana := grafanaregexp.MustCompile(pat.pattern) 169 170 got := hybrid.FindAllIndex(corpus, -1) 171 want := grafana.FindAllIndex(corpus, -1) 172 if !equalIndexSlices(got, want) { 173 t.Errorf("pattern=%q size=%d: len(hybrid)=%d len(grafana)=%d", 174 pat.pattern, sz.size, len(got), len(want)) 175 if len(got) > 0 && len(want) > 0 { 176 t.Errorf(" first hybrid=%v first grafana=%v", got[0], want[0]) 177 } 178 } 179 }) 180 } 181 } 182} 183 184// TestFindAllIndexLimitN verifies that the n parameter (match count limit) is 185// honoured identically by both engines. 186func TestFindAllIndexLimitN(t *testing.T) { 187 corpus := makeCorpus(64 * 1024) // large enough to have many matches 188 patterns := []string{`func\s+\w+`, `\bvar\b`, `[A-Z]\w*`} 189 190 withThreshold(t, 0) // force re2 path 191 for _, pat := range patterns { 192 hybrid := MustCompile(pat) 193 grafana := grafanaregexp.MustCompile(pat) 194 195 got := hybrid.FindAllIndex(corpus, 1) 196 want := grafana.FindAllIndex(corpus, 1) 197 if !equalIndexSlices(got, want) { 198 t.Errorf("n=1, pattern=%q: hybrid=%v grafana=%v", pat, got, want) 199 } 200 // Sanity: n=1 should return at most one match. 201 if len(got) > 1 { 202 t.Errorf("n=1, pattern=%q: got %d matches, want <= 1", pat, len(got)) 203 } 204 } 205} 206 207// TestNoMatchReturnsEmpty verifies no-match returns nil/empty consistently. 208func TestNoMatchReturnsEmpty(t *testing.T) { 209 corpus := makeCorpus(1024) 210 211 for _, thresh := range []int64{disabled, 0} { 212 t.Run(fmt.Sprintf("thresh=%d", thresh), func(t *testing.T) { 213 withThreshold(t, thresh) 214 // MustCompile must be after withThreshold so that the lazy RE2 215 // compilation in Compile() sees the overridden threshold and 216 // actually initialises re.re2 when thresh=0. 217 re := MustCompile(`XYZZY_NEVER_MATCHES`) 218 if got := re.FindAllIndex(corpus, -1); len(got) != 0 { 219 t.Errorf("thresh=%d: expected empty, got %v", thresh, got) 220 } 221 }) 222 } 223} 224 225// ---- benchmarks ---- 226 227// BenchmarkEngines measures FindAllIndex performance for grafana/regexp vs 228// go-re2 across multiple input sizes and pattern complexities. 229// 230// Run with: 231// 232// go test -bench=BenchmarkEngines -benchmem -benchtime=3s ./internal/hybridre2/ 233func BenchmarkEngines(b *testing.B) { 234 patterns := []struct { 235 name string 236 pattern string 237 }{ 238 {"literal", `main`}, 239 {"case-insensitive", `(?i)func`}, 240 {"alternation-5", `func|var|const|type|import`}, 241 {"complex", `(func|var)\s+[A-Z]\w*\s*\(`}, 242 {"hard-no-match", `XYZZY_NEVER_MATCHES_AT_ALL`}, 243 } 244 245 sizes := []struct { 246 name string 247 size int 248 }{ 249 {"512B", 512}, 250 {"4KB", 4 * 1024}, 251 {"32KB", 32 * 1024}, 252 {"128KB", 128 * 1024}, 253 {"512KB", 512 * 1024}, 254 } 255 256 // Pre-build all corpora outside the benchmark loop. 257 corpora := make(map[string][]byte, len(sizes)) 258 for _, sz := range sizes { 259 corpora[sz.name] = makeCorpus(sz.size) 260 } 261 262 for _, pat := range patterns { 263 grafanaRe := grafanaregexp.MustCompile(pat.pattern) 264 265 for _, sz := range sizes { 266 corpus := corpora[sz.name] 267 name := pat.name + "/" + sz.name 268 269 b.Run("grafana/"+name, func(b *testing.B) { 270 b.SetBytes(int64(len(corpus))) 271 b.ResetTimer() 272 for i := 0; i < b.N; i++ { 273 _ = grafanaRe.FindAllIndex(corpus, -1) 274 } 275 }) 276 277 b.Run("go-re2/"+name, func(b *testing.B) { 278 withThreshold(b, 0) // force re2 for all sizes 279 // MustCompile must be after withThreshold so that re.re2 280 // is initialised (lazy compilation checks threshold() at 281 // compile time, not match time). 282 hybridRe := MustCompile(pat.pattern) 283 b.SetBytes(int64(len(corpus))) 284 b.ResetTimer() 285 for i := 0; i < b.N; i++ { 286 _ = hybridRe.FindAllIndex(corpus, -1) 287 } 288 }) 289 } 290 } 291} 292 293// ---- helpers ---- 294 295// makeCorpus returns a realistic-looking Go source corpus of approximately 296// the requested size. 297func makeCorpus(size int) []byte { 298 const template = `package main 299 300import ( 301 "fmt" 302 "strings" 303) 304 305// Foo is an exported function that transforms its input. 306func Foo(input string) string { 307 return strings.ToUpper(input) 308} 309 310// Bar demonstrates calling Foo. 311func Bar() { 312 result := Foo("hello world") 313 fmt.Println(result) 314} 315 316var globalVar = "some value" 317const MaxItems = 100 318 319type MyStruct struct { 320 Name string 321 Value int 322} 323 324func (m MyStruct) String() string { 325 return fmt.Sprintf("%s=%d", m.Name, m.Value) 326} 327 328` 329 buf := make([]byte, 0, size) 330 for len(buf) < size { 331 buf = append(buf, []byte(template)...) 332 } 333 return buf[:size] 334} 335 336func equalIndexSlices(a, b [][]int) bool { 337 if len(a) != len(b) { 338 return false 339 } 340 for i := range a { 341 if !equalIntSlice(a[i], b[i]) { 342 return false 343 } 344 } 345 return true 346} 347 348func equalIntSlice(a, b []int) bool { 349 if len(a) != len(b) { 350 return false 351 } 352 for i := range a { 353 if a[i] != b[i] { 354 return false 355 } 356 } 357 return true 358}