fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at tngl 10 kB View raw
1package e2e 2 3import ( 4 "bytes" 5 "context" 6 "flag" 7 "fmt" 8 "io" 9 "net/url" 10 "os" 11 "os/exec" 12 "path/filepath" 13 "strings" 14 "testing" 15 "time" 16 17 "github.com/google/go-cmp/cmp" 18 19 "github.com/sourcegraph/zoekt" 20 "github.com/sourcegraph/zoekt/index" 21 "github.com/sourcegraph/zoekt/internal/archive" 22 "github.com/sourcegraph/zoekt/query" 23 "github.com/sourcegraph/zoekt/search" 24) 25 26var update = flag.Bool("update", false, "update golden file") 27 28var useShardCache = flag.Bool("shard_cache", false, "cache computed shards for faster test runs") 29 30// debugScore can be set to include much more output. Do not commit the 31// updated golden files, this is purely used for debugging in a local 32// environment. 33var debugScore = flag.Bool("debug_score", false, "include debug output in golden files.") 34 35func TestRanking(t *testing.T) { 36 if testing.Short() { 37 t.Skip("skipping due to short flag") 38 } 39 40 requireCTags(t) 41 42 archiveURLs := []string{ 43 "https://github.com/sourcegraph/sourcegraph-public-snapshot/tree/v5.2.2", // Nov 1 2023 44 "https://github.com/golang/go/tree/go1.21.4", // Nov 7 2023 45 "https://github.com/sourcegraph/cody-public-snapshot/tree/vscode-v0.14.5", // Nov 8 2023 46 // The commit before ranking e2e tests were added to avoid matching 47 // content inside our golden files. 48 "https://github.com/sourcegraph/zoekt/commit/ef907c2371176aa3f97713d5bf182983ef090c6a", // Nov 17 2023 49 "https://github.com/sourcegraph/conc/tree/5f936abd7ae87036af1f75c95fb9d0daaf00116b", // Jan 21 2024 50 } 51 q := func(query, target string) rankingQuery { 52 return rankingQuery{Query: query, Target: target} 53 } 54 queries := []rankingQuery{ 55 // golang/go 56 q("test server", "github.com/golang/go/src/net/http/httptest/server.go"), 57 q("bytes buffer", "github.com/golang/go/src/bytes/buffer.go"), 58 q("bufio buffer", "github.com/golang/go/src/bufio/scan.go"), 59 q("time compare\\(", "github.com/golang/go/src/time/time.go"), 60 61 // sourcegraph/sourcegraph 62 q("graphql type User", "github.com/sourcegraph/sourcegraph-public-snapshot/cmd/frontend/graphqlbackend/schema.graphql"), 63 q("Get database/user", "github.com/sourcegraph/sourcegraph-public-snapshot/internal/database/users.go"), 64 q("InternalDoer", "github.com/sourcegraph/sourcegraph-public-snapshot/internal/httpcli/client.go"), 65 q("Repository metadata Write rbac", "github.com/sourcegraph/sourcegraph-public-snapshot/internal/rbac/constants.go"), // unsure if this is the best doc? 66 67 // cody 68 q("generate unit test", "github.com/sourcegraph/cody-public-snapshot/lib/shared/src/chat/recipes/generate-test.ts"), 69 q("r:cody sourcegraph url", "github.com/sourcegraph/cody-public-snapshot/lib/shared/src/sourcegraph-api/graphql/client.ts"), 70 71 // zoekt 72 q("zoekt searcher", "github.com/sourcegraph/zoekt/api.go"), 73 74 // exact phrases 75 q("assets are not configured for this binary", "github.com/sourcegraph/sourcegraph-public-snapshot/ui/assets/assets.go"), 76 q("sourcegraph/server docker image build", "github.com/sourcegraph/sourcegraph-public-snapshot/dev/tools.go"), 77 78 // symbols split up 79 q("bufio flush writer", "github.com/golang/go/src/net/http/transfer.go"), // bufioFlushWriter 80 q("coverage data writer", "github.com/golang/go/src/internal/coverage/encodecounter/encode.go"), // CoverageDataWriter 81 82 // sourcegraph/conc vs golang/go 83 q("WaitGroup", "github.com/sourcegraph/conc/waitgroup.go"), 84 } 85 86 var indexDir string 87 if *useShardCache { 88 t.Logf("reusing index dir to speed up testing. If you have unexpected results remove %s", shardCache) 89 indexDir = shardCache 90 } else { 91 indexDir = t.TempDir() 92 } 93 94 for _, u := range archiveURLs { 95 if err := indexURL(indexDir, u); err != nil { 96 t.Fatal(err) 97 } 98 } 99 100 ss, err := search.NewDirectorySearcher(indexDir) 101 if err != nil { 102 t.Fatalf("NewDirectorySearcher(%s): %v", indexDir, err) 103 } 104 defer ss.Close() 105 106 var ranks []int 107 for _, rq := range queries { 108 // normalise queryStr for writing to fs 109 name := strings.Map(func(r rune) rune { 110 if strings.ContainsRune(" :", r) { 111 return '_' 112 } 113 if '0' <= r && r <= '9' || 114 'a' <= r && r <= 'z' || 115 'A' <= r && r <= 'Z' { 116 return r 117 } 118 return -1 119 }, rq.Query) 120 121 t.Run(name, func(t *testing.T) { 122 q, err := query.Parse(rq.Query) 123 if err != nil { 124 t.Fatal(err) 125 } 126 127 sOpts := zoekt.SearchOptions{ 128 // Use the same options sourcegraph has by default 129 ChunkMatches: true, 130 MaxWallTime: 20 * time.Second, 131 ShardMaxMatchCount: 10_000 * 10, 132 TotalMaxMatchCount: 100_000 * 10, 133 MaxDocDisplayCount: 500, 134 135 DebugScore: *debugScore, 136 } 137 result, err := ss.Search(context.Background(), q, &sOpts) 138 if err != nil { 139 t.Fatal(err) 140 } 141 142 ranks = append(ranks, targetRank(rq, result.Files)) 143 144 var gotBuf bytes.Buffer 145 marshalMatches(&gotBuf, rq, q, result.Files) 146 assertGolden(t, name, gotBuf.Bytes()) 147 }) 148 } 149 150 t.Run("rank_stats", func(t *testing.T) { 151 if len(ranks) != len(queries) { 152 t.Skip("not computing rank stats since not all query cases ran") 153 } 154 155 var gotBuf bytes.Buffer 156 printf := func(format string, a ...any) { 157 _, _ = fmt.Fprintf(&gotBuf, format, a...) 158 } 159 160 printf("queries: %d\n", len(ranks)) 161 162 for _, recallThreshold := range []int{1, 5} { 163 count := 0 164 for _, rank := range ranks { 165 if rank <= recallThreshold && rank > 0 { 166 count++ 167 } 168 } 169 countp := float64(count) * 100 / float64(len(ranks)) 170 printf("recall@%d: %d (%.0f%%)\n", recallThreshold, count, countp) 171 } 172 173 // Mean reciprocal rank 174 mrr := float64(0) 175 for _, rank := range ranks { 176 if rank > 0 { 177 mrr += 1 / float64(rank) 178 } 179 } 180 mrr /= float64(len(ranks)) 181 printf("mrr: %f\n", mrr) 182 183 assertGolden(t, "rank_stats", gotBuf.Bytes()) 184 }) 185} 186 187func assertGolden(t *testing.T, name string, got []byte) { 188 t.Helper() 189 190 wantPath := filepath.Join("testdata", name+".txt") 191 if *update { 192 if err := os.WriteFile(wantPath, got, 0o600); err != nil { 193 t.Fatal(err) 194 } 195 } 196 want, err := os.ReadFile(wantPath) 197 if err != nil { 198 t.Fatal(err) 199 } 200 201 if d := cmp.Diff(string(want), string(got)); d != "" { 202 t.Fatalf("unexpected (-want, +got):\n%s", d) 203 } 204} 205 206type rankingQuery struct { 207 Query string 208 Target string 209} 210 211var ( 212 tarballCache = "/tmp/zoekt-test-ranking-tarballs-" + os.Getenv("USER") 213 shardCache = "/tmp/zoekt-test-ranking-shards-" + os.Getenv("USER") 214) 215 216func indexURL(indexDir, u string) error { 217 if err := os.MkdirAll(tarballCache, 0o700); err != nil { 218 return err 219 } 220 221 opts := archive.Options{ 222 Archive: u, 223 Incremental: true, 224 } 225 opts.SetDefaults() // sets metadata like Name and the codeload URL 226 u = opts.Archive 227 228 // if opts.Commit is set but opts.Branch is not, then we just need to give 229 // the commit a name for testing. 230 if opts.Commit != "" && opts.Branch == "" { 231 opts.Branch = "test" 232 } 233 234 // update Archive location to cached location 235 cacheBase := fmt.Sprintf("%s-%s%s.tar.gz", url.QueryEscape(opts.Name), opts.Branch, opts.Commit) // assume .tar.gz 236 path := filepath.Join(tarballCache, cacheBase) 237 opts.Archive = path 238 239 if _, err := os.Stat(path); os.IsNotExist(err) { 240 if err := download(u, path); err != nil { 241 return err 242 } 243 } 244 245 // TODO scip 246 // languageMap := make(ctags.LanguageMap) 247 // for _, lang := range []string{"kotlin", "rust", "ruby", "go", "python", "javascript", "c_sharp", "scala", "typescript", "zig"} { 248 // languageMap[lang] = ctags.ScipCTags 249 // } 250 251 err := archive.Index(opts, index.Options{ 252 IndexDir: indexDir, 253 CTagsMustSucceed: true, 254 RepositoryDescription: zoekt.Repository{ 255 // Use the latest commit date to calculate the repo rank when loading the shard. 256 // This is the same setting we use in production. 257 RawConfig: map[string]string{"latestCommitDate": "1"}, 258 }, 259 }) 260 if err != nil { 261 return fmt.Errorf("failed to index %s: %w", opts.Archive, err) 262 } 263 264 return nil 265} 266 267func download(url, dst string) error { 268 tmpPath := dst + ".part" 269 270 rc, err := archive.OpenReader(url) 271 if err != nil { 272 return err 273 } 274 defer rc.Close() 275 276 f, err := os.OpenFile(tmpPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600) 277 if err != nil { 278 return err 279 } 280 defer f.Close() 281 282 _, err = io.Copy(f, rc) 283 if err != nil { 284 return err 285 } 286 287 err = f.Close() 288 if err != nil { 289 return err 290 } 291 292 return os.Rename(tmpPath, dst) 293} 294 295const ( 296 chunkMatchesPerFile = 3 297 fileMatchesPerSearch = 6 298) 299 300func docName(f zoekt.FileMatch) string { 301 return f.Repository + "/" + f.FileName 302} 303 304func marshalMatches(w io.Writer, rq rankingQuery, q query.Q, files []zoekt.FileMatch) { 305 _, _ = fmt.Fprintf(w, "queryString: %s\n", rq.Query) 306 _, _ = fmt.Fprintf(w, "query: %s\n", q) 307 _, _ = fmt.Fprintf(w, "targetRank: %d\n\n", targetRank(rq, files)) 308 309 files, hiddenFiles := splitAtIndex(files, fileMatchesPerSearch) 310 for _, f := range files { 311 doc := docName(f) 312 if doc == rq.Target { 313 doc = "**" + doc + "**" 314 } 315 _, _ = fmt.Fprintf(w, "%s%s\n", doc, addTabIfNonEmpty(f.Debug)) 316 317 chunks, hidden := splitAtIndex(f.ChunkMatches, chunkMatchesPerFile) 318 319 for _, m := range chunks { 320 _, _ = fmt.Fprintf(w, "%d:%s%s\n", m.ContentStart.LineNumber, strings.TrimRight(string(m.Content), "\n"), addTabIfNonEmpty(m.DebugScore)) 321 } 322 323 if len(hidden) > 0 { 324 _, _ = fmt.Fprintf(w, "hidden %d more line matches\n", len(hidden)) 325 } 326 _, _ = fmt.Fprintln(w) 327 } 328 329 if len(hiddenFiles) > 0 { 330 fmt.Fprintf(w, "hidden %d more file matches\n", len(hiddenFiles)) 331 } 332} 333 334func targetRank(rq rankingQuery, files []zoekt.FileMatch) int { 335 for i, f := range files { 336 if docName(f) == rq.Target { 337 return i + 1 338 } 339 } 340 return -1 341} 342 343func splitAtIndex[E any](s []E, idx int) ([]E, []E) { 344 if idx < len(s) { 345 return s[:idx], s[idx:] 346 } 347 return s, nil 348} 349 350func addTabIfNonEmpty(s string) string { 351 if s != "" { 352 return "\t" + s 353 } 354 return s 355} 356 357func requireCTags(tb testing.TB) { 358 tb.Helper() 359 360 if os.Getenv("CTAGS_COMMAND") != "" { 361 return 362 } 363 if _, err := exec.LookPath("universal-ctags"); err == nil { 364 return 365 } 366 367 // On CI we require ctags to be available. Otherwise we skip 368 if os.Getenv("CI") != "" { 369 tb.Fatal("universal-ctags is missing") 370 } else { 371 tb.Skip("universal-ctags is missing") 372 } 373} 374 375func checkScipCTags() string { 376 if ctags := os.Getenv("SCIP_CTAGS_COMMAND"); ctags != "" { 377 return ctags 378 } 379 380 if ctags, err := exec.LookPath("scip-ctags"); err == nil { 381 return ctags 382 } 383 384 return "" 385}