fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1package e2e 2 3import ( 4 "bytes" 5 "context" 6 "flag" 7 "fmt" 8 "io" 9 "net/url" 10 "os" 11 "os/exec" 12 "path/filepath" 13 "strings" 14 "testing" 15 "time" 16 17 "github.com/google/go-cmp/cmp" 18 "github.com/sourcegraph/zoekt" 19 "github.com/sourcegraph/zoekt/index" 20 "github.com/sourcegraph/zoekt/internal/archive" 21 "github.com/sourcegraph/zoekt/query" 22 "github.com/sourcegraph/zoekt/search" 23) 24 25var update = flag.Bool("update", false, "update golden file") 26 27var useShardCache = flag.Bool("shard_cache", false, "cache computed shards for faster test runs") 28 29// debugScore can be set to include much more output. Do not commit the 30// updated golden files, this is purely used for debugging in a local 31// environment. 32var debugScore = flag.Bool("debug_score", false, "include debug output in golden files.") 33 34func TestRanking(t *testing.T) { 35 if testing.Short() { 36 t.Skip("skipping due to short flag") 37 } 38 39 requireCTags(t) 40 41 archiveURLs := []string{ 42 "https://github.com/sourcegraph/sourcegraph-public-snapshot/tree/v5.2.2", // Nov 1 2023 43 "https://github.com/golang/go/tree/go1.21.4", // Nov 7 2023 44 "https://github.com/sourcegraph/cody/tree/vscode-v0.14.5", // Nov 8 2023 45 // The commit before ranking e2e tests were added to avoid matching 46 // content inside our golden files. 47 "https://github.com/sourcegraph/zoekt/commit/ef907c2371176aa3f97713d5bf182983ef090c6a", // Nov 17 2023 48 "https://github.com/sourcegraph/conc/tree/5f936abd7ae87036af1f75c95fb9d0daaf00116b", // Jan 21 2024 49 } 50 q := func(query, target string) rankingQuery { 51 return rankingQuery{Query: query, Target: target} 52 } 53 queries := []rankingQuery{ 54 // golang/go 55 q("test server", "github.com/golang/go/src/net/http/httptest/server.go"), 56 q("bytes buffer", "github.com/golang/go/src/bytes/buffer.go"), 57 q("bufio buffer", "github.com/golang/go/src/bufio/scan.go"), 58 q("time compare\\(", "github.com/golang/go/src/time/time.go"), 59 60 // sourcegraph/sourcegraph 61 q("graphql type User", "github.com/sourcegraph/sourcegraph-public-snapshot/cmd/frontend/graphqlbackend/schema.graphql"), 62 q("Get database/user", "github.com/sourcegraph/sourcegraph-public-snapshot/internal/database/users.go"), 63 q("InternalDoer", "github.com/sourcegraph/sourcegraph-public-snapshot/internal/httpcli/client.go"), 64 q("Repository metadata Write rbac", "github.com/sourcegraph/sourcegraph-public-snapshot/internal/rbac/constants.go"), // unsure if this is the best doc? 65 66 // cody 67 q("generate unit test", "github.com/sourcegraph/cody/lib/shared/src/chat/recipes/generate-test.ts"), 68 q("r:cody sourcegraph url", "github.com/sourcegraph/cody/lib/shared/src/sourcegraph-api/graphql/client.ts"), 69 70 // zoekt 71 q("zoekt searcher", "github.com/sourcegraph/zoekt/api.go"), 72 73 // exact phrases 74 q("assets are not configured for this binary", "github.com/sourcegraph/sourcegraph-public-snapshot/ui/assets/assets.go"), 75 q("sourcegraph/server docker image build", "github.com/sourcegraph/sourcegraph-public-snapshot/dev/tools.go"), 76 77 // symbols split up 78 q("bufio flush writer", "github.com/golang/go/src/net/http/transfer.go"), // bufioFlushWriter 79 q("coverage data writer", "github.com/golang/go/src/internal/coverage/encodecounter/encode.go"), // CoverageDataWriter 80 81 // sourcegraph/conc vs golang/go 82 q("WaitGroup", "github.com/sourcegraph/conc/waitgroup.go"), 83 } 84 85 var indexDir string 86 if *useShardCache { 87 t.Logf("reusing index dir to speed up testing. If you have unexpected results remove %s", shardCache) 88 indexDir = shardCache 89 } else { 90 indexDir = t.TempDir() 91 } 92 93 for _, u := range archiveURLs { 94 if err := indexURL(indexDir, u); err != nil { 95 t.Fatal(err) 96 } 97 } 98 99 ss, err := search.NewDirectorySearcher(indexDir) 100 if err != nil { 101 t.Fatalf("NewDirectorySearcher(%s): %v", indexDir, err) 102 } 103 defer ss.Close() 104 105 var ranks []int 106 for _, rq := range queries { 107 // normalise queryStr for writing to fs 108 name := strings.Map(func(r rune) rune { 109 if strings.ContainsRune(" :", r) { 110 return '_' 111 } 112 if '0' <= r && r <= '9' || 113 'a' <= r && r <= 'z' || 114 'A' <= r && r <= 'Z' { 115 return r 116 } 117 return -1 118 }, rq.Query) 119 120 t.Run(name, func(t *testing.T) { 121 q, err := query.Parse(rq.Query) 122 if err != nil { 123 t.Fatal(err) 124 } 125 126 sOpts := zoekt.SearchOptions{ 127 // Use the same options sourcegraph has by default 128 ChunkMatches: true, 129 MaxWallTime: 20 * time.Second, 130 ShardMaxMatchCount: 10_000 * 10, 131 TotalMaxMatchCount: 100_000 * 10, 132 MaxDocDisplayCount: 500, 133 134 DebugScore: *debugScore, 135 } 136 result, err := ss.Search(context.Background(), q, &sOpts) 137 if err != nil { 138 t.Fatal(err) 139 } 140 141 ranks = append(ranks, targetRank(rq, result.Files)) 142 143 var gotBuf bytes.Buffer 144 marshalMatches(&gotBuf, rq, q, result.Files) 145 assertGolden(t, name, gotBuf.Bytes()) 146 }) 147 } 148 149 t.Run("rank_stats", func(t *testing.T) { 150 if len(ranks) != len(queries) { 151 t.Skip("not computing rank stats since not all query cases ran") 152 } 153 154 var gotBuf bytes.Buffer 155 printf := func(format string, a ...any) { 156 _, _ = fmt.Fprintf(&gotBuf, format, a...) 157 } 158 159 printf("queries: %d\n", len(ranks)) 160 161 for _, recallThreshold := range []int{1, 5} { 162 count := 0 163 for _, rank := range ranks { 164 if rank <= recallThreshold && rank > 0 { 165 count++ 166 } 167 } 168 countp := float64(count) * 100 / float64(len(ranks)) 169 printf("recall@%d: %d (%.0f%%)\n", recallThreshold, count, countp) 170 } 171 172 // Mean reciprocal rank 173 mrr := float64(0) 174 for _, rank := range ranks { 175 if rank > 0 { 176 mrr += 1 / float64(rank) 177 } 178 } 179 mrr /= float64(len(ranks)) 180 printf("mrr: %f\n", mrr) 181 182 assertGolden(t, "rank_stats", gotBuf.Bytes()) 183 }) 184} 185 186func assertGolden(t *testing.T, name string, got []byte) { 187 t.Helper() 188 189 wantPath := filepath.Join("testdata", name+".txt") 190 if *update { 191 if err := os.WriteFile(wantPath, got, 0o600); err != nil { 192 t.Fatal(err) 193 } 194 } 195 want, err := os.ReadFile(wantPath) 196 if err != nil { 197 t.Fatal(err) 198 } 199 200 if d := cmp.Diff(string(want), string(got)); d != "" { 201 t.Fatalf("unexpected (-want, +got):\n%s", d) 202 } 203} 204 205type rankingQuery struct { 206 Query string 207 Target string 208} 209 210var ( 211 tarballCache = "/tmp/zoekt-test-ranking-tarballs-" + os.Getenv("USER") 212 shardCache = "/tmp/zoekt-test-ranking-shards-" + os.Getenv("USER") 213) 214 215func indexURL(indexDir, u string) error { 216 if err := os.MkdirAll(tarballCache, 0o700); err != nil { 217 return err 218 } 219 220 opts := archive.Options{ 221 Archive: u, 222 Incremental: true, 223 } 224 opts.SetDefaults() // sets metadata like Name and the codeload URL 225 u = opts.Archive 226 227 // if opts.Commit is set but opts.Branch is not, then we just need to give 228 // the commit a name for testing. 229 if opts.Commit != "" && opts.Branch == "" { 230 opts.Branch = "test" 231 } 232 233 // update Archive location to cached location 234 cacheBase := fmt.Sprintf("%s-%s%s.tar.gz", url.QueryEscape(opts.Name), opts.Branch, opts.Commit) // assume .tar.gz 235 path := filepath.Join(tarballCache, cacheBase) 236 opts.Archive = path 237 238 if _, err := os.Stat(path); os.IsNotExist(err) { 239 if err := download(u, path); err != nil { 240 return err 241 } 242 } 243 244 // TODO scip 245 // languageMap := make(ctags.LanguageMap) 246 // for _, lang := range []string{"kotlin", "rust", "ruby", "go", "python", "javascript", "c_sharp", "scala", "typescript", "zig"} { 247 // languageMap[lang] = ctags.ScipCTags 248 // } 249 250 err := archive.Index(opts, index.Options{ 251 IndexDir: indexDir, 252 CTagsMustSucceed: true, 253 RepositoryDescription: zoekt.Repository{ 254 // Use the latest commit date to calculate the repo rank when loading the shard. 255 // This is the same setting we use in production. 256 RawConfig: map[string]string{"latestCommitDate": "1"}, 257 }, 258 }) 259 if err != nil { 260 return fmt.Errorf("failed to index %s: %w", opts.Archive, err) 261 } 262 263 return nil 264} 265 266func download(url, dst string) error { 267 tmpPath := dst + ".part" 268 269 rc, err := archive.OpenReader(url) 270 if err != nil { 271 return err 272 } 273 defer rc.Close() 274 275 f, err := os.OpenFile(tmpPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600) 276 if err != nil { 277 return err 278 } 279 defer f.Close() 280 281 _, err = io.Copy(f, rc) 282 if err != nil { 283 return err 284 } 285 286 err = f.Close() 287 if err != nil { 288 return err 289 } 290 291 return os.Rename(tmpPath, dst) 292} 293 294const ( 295 chunkMatchesPerFile = 3 296 fileMatchesPerSearch = 6 297) 298 299func docName(f zoekt.FileMatch) string { 300 return f.Repository + "/" + f.FileName 301} 302 303func marshalMatches(w io.Writer, rq rankingQuery, q query.Q, files []zoekt.FileMatch) { 304 _, _ = fmt.Fprintf(w, "queryString: %s\n", rq.Query) 305 _, _ = fmt.Fprintf(w, "query: %s\n", q) 306 _, _ = fmt.Fprintf(w, "targetRank: %d\n\n", targetRank(rq, files)) 307 308 files, hiddenFiles := splitAtIndex(files, fileMatchesPerSearch) 309 for _, f := range files { 310 doc := docName(f) 311 if doc == rq.Target { 312 doc = "**" + doc + "**" 313 } 314 _, _ = fmt.Fprintf(w, "%s%s\n", doc, addTabIfNonEmpty(f.Debug)) 315 316 chunks, hidden := splitAtIndex(f.ChunkMatches, chunkMatchesPerFile) 317 318 for _, m := range chunks { 319 _, _ = fmt.Fprintf(w, "%d:%s%s\n", m.ContentStart.LineNumber, strings.TrimRight(string(m.Content), "\n"), addTabIfNonEmpty(m.DebugScore)) 320 } 321 322 if len(hidden) > 0 { 323 _, _ = fmt.Fprintf(w, "hidden %d more line matches\n", len(hidden)) 324 } 325 _, _ = fmt.Fprintln(w) 326 } 327 328 if len(hiddenFiles) > 0 { 329 fmt.Fprintf(w, "hidden %d more file matches\n", len(hiddenFiles)) 330 } 331} 332 333func targetRank(rq rankingQuery, files []zoekt.FileMatch) int { 334 for i, f := range files { 335 if docName(f) == rq.Target { 336 return i + 1 337 } 338 } 339 return -1 340} 341 342func splitAtIndex[E any](s []E, idx int) ([]E, []E) { 343 if idx < len(s) { 344 return s[:idx], s[idx:] 345 } 346 return s, nil 347} 348 349func addTabIfNonEmpty(s string) string { 350 if s != "" { 351 return "\t" + s 352 } 353 return s 354} 355 356func requireCTags(tb testing.TB) { 357 tb.Helper() 358 359 if os.Getenv("CTAGS_COMMAND") != "" { 360 return 361 } 362 if _, err := exec.LookPath("universal-ctags"); err == nil { 363 return 364 } 365 366 // On CI we require ctags to be available. Otherwise we skip 367 if os.Getenv("CI") != "" { 368 tb.Fatal("universal-ctags is missing") 369 } else { 370 tb.Skip("universal-ctags is missing") 371 } 372} 373 374func checkScipCTags() string { 375 if ctags := os.Getenv("SCIP_CTAGS_COMMAND"); ctags != "" { 376 return ctags 377 } 378 379 if ctags, err := exec.LookPath("scip-ctags"); err == nil { 380 return ctags 381 } 382 383 return "" 384}