fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1package e2e 2 3import ( 4 "bytes" 5 "context" 6 "flag" 7 "fmt" 8 "io" 9 "net/url" 10 "os" 11 "os/exec" 12 "path/filepath" 13 "strings" 14 "testing" 15 "time" 16 17 "github.com/google/go-cmp/cmp" 18 "github.com/sourcegraph/zoekt" 19 "github.com/sourcegraph/zoekt/build" 20 "github.com/sourcegraph/zoekt/internal/archive" 21 "github.com/sourcegraph/zoekt/query" 22 "github.com/sourcegraph/zoekt/shards" 23) 24 25var update = flag.Bool("update", false, "update golden file") 26 27var useShardCache = flag.Bool("shard_cache", false, "cache computed shards for faster test runs") 28 29// debugScore can be set to include much more output. Do not commit the 30// updated golden files, this is purely used for debugging in a local 31// environment. 32var debugScore = flag.Bool("debug_score", false, "include debug output in golden files.") 33 34func TestRanking(t *testing.T) { 35 if testing.Short() { 36 t.Skip("skipping due to short flag") 37 } 38 39 requireCTags(t) 40 41 archiveURLs := []string{ 42 "https://github.com/sourcegraph/sourcegraph/tree/v5.2.2", 43 "https://github.com/golang/go/tree/go1.21.4", 44 "https://github.com/sourcegraph/cody/tree/vscode-v0.14.5", 45 // The commit before ranking e2e tests were added to avoid matching 46 // content inside our golden files. 47 "https://github.com/sourcegraph/zoekt/commit/ef907c2371176aa3f97713d5bf182983ef090c6a", 48 } 49 q := func(query, target string) rankingQuery { 50 return rankingQuery{Query: query, Target: target} 51 } 52 queries := []rankingQuery{ 53 // golang/go 54 q("test server", "github.com/golang/go/src/net/http/httptest/server.go"), 55 q("bytes buffer", "github.com/golang/go/src/bytes/buffer.go"), 56 q("bufio buffer", "github.com/golang/go/src/bufio/scan.go"), 57 58 // sourcegraph/sourcegraph 59 q("graphql type User", "github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/schema.graphql"), 60 q("Get database/user", "github.com/sourcegraph/sourcegraph/internal/database/users.go"), 61 q("InternalDoer", "github.com/sourcegraph/sourcegraph/internal/httpcli/client.go"), 62 q("Repository metadata Write rbac", "github.com/sourcegraph/sourcegraph/internal/rbac/constants.go"), // unsure if this is the best doc? 63 64 // cody 65 q("generate unit test", "github.com/sourcegraph/cody/lib/shared/src/chat/recipes/generate-test.ts"), 66 q("r:cody sourcegraph url", "github.com/sourcegraph/cody/lib/shared/src/sourcegraph-api/graphql/client.ts"), 67 68 // zoekt 69 q("zoekt searcher", "github.com/sourcegraph/zoekt/api.go"), 70 71 // exact phrases 72 q("assets are not configured for this binary", "github.com/sourcegraph/sourcegraph/ui/assets/assets.go"), 73 q("sourcegraph/server docker image build", "github.com/sourcegraph/sourcegraph/dev/tools.go"), 74 75 // symbols split up 76 q("bufio flush writer", "github.com/golang/go/src/net/http/transfer.go"), // bufioFlushWriter 77 q("coverage data writer", "github.com/golang/go/src/internal/coverage/encodecounter/encode.go"), // CoverageDataWriter 78 } 79 80 var indexDir string 81 if *useShardCache { 82 t.Logf("reusing index dir to speed up testing. If you have unexpected results remove %s", shardCache) 83 indexDir = shardCache 84 } else { 85 indexDir = t.TempDir() 86 } 87 88 for _, u := range archiveURLs { 89 if err := indexURL(indexDir, u); err != nil { 90 t.Fatal(err) 91 } 92 } 93 94 ss, err := shards.NewDirectorySearcher(indexDir) 95 if err != nil { 96 t.Fatalf("NewDirectorySearcher(%s): %v", indexDir, err) 97 } 98 defer ss.Close() 99 100 var ranks []int 101 for _, rq := range queries { 102 // normalise queryStr for writing to fs 103 name := strings.Map(func(r rune) rune { 104 if strings.ContainsRune(" :", r) { 105 return '_' 106 } 107 if '0' <= r && r <= '9' || 108 'a' <= r && r <= 'z' || 109 'A' <= r && r <= 'Z' { 110 return r 111 } 112 return -1 113 }, rq.Query) 114 115 t.Run(name, func(t *testing.T) { 116 q, err := query.Parse(rq.Query) 117 if err != nil { 118 t.Fatal(err) 119 } 120 121 // q is marshalled as part of the test, so avoid our rewrites for 122 // ranking. 123 qSearch := query.ExpirementalPhraseBoost(q, rq.Query, query.ExperimentalPhraseBoostOptions{}) 124 125 sOpts := zoekt.SearchOptions{ 126 // Use the same options sourcegraph has by default 127 ChunkMatches: true, 128 MaxWallTime: 20 * time.Second, 129 ShardMaxMatchCount: 10_000 * 10, 130 TotalMaxMatchCount: 100_000 * 10, 131 MaxDocDisplayCount: 500, 132 133 DebugScore: *debugScore, 134 } 135 result, err := ss.Search(context.Background(), qSearch, &sOpts) 136 if err != nil { 137 t.Fatal(err) 138 } 139 140 ranks = append(ranks, targetRank(rq, result.Files)) 141 142 var gotBuf bytes.Buffer 143 marshalMatches(&gotBuf, rq, q, result.Files) 144 assertGolden(t, name, gotBuf.Bytes()) 145 }) 146 } 147 148 t.Run("rank_stats", func(t *testing.T) { 149 if len(ranks) != len(queries) { 150 t.Skip("not computing rank stats since not all query cases ran") 151 } 152 153 var gotBuf bytes.Buffer 154 printf := func(format string, a ...any) { 155 _, _ = fmt.Fprintf(&gotBuf, format, a...) 156 } 157 158 printf("queries: %d\n", len(ranks)) 159 160 for _, recallThreshold := range []int{1, 5} { 161 count := 0 162 for _, rank := range ranks { 163 if rank <= recallThreshold && rank > 0 { 164 count++ 165 } 166 } 167 countp := float64(count) * 100 / float64(len(ranks)) 168 printf("recall@%d: %d (%.0f%%)\n", recallThreshold, count, countp) 169 } 170 171 // Mean reciprocal rank 172 mrr := float64(0) 173 for _, rank := range ranks { 174 if rank > 0 { 175 mrr += 1 / float64(rank) 176 } 177 } 178 mrr /= float64(len(ranks)) 179 printf("mrr: %f\n", mrr) 180 181 assertGolden(t, "rank_stats", gotBuf.Bytes()) 182 }) 183} 184 185func assertGolden(t *testing.T, name string, got []byte) { 186 t.Helper() 187 188 wantPath := filepath.Join("testdata", name+".txt") 189 if *update { 190 if err := os.WriteFile(wantPath, got, 0o600); err != nil { 191 t.Fatal(err) 192 } 193 } 194 want, err := os.ReadFile(wantPath) 195 if err != nil { 196 t.Fatal(err) 197 } 198 199 if d := cmp.Diff(string(want), string(got)); d != "" { 200 t.Fatalf("unexpected (-want, +got):\n%s", d) 201 } 202} 203 204type rankingQuery struct { 205 Query string 206 Target string 207} 208 209var ( 210 tarballCache = "/tmp/zoekt-test-ranking-tarballs-" + os.Getenv("USER") 211 shardCache = "/tmp/zoekt-test-ranking-shards-" + os.Getenv("USER") 212) 213 214func indexURL(indexDir, u string) error { 215 if err := os.MkdirAll(tarballCache, 0o700); err != nil { 216 return err 217 } 218 219 opts := archive.Options{ 220 Archive: u, 221 Incremental: true, 222 } 223 opts.SetDefaults() // sets metadata like Name and the codeload URL 224 u = opts.Archive 225 226 // if opts.Commit is set but opts.Branch is not, then we just need to give 227 // the commit a name for testing. 228 if opts.Commit != "" && opts.Branch == "" { 229 opts.Branch = "test" 230 } 231 232 // update Archive location to cached location 233 cacheBase := fmt.Sprintf("%s-%s%s.tar.gz", url.QueryEscape(opts.Name), opts.Branch, opts.Commit) // assume .tar.gz 234 path := filepath.Join(tarballCache, cacheBase) 235 opts.Archive = path 236 237 if _, err := os.Stat(path); os.IsNotExist(err) { 238 if err := download(u, path); err != nil { 239 return err 240 } 241 } 242 243 // TODO scip 244 // languageMap := make(ctags.LanguageMap) 245 // for _, lang := range []string{"kotlin", "rust", "ruby", "go", "python", "javascript", "c_sharp", "scala", "typescript", "zig"} { 246 // languageMap[lang] = ctags.ScipCTags 247 // } 248 249 err := archive.Index(opts, build.Options{ 250 IndexDir: indexDir, 251 CTagsMustSucceed: true, 252 }) 253 if err != nil { 254 return fmt.Errorf("failed to index %s: %w", opts.Archive, err) 255 } 256 257 return nil 258} 259 260func download(url, dst string) error { 261 tmpPath := dst + ".part" 262 263 rc, err := archive.OpenReader(url) 264 if err != nil { 265 return err 266 } 267 defer rc.Close() 268 269 f, err := os.OpenFile(tmpPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600) 270 if err != nil { 271 return err 272 } 273 defer f.Close() 274 275 _, err = io.Copy(f, rc) 276 if err != nil { 277 return err 278 } 279 280 err = f.Close() 281 if err != nil { 282 return err 283 } 284 285 return os.Rename(tmpPath, dst) 286} 287 288const ( 289 chunkMatchesPerFile = 3 290 fileMatchesPerSearch = 6 291) 292 293func docName(f zoekt.FileMatch) string { 294 return f.Repository + "/" + f.FileName 295} 296 297func marshalMatches(w io.Writer, rq rankingQuery, q query.Q, files []zoekt.FileMatch) { 298 _, _ = fmt.Fprintf(w, "queryString: %s\n", rq.Query) 299 _, _ = fmt.Fprintf(w, "query: %s\n", q) 300 _, _ = fmt.Fprintf(w, "targetRank: %d\n\n", targetRank(rq, files)) 301 302 files, hiddenFiles := splitAtIndex(files, fileMatchesPerSearch) 303 for _, f := range files { 304 doc := docName(f) 305 if doc == rq.Target { 306 doc = "**" + doc + "**" 307 } 308 _, _ = fmt.Fprintf(w, "%s%s\n", doc, addTabIfNonEmpty(f.Debug)) 309 310 chunks, hidden := splitAtIndex(f.ChunkMatches, chunkMatchesPerFile) 311 312 for _, m := range chunks { 313 _, _ = fmt.Fprintf(w, "%d:%s%s\n", m.ContentStart.LineNumber, string(m.Content), addTabIfNonEmpty(m.DebugScore)) 314 } 315 316 if len(hidden) > 0 { 317 _, _ = fmt.Fprintf(w, "hidden %d more line matches\n", len(hidden)) 318 } 319 _, _ = fmt.Fprintln(w) 320 } 321 322 if len(hiddenFiles) > 0 { 323 fmt.Fprintf(w, "hidden %d more file matches\n", len(hiddenFiles)) 324 } 325} 326 327func targetRank(rq rankingQuery, files []zoekt.FileMatch) int { 328 for i, f := range files { 329 if docName(f) == rq.Target { 330 return i + 1 331 } 332 } 333 return -1 334} 335 336func splitAtIndex[E any](s []E, idx int) ([]E, []E) { 337 if idx < len(s) { 338 return s[:idx], s[idx:] 339 } 340 return s, nil 341} 342 343func addTabIfNonEmpty(s string) string { 344 if s != "" { 345 return "\t" + s 346 } 347 return s 348} 349 350func requireCTags(tb testing.TB) { 351 tb.Helper() 352 353 if os.Getenv("CTAGS_COMMAND") != "" { 354 return 355 } 356 if _, err := exec.LookPath("universal-ctags"); err == nil { 357 return 358 } 359 360 // On CI we require ctags to be available. Otherwise we skip 361 if os.Getenv("CI") != "" { 362 tb.Fatal("universal-ctags is missing") 363 } else { 364 tb.Skip("universal-ctags is missing") 365 } 366}