fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

archive: e2e test for ranking against sourcegraph repo (#695)

This is an initial framework for checking search results against golden
files, using a real repository. For now we have added only one query
and one repository, but it should be straightforward to grow this list
further.

The golden files we write to disk are a summary of results. This matches how
we have been using the zoekt CLI tool on the keyword branch during our ranking
work.

Test Plan: go test

+285
+245
cmd/zoekt-archive-index/e2e_rank_test.go
··· 1 + package main 2 + 3 + import ( 4 + "bytes" 5 + "context" 6 + "flag" 7 + "fmt" 8 + "io" 9 + "net/url" 10 + "os" 11 + "os/exec" 12 + "path/filepath" 13 + "strings" 14 + "testing" 15 + "time" 16 + 17 + "github.com/google/go-cmp/cmp" 18 + "github.com/sourcegraph/zoekt" 19 + "github.com/sourcegraph/zoekt/build" 20 + "github.com/sourcegraph/zoekt/query" 21 + "github.com/sourcegraph/zoekt/shards" 22 + ) 23 + 24 + var update = flag.Bool("update", false, "update golden file") 25 + 26 + // debugScore can be set to include much more output. Do not commit the 27 + // updated golden files, this is purely used for debugging in a local 28 + // environment. 29 + var debugScore = flag.Bool("debug_score", false, "include debug output in golden files.") 30 + 31 + func TestRanking(t *testing.T) { 32 + if testing.Short() { 33 + t.Skip("skipping due to short flag") 34 + } 35 + 36 + requireCTags(t) 37 + 38 + archiveURLs := []string{ 39 + "https://github.com/sourcegraph/sourcegraph/tree/v5.2.2", 40 + } 41 + queries := []string{ 42 + "graphql type User", 43 + } 44 + 45 + indexDir := t.TempDir() 46 + 47 + for _, u := range archiveURLs { 48 + if err := indexURL(indexDir, u); err != nil { 49 + t.Fatal(err) 50 + } 51 + } 52 + 53 + ss, err := shards.NewDirectorySearcher(indexDir) 54 + if err != nil { 55 + t.Fatalf("NewDirectorySearcher(%s): %v", indexDir, err) 56 + } 57 + defer ss.Close() 58 + 59 + for _, queryStr := range queries { 60 + // normalise queryStr for writing to fs 61 + name := strings.Map(func(r rune) rune { 62 + if strings.ContainsRune(" :", r) { 63 + return '_' 64 + } 65 + if '0' <= r && r <= '9' || 66 + 'a' <= r && r <= 'z' || 67 + 'A' <= r && r <= 'Z' { 68 + return r 69 + } 70 + return -1 71 + }, queryStr) 72 + 73 + t.Run(name, func(t *testing.T) { 74 + q, err := query.Parse(queryStr) 75 + if err != nil { 76 + t.Fatal(err) 77 + } 78 + 79 + sOpts := zoekt.SearchOptions{ 80 + // Use the same options sourcegraph has by default 81 + ChunkMatches: true, 82 + MaxWallTime: 20 * 
time.Second, 83 + ShardMaxMatchCount: 10_000 * 10, 84 + TotalMaxMatchCount: 100_000 * 10, 85 + MaxDocDisplayCount: 500, 86 + 87 + DebugScore: *debugScore, 88 + } 89 + result, err := ss.Search(context.Background(), q, &sOpts) 90 + if err != nil { 91 + t.Fatal(err) 92 + } 93 + 94 + var gotBuf bytes.Buffer 95 + marshalMatches(&gotBuf, queryStr, q, result.Files) 96 + got := gotBuf.Bytes() 97 + 98 + wantPath := filepath.Join("testdata", name+".txt") 99 + if *update { 100 + if err := os.WriteFile(wantPath, got, 0600); err != nil { 101 + t.Fatal(err) 102 + } 103 + } 104 + want, err := os.ReadFile(wantPath) 105 + if err != nil { 106 + t.Fatal(err) 107 + } 108 + 109 + if d := cmp.Diff(string(want), string(got)); d != "" { 110 + t.Fatalf("unexpected (-want, +got):\n%s", d) 111 + } 112 + }) 113 + } 114 + } 115 + 116 + var tarballCache = "/tmp/zoekt-test-ranking-tarballs-" + os.Getenv("USER") 117 + 118 + func indexURL(indexDir, u string) error { 119 + if err := os.MkdirAll(tarballCache, 0700); err != nil { 120 + return err 121 + } 122 + 123 + opts := Options{ 124 + Archive: u, 125 + } 126 + opts.SetDefaults() // sets metadata like Name and the codeload URL 127 + u = opts.Archive 128 + 129 + // update Archive location to cached location 130 + cacheBase := fmt.Sprintf("%s-%s%s.tar.gz", url.QueryEscape(opts.Name), opts.Branch, opts.Commit) // assume .tar.gz 131 + path := filepath.Join(tarballCache, cacheBase) 132 + opts.Archive = path 133 + 134 + if _, err := os.Stat(path); os.IsNotExist(err) { 135 + if err := download(u, path); err != nil { 136 + return err 137 + } 138 + } 139 + 140 + // TODO scip 141 + // languageMap := make(ctags.LanguageMap) 142 + // for _, lang := range []string{"kotlin", "rust", "ruby", "go", "python", "javascript", "c_sharp", "scala", "typescript", "zig"} { 143 + // languageMap[lang] = ctags.ScipCTags 144 + // } 145 + 146 + err := do(opts, build.Options{ 147 + IndexDir: indexDir, 148 + CTagsMustSucceed: true, 149 + }) 150 + if err != nil { 151 + return 
fmt.Errorf("failed to index %s: %w", opts.Archive, err) 152 + } 153 + 154 + return nil 155 + } 156 + 157 + func download(url, dst string) error { 158 + tmpPath := dst + ".part" 159 + 160 + rc, err := openReader(url) 161 + if err != nil { 162 + return err 163 + } 164 + defer rc.Close() 165 + 166 + f, err := os.OpenFile(tmpPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600) 167 + if err != nil { 168 + return err 169 + } 170 + defer f.Close() 171 + 172 + _, err = io.Copy(f, rc) 173 + if err != nil { 174 + return err 175 + } 176 + 177 + err = f.Close() 178 + if err != nil { 179 + return err 180 + } 181 + 182 + return os.Rename(tmpPath, dst) 183 + } 184 + 185 + const ( 186 + chunkMatchesPerFile = 3 187 + fileMatchesPerSearch = 6 188 + ) 189 + 190 + func marshalMatches(w io.Writer, queryStr string, q query.Q, files []zoekt.FileMatch) { 191 + _, _ = fmt.Fprintf(w, "queryString: %s\n", queryStr) 192 + _, _ = fmt.Fprintf(w, "query: %s\n\n", q) 193 + 194 + files, hiddenFiles := splitAtIndex(files, fileMatchesPerSearch) 195 + for _, f := range files { 196 + _, _ = fmt.Fprintf(w, "%s/%s%s\n", f.Repository, f.FileName, addTabIfNonEmpty(f.Debug)) 197 + 198 + chunks, hidden := splitAtIndex(f.ChunkMatches, chunkMatchesPerFile) 199 + 200 + for _, m := range chunks { 201 + _, _ = fmt.Fprintf(w, "%d:%s%s\n", m.ContentStart.LineNumber, string(m.Content), addTabIfNonEmpty(m.DebugScore)) 202 + } 203 + 204 + if len(hidden) > 0 { 205 + _, _ = fmt.Fprintf(w, "hidden %d more line matches\n", len(hidden)) 206 + } 207 + _, _ = fmt.Fprintln(w) 208 + } 209 + 210 + if len(hiddenFiles) > 0 { 211 + fmt.Fprintf(w, "hidden %d more file matches\n", len(hiddenFiles)) 212 + } 213 + } 214 + 215 + func splitAtIndex[E any](s []E, idx int) ([]E, []E) { 216 + if idx < len(s) { 217 + return s[:idx], s[idx:] 218 + } 219 + return s, nil 220 + } 221 + 222 + func addTabIfNonEmpty(s string) string { 223 + if s != "" { 224 + return "\t" + s 225 + } 226 + return s 227 + } 228 + 229 + func requireCTags(tb testing.TB) { 
230 + tb.Helper() 231 + 232 + if os.Getenv("CTAGS_COMMAND") != "" { 233 + return 234 + } 235 + if _, err := exec.LookPath("universal-ctags"); err == nil { 236 + return 237 + } 238 + 239 + // On CI we require ctags to be available. Otherwise we skip 240 + if os.Getenv("CI") != "" { 241 + tb.Fatal("universal-ctags is missing") 242 + } else { 243 + tb.Skip("universal-ctags is missing") 244 + } 245 + }
+40
cmd/zoekt-archive-index/testdata/graphql_type_User.txt
··· 1 + queryString: graphql type User 2 + query: (and substr:"graphql" substr:"type" case_substr:"User") 3 + 4 + github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/schema.graphql 5 + 6376:type User implements Node & SettingsSubject & Namespace { 6 + 3862: type: GitRefType 7 + 5037: type: GitRefType! 8 + hidden 460 more line matches 9 + 10 + github.com/sourcegraph/sourcegraph/internal/types/types.go 11 + 850:type User struct { 12 + 1372: Type *SearchCountStatistics 13 + 1766: Type string 14 + hidden 234 more line matches 15 + 16 + github.com/sourcegraph/sourcegraph/client/web/src/enterprise/insights/core/backend/gql-backend/methods/get-dashboard-owners.ts 17 + 22: type: InsightsDashboardOwnerType.Global, 18 + 32: type: InsightsDashboardOwnerType.Personal, 19 + 18: const { currentUser, site } = data 20 + hidden 8 more line matches 21 + 22 + github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/apitest/types.go 23 + 47:type User struct { 24 + 9: Typename string `json:"__typename"` 25 + 32: Typename string `json:"__typename"` 26 + hidden 11 more line matches 27 + 28 + github.com/sourcegraph/sourcegraph/cmd/frontend/internal/batches/resolvers/apitest/types.go 29 + 52:type User struct { 30 + 364: User *User 31 + 393: Type string 32 + hidden 68 more line matches 33 + 34 + github.com/sourcegraph/sourcegraph/internal/extsvc/github/common.go 35 + 2030:type User struct { 36 + 66: User *Actor `json:"User,omitempty"` 37 + 527: Type string 38 + hidden 136 more line matches 39 + 40 + hidden 494 more file matches