fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

all: add support for query.RepoBranches

RepoBranches is like our RepoSet query atom, except that it allows us to
specify which branches to search in each repository in the set. This is
required for Sourcegraph to efficiently construct queries which search
across different branches. See RFC 150 (Version Contexts) for more
context.

Change-Id: I8129dc08b2ad4aa270ad8cf6092353103c1cfacd

+106 -23
+13
eval.go
··· 44 44 switch r := q.(type) { 45 45 case *query.Repo: 46 46 return &query.Const{Value: strings.Contains(d.repoMetaData.Name, r.Pattern)} 47 + case *query.RepoBranches: 48 + branches, ok := r.Set[d.repoMetaData.Name] 49 + if !ok { 50 + return &query.Const{Value: false} 51 + } 52 + 53 + // New sub query is (or (branch branches[0]) ...) 54 + qs := make([]query.Q, len(branches)) 55 + for i, branch := range branches { 56 + // TODO we need exact matching, not pattern matching 57 + qs[i] = &query.Branch{Pattern: branch} 58 + } 59 + return query.NewOr(qs...) 47 60 case *query.RepoSet: 48 61 return &query.Const{Value: r.Set[d.repoMetaData.Name]} 49 62 case *query.Language:
+28 -1
query/query.go
··· 124 124 return fmt.Sprintf("repo:%s", q.Pattern) 125 125 } 126 126 127 + // RepoBranches is a list of branches in repos to match. It is a Sourcegraph 128 + // addition and only used in the RPC interface for efficient checking of large 129 + // repo lists. 130 + type RepoBranches struct { 131 + // Set is map reponame -> [branch] 132 + Set map[string][]string 133 + } 134 + 135 + func (q *RepoBranches) String() string { 136 + var detail string 137 + if len(q.Set) > 5 { 138 + // Large sets being output are not useful 139 + detail = fmt.Sprintf("size=%d", len(q.Set)) 140 + } else { 141 + repos := make([]string, len(q.Set)) 142 + i := 0 143 + for repo, branches := range q.Set { 144 + // repo@master:develop:master 145 + repos[i] = fmt.Sprintf("%s@%s", repo, strings.Join(branches, ":")) 146 + i++ 147 + } 148 + sort.Strings(repos) 149 + detail = strings.Join(repos, " ") 150 + } 151 + return fmt.Sprintf("(reposet %s)", detail) 152 + } 153 + 127 154 // RepoSet is a list of repos to match. It is a Sourcegraph addition and only 128 - // used in the Rest interface for efficient checking of large repo lists. 155 + // used in the RPC interface for efficient checking of large repo lists. 129 156 type RepoSet struct { 130 157 Set map[string]bool 131 158 }
+1
rpc/rpc.go
··· 127 127 gob.Register(&query.Or{}) 128 128 gob.Register(&query.Regexp{}) 129 129 gob.Register(&query.RepoSet{}) 130 + gob.Register(&query.RepoBranches{}) 130 131 gob.Register(&query.Repo{}) 131 132 gob.Register(&query.Substring{}) 132 133 gob.Register(&query.Symbol{})
-2
shards/eval.go
··· 81 81 return nil 82 82 } 83 83 84 - // TODO handle branches somehow? Our reposet will need to change to 85 - // handle that. 86 84 rs := &query.RepoSet{Set: make(map[string]bool, len(rl.Repos))} 87 85 for _, r := range rl.Repos { 88 86 rs.Set[r.Repository.Name] = true
+40 -5
shards/shards.go
··· 214 214 return shards, q 215 215 } 216 216 217 + // (and (reposet ...) (q)) 218 + // (and true (q)) with a filtered shards 219 + // (and false) // noop 220 + 221 + // (and (repobranches ...) (q)) 222 + // (and (repobranches ...) (q)) 223 + 224 + // TODO RepoBranches 225 + 226 + // TODO implement optimization 217 227 for i, c := range and.Children { 218 - setQuery, ok := c.(*query.RepoSet) 219 - if !ok { 228 + var setSize int 229 + var hasRepo func(string) bool 230 + 231 + switch setQuery := c.(type) { 232 + case *query.RepoSet: 233 + setSize = len(setQuery.Set) 234 + hasRepo = func(name string) bool { 235 + return setQuery.Set[name] 236 + } 237 + case *query.RepoBranches: 238 + setSize = len(setQuery.Set) 239 + hasRepo = func(name string) bool { 240 + return len(setQuery.Set[name]) > 0 241 + } 242 + default: 220 243 continue 221 244 } 222 245 223 - filtered := make([]rankedShard, 0, len(setQuery.Set)) 246 + filtered := make([]rankedShard, 0, setSize) 224 247 225 248 for _, s := range shards { 226 249 if repositorer, ok := s.Searcher.(repositorer); ok { 227 250 repo := repositorer.Repository() 228 - if setQuery.Set[repo.Name] { 251 + if hasRepo(repo.Name) { 229 252 filtered = append(filtered, s) 230 253 } 231 254 } 232 255 } 233 - and.Children[i] = &query.Const{Value: len(filtered) > 0} 256 + 257 + if _, ok := c.(*query.RepoSet); ok { 258 + // This optimization allows us to avoid the work done by 259 + // indexData.simplify for each shard. 260 + // 261 + // For example if our query is (and (reposet foo bar) (content baz)) 262 + // then at this point filtered is [foo bar] and q is the same. For each 263 + // shard indexData.simplify will simplify to (and true (content baz)) -> 264 + // (content baz). This work can be done now once, rather than per shard. 
265 + and.Children[i] = &query.Const{Value: len(filtered) > 0} 266 + } 267 + // TODO the same optimization for RepoBranches in the common case (all 268 + // repos are searching HEAD) 234 269 235 270 // Stop after first RepoSet, otherwise we might append duplicate 236 271 // shards to `filtered`
+24 -15
shards/shards_test.go
··· 189 189 t.Fatalf("no reposet: got %d results, want %d", len(res.Files), n) 190 190 } 191 191 192 + repoBranches := &query.RepoBranches{Set: make(map[string][]string)} 193 + for _, name := range repoSetNames { 194 + repoBranches.Set[name] = []string{"HEAD"} 195 + } 196 + 192 197 set := query.NewRepoSet(repoSetNames...) 193 198 sub := &query.Substring{Pattern: "bla"} 194 - res, err = ss.Search(context.Background(), query.NewAnd(set, sub), &zoekt.SearchOptions{}) 195 - if err != nil { 196 - t.Errorf("Search: %v", err) 197 - } 198 - // Note: Assertion is based on fact that `rankSearcher` always returns a 199 - // result and using repoSet will half the number of results 200 - if len(res.Files) != len(repoSetNames) { 201 - t.Fatalf("with reposet: got %d results, want %d", len(res.Files), len(repoSetNames)) 199 + 200 + queries := []query.Q{ 201 + query.NewAnd(set, sub), 202 + // Test with the same reposet again 203 + query.NewAnd(set, sub), 204 + 205 + query.NewAnd(repoBranches, sub), 206 + // Test with the same repoBranches again 207 + query.NewAnd(repoBranches, sub), 202 208 } 203 209 204 - // With the same reposet multiple times 205 - res, err = ss.Search(context.Background(), query.NewAnd(set, set, sub), &zoekt.SearchOptions{}) 206 - if err != nil { 207 - t.Errorf("Search: %v", err) 208 - } 209 - if len(res.Files) != len(repoSetNames) { 210 - t.Fatalf("with reposet multiple times: got %d results, want %d", len(res.Files), len(repoSetNames)) 210 + for _, q := range queries { 211 + res, err = ss.Search(context.Background(), q, &zoekt.SearchOptions{}) 212 + if err != nil { 213 + t.Errorf("Search(%s): %v", q, err) 214 + } 215 + // Note: Assertion is based on fact that `rankSearcher` always returns a 216 + // result and using repoSet will half the number of results 217 + if len(res.Files) != len(repoSetNames) { 218 + t.Fatalf("%s: got %d results, want %d", q, len(res.Files), len(repoSetNames)) 219 + } 211 220 } 212 221 } 213 222