fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

shards: optimize RepoBranches when all branches are the same (#54)

This applies the same idea as the existing optimization for RepoSet. We can
avoid the work of running simplify per shard if we know that every shard in
filtered will be searching the same branches.

+69 -31
+1 -11
eval.go
···
 45  45   case *query.Repo:
 46  46       return &query.Const{Value: strings.Contains(d.repoMetaData.Name, r.Pattern)}
 47  47   case *query.RepoBranches:
 48     -     branches, ok := r.Set[d.repoMetaData.Name]
 49     -     if !ok {
 50     -         return &query.Const{Value: false}
 51     -     }
 52     -
 53     -     // New sub query is (or (branch branches[0]) ...)
 54     -     qs := make([]query.Q, len(branches))
 55     -     for i, branch := range branches {
 56     -         qs[i] = &query.Branch{Pattern: branch, Exact: true}
 57     -     }
 58     -     return query.NewOr(qs...)
     48 +     return r.Branches(d.repoMetaData.Name)
 59  49   case *query.RepoSet:
 60  50       return &query.Const{Value: r.Set[d.repoMetaData.Name]}
 61  51   case *query.Language:
+16 -1
query/query.go
···
148 148           sort.Strings(repos)
149 149           detail = strings.Join(repos, " ")
150 150       }
151     -     return fmt.Sprintf("(reposet %s)", detail)
    151 +     return fmt.Sprintf("(repobranches %s)", detail)
    152 + }
    153 +
    154 + // Branches returns a query representing the branches to search for name.
    155 + func (q *RepoBranches) Branches(name string) Q {
    156 +     branches, ok := q.Set[name]
    157 +     if !ok {
    158 +         return &Const{Value: false}
    159 +     }
    160 +
    161 +     // New sub query is (or (branch branches[0]) ...)
    162 +     qs := make([]Q, len(branches))
    163 +     for i, branch := range branches {
    164 +         qs[i] = &Branch{Pattern: branch, Exact: true}
    165 +     }
    166 +     return NewOr(qs...)
152 167   }
153 168
154 169   // MarshalBinary implements a specialized encoder for RepoBranches.
+52 -19
shards/shards.go
···
216 216       // (and (repobranches ...) (q))
217 217       // (and (repobranches ...) (q))
218 218
219     -     // TODO RepoBranches
220     -
221     -     // TODO implement optimization
222 219       for i, c := range and.Children {
223 220           var setSize int
224 221           var hasRepo func(string) bool
···
238 235               continue
239 236           }
240 237
    238 +         // setSize may be larger than the number of shards we have. The size of
    239 +         // filtered is bounded by min(len(set), len(shards))
    240 +         if setSize > len(shards) {
    241 +             setSize = len(shards)
    242 +         }
    243 +
241 244           filtered := make([]rankedShard, 0, setSize)
242 245
243 246           for _, s := range shards {
244     -             if repositorer, ok := s.Searcher.(repositorer); ok {
245     -                 repo := repositorer.Repository()
246     -                 if hasRepo(repo.Name) {
247     -                     filtered = append(filtered, s)
248     -                 }
    247 +             if hasRepo(s.name) {
    248 +                 filtered = append(filtered, s)
249 249               }
250 250           }
251 251
    252 +         // We don't need to adjust the query since we are returning an empty set
    253 +         // of shards to search.
    254 +         if len(filtered) == 0 {
    255 +             return filtered, and
    256 +         }
    257 +
    258 +         // This optimization allows us to avoid the work done by
    259 +         // indexData.simplify for each shard.
    260 +         //
    261 +         // For example if our query is (and (reposet foo bar) (content baz))
    262 +         // then at this point filtered is [foo bar] and q is the same. For each
    263 +         // shard indexData.simplify will simplify to (and true (content baz)) ->
    264 +         // (content baz). This work can be done now once, rather than per shard.
252 265           if _, ok := c.(*query.RepoSet); ok {
253     -             // This optimization allows us to avoid the work done by
254     -             // indexData.simplify for each shard.
255     -             //
256     -             // For example if our query is (and (reposet foo bar) (content baz))
257     -             // then at this point filtered is [foo bar] and q is the same. For each
258     -             // shard indexData.simplify will simplify to (and true (content baz)) ->
259     -             // (content baz). This work can be done now once, rather than per shard.
260     -             and.Children[i] = &query.Const{Value: len(filtered) > 0}
    266 +             and.Children[i] = &query.Const{Value: true}
    267 +             return filtered, query.Simplify(and)
    268 +         }
    269 +         if b, ok := c.(*query.RepoBranches); ok {
    270 +             // We can only replace if all the repos want the same branches.
    271 +             want := b.Set[filtered[0].name]
    272 +             for _, s := range filtered[1:] {
    273 +                 if !strSliceEqual(want, b.Set[s.name]) {
    274 +                     return filtered, and
    275 +                 }
    276 +             }
    277 +
    278 +             // Every repo wants the same branches, so we can replace RepoBranches
    279 +             // with a list of branch queries.
    280 +             and.Children[i] = b.Branches(filtered[0].name)
    281 +             return filtered, query.Simplify(and)
261 282           }
262     -         // TODO the same optimization for RepoBranches in the common case (all
263     -         // repos are searching HEAD)
264 283
265 284           // Stop after first RepoSet, otherwise we might append duplicate
266 285           // shards to `filtered`
267     -         return filtered, query.Simplify(and)
    286 +         return filtered, and
268 287       }
269 288
270 289       return shards, and
···
322 341       start = time.Now()
323 342
324 343       shards := ss.getShards()
    344 +     tr.LazyPrintf("before selectRepoSet shards:%d", len(shards))
325 345       shards, q = selectRepoSet(shards, q)
    346 +     tr.LazyPrintf("after selectRepoSet shards:%d %s", len(shards), q)
326 347
327 348       all := make(chan shardResult, len(shards))
328 349
···
617 638
618 639       return s, nil
619 640   }
    641 +
    642 + func strSliceEqual(a, b []string) bool {
    643 +     if len(a) != len(b) {
    644 +         return false
    645 +     }
    646 +     for i := range a {
    647 +         if a[i] != b[i] {
    648 +             return false
    649 +         }
    650 +     }
    651 +     return true
    652 + }