fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

matchtree: disable word search optimization for symbol search (#571)

Before this change, case-sensitive symbol searches of the form
\bLITERAL\b would result in the error message:

found *zoekt.andMatchTree inside query.Symbol

The root cause is that the symbol search code is pretty janky. It
constructs a matchtree and then pulls out either the regex matchtree or
the substr matchtree, then creates a specific symbol searcher for those.
It feels like it could be more generic rather than a bunch of
hard-to-follow copy pasta. For now, disabling the word search
optimization was a more straightforward fix than creating a word search
for symbols.

Test Plan: expanded the unit tests to cover word search in more cases.
Additionally ran zoekt-webserver and checked that a search like the
following works now:

sym:\bnewMatchTree\b case:yes

+64 -30
+1 -1
eval.go
··· 169 169 170 170 q = query.Map(q, query.ExpandFileContent) 171 171 172 - mt, err := d.newMatchTree(q) 172 + mt, err := d.newMatchTree(q, matchTreeOpt{}) 173 173 if err != nil { 174 174 return nil, err 175 175 }
+21 -8
matchtree.go
··· 834 834 return len(t.current) > 0, true 835 835 } 836 836 837 - func (d *indexData) newMatchTree(q query.Q) (matchTree, error) { 837 + type matchTreeOpt struct { 838 + // DisableWordMatchOptimization is used to disable the use of wordMatchTree. 839 + // This was added since we do not support wordMatchTree with symbol search. 840 + DisableWordMatchOptimization bool 841 + } 842 + 843 + func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) { 838 844 if q == nil { 839 845 return nil, fmt.Errorf("got nil (sub)query") 840 846 } ··· 856 862 } 857 863 858 864 var tr matchTree 859 - if wmt, ok := regexpToWordMatchTree(s); ok { 865 + if wmt, ok := regexpToWordMatchTree(s, opt); ok { 860 866 // A common search we get is "\bLITERAL\b". Avoid the regex engine and 861 867 // provide something faster. 862 868 tr = wmt ··· 880 886 case *query.And: 881 887 var r []matchTree 882 888 for _, ch := range s.Children { 883 - ct, err := d.newMatchTree(ch) 889 + ct, err := d.newMatchTree(ch, opt) 884 890 if err != nil { 885 891 return nil, err 886 892 } ··· 890 896 case *query.Or: 891 897 var r []matchTree 892 898 for _, ch := range s.Children { 893 - ct, err := d.newMatchTree(ch) 899 + ct, err := d.newMatchTree(ch, opt) 894 900 if err != nil { 895 901 return nil, err 896 902 } ··· 898 904 } 899 905 return &orMatchTree{r}, nil 900 906 case *query.Not: 901 - ct, err := d.newMatchTree(s.Child) 907 + ct, err := d.newMatchTree(s.Child, opt) 902 908 return &notMatchTree{ 903 909 child: ct, 904 910 }, err ··· 908 914 break 909 915 } 910 916 911 - ct, err := d.newMatchTree(s.Child) 917 + ct, err := d.newMatchTree(s.Child, opt) 912 918 if err != nil { 913 919 return nil, err 914 920 } ··· 963 969 }, nil 964 970 965 971 case *query.Symbol: 966 - subMT, err := d.newMatchTree(s.Expr) 972 + // Disable WordMatchTree since we don't support it in symbols yet. 
973 + optCopy := opt 974 + optCopy.DisableWordMatchOptimization = true 975 + 976 + subMT, err := d.newMatchTree(s.Expr, optCopy) 967 977 if err != nil { 968 978 return nil, err 969 979 } ··· 1131 1141 return st, nil 1132 1142 } 1133 1143 1134 - func regexpToWordMatchTree(q *query.Regexp) (_ *wordMatchTree, ok bool) { 1144 + func regexpToWordMatchTree(q *query.Regexp, opt matchTreeOpt) (_ *wordMatchTree, ok bool) { 1145 + if opt.DisableWordMatchOptimization { 1146 + return nil, false 1147 + } 1135 1148 // Needs to be case sensitive 1136 1149 if !q.CaseSensitive || q.Regexp.Flags&syntax.FoldCase != 0 { 1137 1150 return nil, false
+42 -21
matchtree_test.go
··· 169 169 {query: "contain(er|ing)", skip: false}, 170 170 {query: "thread (needle|haystack)", skip: true}, 171 171 {query: "thread (needle|)", skip: false}, 172 + {query: `\bthread\b case:yes`, skip: true}, // word search 173 + {query: `\bthread\b case:no`, skip: false}, 172 174 } 173 175 174 176 for _, tt := range tests { ··· 179 181 } 180 182 181 183 d := &indexData{} 182 - mt, err := d.newMatchTree(q) 184 + mt, err := d.newMatchTree(q, matchTreeOpt{}) 183 185 if err != nil { 184 186 t.Errorf("Error creating match tree from query: %s", q) 185 187 continue ··· 187 189 188 190 visitMatchTree(mt, func(m matchTree) { 189 191 if _, ok := m.(*regexpMatchTree); ok && tt.skip { 192 + t.Log(mt) 190 193 t.Errorf("Expected regexpMatchTree to be skipped for query: %s", q) 191 194 } 192 195 }) ··· 203 206 } 204 207 205 208 d := &indexData{} 206 - mt, err := d.newMatchTree(q) 209 + mt, err := d.newMatchTree(q, matchTreeOpt{}) 207 210 if err != nil { 208 211 t.Fatalf("Error creating match tree from query: %s", q) 209 212 } ··· 227 230 } 228 231 } 229 232 230 - func TestSymbolMatchRegexAll(t *testing.T) { 233 + func TestSymbolMatchTree(t *testing.T) { 231 234 tests := []struct { 232 - query string 233 - all bool 235 + query string 236 + substr string 237 + regex string 238 + regexAll bool 234 239 }{ 235 - {query: ".*", all: true}, 236 - {query: "(a|b)", all: false}, 237 - {query: "b.r", all: false}, 240 + {query: "sym:.*", regex: "(?i)(?-s:.)*", regexAll: true}, 241 + {query: "sym:(ab|cd)", regex: "(?i)ab|cd"}, 242 + {query: "sym:b.r", regex: "(?i)b(?-s:.)r"}, 243 + {query: "sym:horse", substr: "horse"}, 244 + {query: `sym:\bthread\b case:yes`, regex: `\bthread\b`}, // check we disable word search opt 245 + {query: `sym:\bthread\b case:no`, regex: `(?i)\bthread\b`}, 238 246 } 239 247 240 248 for _, tt := range tests { 241 - q, err := query.Parse("sym:" + tt.query) 249 + q, err := query.Parse(tt.query) 242 250 if err != nil { 243 - t.Errorf("Error parsing query: %s", 
"sym:"+tt.query) 251 + t.Errorf("Error parsing query: %s", tt.query) 244 252 continue 245 253 } 246 254 247 255 d := &indexData{} 248 - mt, err := d.newMatchTree(q) 256 + mt, err := d.newMatchTree(q, matchTreeOpt{}) 249 257 if err != nil { 250 258 t.Errorf("Error creating match tree from query: %s", q) 251 259 continue 252 260 } 253 261 254 - regexMT, ok := mt.(*symbolRegexpMatchTree) 255 - if !ok { 256 - t.Errorf("Expected symbol regex match tree from query: %s, got %v", q, mt) 257 - continue 262 + var ( 263 + substr string 264 + regex string 265 + regexAll bool 266 + ) 267 + if substrMT, ok := mt.(*symbolSubstrMatchTree); ok { 268 + substr = substrMT.query.Pattern 269 + } 270 + if regexMT, ok := mt.(*symbolRegexpMatchTree); ok { 271 + regex = regexMT.regexp.String() 272 + regexAll = regexMT.all 258 273 } 259 274 260 - if regexMT.all != tt.all { 261 - t.Errorf("Expected property all: %t from query: %s", tt.all, q) 275 + if substr != tt.substr { 276 + t.Errorf("%s has unexpected substring:\nwant: %q\ngot: %q", tt.query, tt.substr, substr) 277 + } 278 + if regex != tt.regex { 279 + t.Errorf("%s has unexpected regex:\nwant: %q\ngot: %q", tt.query, tt.regex, regex) 280 + } 281 + if regexAll != tt.regexAll { 282 + t.Errorf("%s has unexpected regexAll: want=%t got=%t", tt.query, tt.regexAll, regexAll) 262 283 } 263 284 } 264 285 } ··· 269 290 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1}, 270 291 repos: []uint16{0, 0, 1, 2, 3, 3}, 271 292 } 272 - mt, err := d.newMatchTree(&query.RepoSet{Set: map[string]bool{"r1": true, "r3": true, "r99": true}}) 293 + mt, err := d.newMatchTree(&query.RepoSet{Set: map[string]bool{"r1": true, "r3": true, "r99": true}}, matchTreeOpt{}) 273 294 if err != nil { 274 295 t.Fatal(err) 275 296 } ··· 292 313 fileBranchMasks: []uint64{1, 1, 1, 1, 1}, 293 314 repos: []uint16{0, 0, 1, 0, 1}, 294 315 } 295 - mt, err := d.newMatchTree(&query.Repo{Regexp: regexp.MustCompile("ar")}) 316 + mt, err := d.newMatchTree(&query.Repo{Regexp: 
regexp.MustCompile("ar")}, matchTreeOpt{}) 296 317 if err != nil { 297 318 t.Fatal(err) 298 319 } ··· 323 344 mt, err := d.newMatchTree(&query.BranchesRepos{List: []query.BranchRepos{ 324 345 {Branch: "b1", Repos: roaring.BitmapOf(hash("bar"))}, 325 346 {Branch: "b2", Repos: roaring.BitmapOf(hash("bar"))}, 326 - }}) 347 + }}, matchTreeOpt{}) 327 348 if err != nil { 328 349 t.Fatal(err) 329 350 } ··· 348 369 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1}, 349 370 repos: []uint16{0, 0, 1, 2, 3, 3}, 350 371 } 351 - mt, err := d.newMatchTree(&query.RepoIDs{Repos: roaring.BitmapOf(1, 3, 99)}) 372 + mt, err := d.newMatchTree(&query.RepoIDs{Repos: roaring.BitmapOf(1, 3, 99)}, matchTreeOpt{}) 352 373 if err != nil { 353 374 t.Fatal(err) 354 375 }