fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package query 16 17import ( 18 "bytes" 19 "fmt" 20 "log" 21 "regexp/syntax" 22 23 "github.com/grafana/regexp" 24 25 "github.com/sourcegraph/zoekt/internal/languages" 26) 27 28var _ = log.Printf 29 30// parseStringLiteral parses a string literal, consumes the starting 31// quote too. 32func parseStringLiteral(in []byte) (lit []byte, n int, err error) { 33 left := in[1:] 34 found := false 35 36loop: 37 for len(left) > 0 { 38 c := left[0] 39 left = left[1:] 40 switch c { 41 case '"': 42 found = true 43 break loop 44 case '\\': 45 // TODO - other escape sequences. 46 if len(left) == 0 { 47 return nil, 0, fmt.Errorf("query: missing char after \\") 48 } 49 c = left[0] 50 left = left[1:] 51 52 lit = append(lit, c) 53 default: 54 lit = append(lit, c) 55 } 56 } 57 if !found { 58 return nil, 0, fmt.Errorf("query: unterminated quoted string") 59 } 60 return lit, len(in) - len(left), nil 61} 62 63// orOperator is a placeholder intermediate so we can represent [A, 64// or, B] before we convert it to Or{A, B} 65type orOperator struct{} 66 67func (o *orOperator) String() string { 68 return "orOp" 69} 70 71func isSpace(c byte) bool { 72 return c == ' ' || c == '\t' 73} 74 75// Parse parses a string into a query. 76func Parse(qStr string) (Q, error) { 77 b := []byte(qStr) 78 79 qs, _, err := parseExprList(b) 80 if err != nil { 81 return nil, err 82 } 83 84 q, err := parseOperators(qs) 85 if err != nil { 86 return nil, err 87 } 88 89 return Simplify(q), nil 90} 91 92// parseExpr parses a single expression, returning the result, and the 93// number of bytes consumed. 94func parseExpr(in []byte) (Q, int, error) { 95 b := in[:] 96 var expr Q 97 for len(b) > 0 && isSpace(b[0]) { 98 b = b[1:] 99 } 100 101 tok, err := nextToken(b) 102 if err != nil { 103 return nil, 0, err 104 } 105 if tok == nil { 106 return nil, 0, nil 107 } 108 b = b[len(tok.Input):] 109 110 text := string(tok.Text) 111 switch tok.Type { 112 case tokCase: 113 switch text { 114 case "yes": 115 case "no": 116 case "auto": 117 default: 118 return nil, 0, fmt.Errorf("query: unknown case argument %q, want {yes,no,auto}", text) 119 } 120 expr = &caseQ{text} 121 case tokRepo: 122 r, err := regexp.Compile(text) 123 if err != nil { 124 return nil, 0, err 125 } 126 127 expr = &Repo{r} 128 case tokArchived: 129 switch text { 130 case "yes": 131 expr = RawConfig(RcOnlyArchived) 132 case "no": 133 expr = RawConfig(RcNoArchived) 134 default: 135 return nil, 0, fmt.Errorf("query: unknown archived argument %q, want {yes,no}", text) 136 } 137 case tokFork: 138 switch text { 139 case "yes": 140 expr = RawConfig(RcOnlyForks) 141 case "no": 142 expr = RawConfig(RcNoForks) 143 default: 144 return nil, 0, fmt.Errorf("query: unknown fork argument %q, want {yes,no}", text) 145 } 146 case tokPublic: 147 switch text { 148 case "yes": 149 expr = RawConfig(RcOnlyPublic) 150 case "no": 151 expr = RawConfig(RcOnlyPrivate) 152 default: 153 return nil, 0, fmt.Errorf("query: unknown public argument %q, want {yes,no}", text) 154 } 155 case tokBranch: 156 expr = &Branch{Pattern: text} 157 case tokText, tokRegex: 158 q, err := RegexpQuery(text, false, false) 159 if err != nil { 160 return nil, 0, err 161 } 162 expr = q 163 case tokFile: 164 q, err := RegexpQuery(text, false, true) 165 if err != nil { 166 return nil, 0, err 167 } 168 expr = q 169 case tokContent: 170 q, err := RegexpQuery(text, true, false) 171 if err != nil { 172 return nil, 0, err 173 } 174 expr = q 175 case tokLang: 176 canonical, ok := languages.GetLanguageByAlias(text) 177 if !ok { 178 expr = &Const{false} 179 } else { 180 expr = &Language{Language: canonical} 181 } 182 183 case tokSym: 184 if text == "" { 185 return nil, 0, fmt.Errorf("the sym: atom must have an argument") 186 } 187 188 q, err := RegexpQuery(text, false, false) 189 if err != nil { 190 return nil, 0, err 191 } 192 193 expr = &Symbol{q} 194 case tokParenClose: 195 // Caller must consume paren. 196 expr = nil 197 198 case tokParenOpen: 199 qs, n, err := parseExprList(b) 200 b = b[n:] 201 if err != nil { 202 return nil, 0, err 203 } 204 205 pTok, err := nextToken(b) 206 if err != nil { 207 return nil, 0, err 208 } 209 if pTok == nil || pTok.Type != tokParenClose { 210 return nil, 0, fmt.Errorf("query: missing close paren, got token %v", pTok) 211 } 212 213 b = b[len(pTok.Input):] 214 expr, err = parseOperators(qs) 215 if err != nil { 216 return nil, 0, err 217 } 218 case tokNegate: 219 subQ, n, err := parseExpr(b) 220 if err != nil { 221 return nil, 0, err 222 } 223 if subQ == nil { 224 return nil, 0, fmt.Errorf("query: '-' operator needs an argument") 225 } 226 b = b[n:] 227 expr = &Not{subQ} 228 229 case tokType: 230 var t uint8 231 switch text { 232 case "filematch": 233 t = TypeFileMatch 234 case "filename", "file": 235 t = TypeFileName 236 case "repo": 237 t = TypeRepo 238 default: 239 return nil, 0, fmt.Errorf("query: unknown type argument %q, want {filematch,filename,repo}", text) 240 } 241 // Later we will lift this into a root, like we do for caseQ 242 expr = &Type{Type: t, Child: nil} 243 } 244 245 return expr, len(in) - len(b), nil 246} 247 248const regexpFlags syntax.Flags = syntax.ClassNL | syntax.PerlX | syntax.UnicodeGroups 249 250// RegexpQuery parses an atom into either a regular expression, or a 251// simple substring atom. 252func RegexpQuery(text string, content, file bool) (Q, error) { 253 var expr Q 254 255 r, err := syntax.Parse(text, regexpFlags) 256 if err != nil { 257 return nil, err 258 } 259 260 r = OptimizeRegexp(r, regexpFlags) 261 262 if r.Op == syntax.OpLiteral { 263 expr = &Substring{ 264 Pattern: string(r.Rune), 265 FileName: file, 266 Content: content, 267 } 268 } else { 269 expr = &Regexp{ 270 Regexp: r, 271 FileName: file, 272 Content: content, 273 } 274 } 275 276 return expr, nil 277} 278 279// parseOperators interprets the orOperator in a list of queries. 280func parseOperators(in []Q) (Q, error) { 281 top := &Or{} 282 cur := &And{} 283 284 seenOr := false 285 for _, q := range in { 286 if _, ok := q.(*orOperator); ok { 287 seenOr = true 288 if len(cur.Children) == 0 { 289 return nil, fmt.Errorf("query: OR operator should have operand") 290 } 291 top.Children = append(top.Children, cur) 292 cur = &And{} 293 } else { 294 cur.Children = append(cur.Children, q) 295 } 296 } 297 298 if seenOr && len(cur.Children) == 0 { 299 return nil, fmt.Errorf("query: OR operator should have operand") 300 } 301 top.Children = append(top.Children, cur) 302 return top, nil 303} 304 305// parseExprList parses a list of query expressions. It is the 306// workhorse of the Parse function. 307func parseExprList(in []byte) ([]Q, int, error) { 308 b := in[:] 309 var qs []Q 310 for len(b) > 0 { 311 for len(b) > 0 && isSpace(b[0]) { 312 b = b[1:] 313 } 314 tok, _ := nextToken(b) 315 if tok != nil && tok.Type == tokParenClose { 316 break 317 } else if tok != nil && tok.Type == tokOr { 318 qs = append(qs, &orOperator{}) 319 b = b[len(tok.Input):] 320 continue 321 } 322 323 q, n, err := parseExpr(b) 324 if err != nil { 325 return nil, 0, err 326 } 327 328 if q == nil { 329 // eof or a ')' 330 break 331 } 332 qs = append(qs, q) 333 b = b[n:] 334 } 335 336 setCase := "auto" 337 newQS := qs[:0] 338 typeT := uint8(100) 339 for _, q := range qs { 340 switch s := q.(type) { 341 case *caseQ: 342 setCase = s.Flavor 343 case *Type: 344 if s.Type < typeT { 345 typeT = s.Type 346 } 347 default: 348 newQS = append(newQS, q) 349 } 350 } 351 qs = mapQueryList(newQS, func(q Q) Q { 352 if sc, ok := q.(setCaser); ok { 353 sc.setCase(setCase) 354 } 355 return q 356 }) 357 if typeT != 100 { 358 qs = []Q{&Type{Type: typeT, Child: NewAnd(qs...)}} 359 } 360 return qs, len(in) - len(b), nil 361} 362 363type token struct { 364 Type int 365 // The value of the token 366 Text []byte 367 368 // The input that we consumed to form the token. 369 Input []byte 370} 371 372func (t *token) String() string { 373 return fmt.Sprintf("%s:%q", tokNames[t.Type], t.Text) 374} 375 376// token types. 377const ( 378 tokText = 0 379 tokFile = 1 380 tokRepo = 2 381 tokCase = 3 382 tokBranch = 4 383 tokParenOpen = 5 384 tokParenClose = 6 385 tokError = 7 386 tokNegate = 8 387 tokRegex = 9 388 tokOr = 10 389 tokContent = 11 390 tokLang = 12 391 tokSym = 13 392 tokType = 14 393 tokArchived = 15 394 tokPublic = 16 395 tokFork = 17 396) 397 398var tokNames = map[int]string{ 399 tokArchived: "Archived", 400 tokBranch: "Branch", 401 tokCase: "Case", 402 tokError: "Error", 403 tokFile: "File", 404 tokFork: "Fork", 405 tokNegate: "Negate", 406 tokOr: "Or", 407 tokParenClose: "ParenClose", 408 tokParenOpen: "ParenOpen", 409 tokPublic: "Public", 410 tokRegex: "Regex", 411 tokRepo: "Repo", 412 tokText: "Text", 413 tokLang: "Language", 414 tokSym: "Symbol", 415 tokType: "Type", 416} 417 418var prefixes = map[string]int{ 419 "archived:": tokArchived, 420 "b:": tokBranch, 421 "branch:": tokBranch, 422 "c:": tokContent, 423 "case:": tokCase, 424 "content:": tokContent, 425 "f:": tokFile, 426 "file:": tokFile, 427 "fork:": tokFork, 428 "public:": tokPublic, 429 "r:": tokRepo, 430 "regex:": tokRegex, 431 "repo:": tokRepo, 432 "lang:": tokLang, 433 "sym:": tokSym, 434 "t:": tokType, 435 "type:": tokType, 436} 437 438var reservedWords = map[string]int{ 439 "or": tokOr, 440} 441 442func (t *token) setType() { 443 // After we consumed the input, we have to interpret some of the text, 444 // eg. to distinguish between ")" the text and ) the query grouping 445 // parenthesis. 446 if len(t.Text) == 1 && t.Text[0] == '(' { 447 t.Type = tokParenOpen 448 } 449 if len(t.Text) == 1 && t.Text[0] == ')' { 450 t.Type = tokParenClose 451 } 452 453 for w, typ := range reservedWords { 454 if string(t.Text) == w && string(t.Input) == w { 455 t.Type = typ 456 break 457 } 458 } 459 460 for pref, typ := range prefixes { 461 if !bytes.HasPrefix(t.Input, []byte(pref)) { 462 continue 463 } 464 465 t.Text = t.Text[len(pref):] 466 t.Type = typ 467 break 468 } 469} 470 471// nextToken returns the next token from the given input. 472func nextToken(in []byte) (*token, error) { 473 left := in[:] 474 parenCount := 0 475 var cur token 476 if len(left) == 0 { 477 return nil, nil 478 } 479 480 if left[0] == '-' { 481 return &token{ 482 Type: tokNegate, 483 Text: []byte{'-'}, 484 Input: in[:1], 485 }, nil 486 } 487 488 foundSpace := false 489 490loop: 491 for len(left) > 0 { 492 c := left[0] 493 switch c { 494 case '(': 495 parenCount++ 496 cur.Text = append(cur.Text, c) 497 left = left[1:] 498 case ')': 499 if parenCount == 0 { 500 if len(cur.Text) == 0 { 501 cur.Text = []byte{')'} 502 left = left[1:] 503 } 504 break loop 505 } 506 507 cur.Text = append(cur.Text, c) 508 left = left[1:] 509 parenCount-- 510 511 case '"': 512 t, n, err := parseStringLiteral(left) 513 if err != nil { 514 return nil, err 515 } 516 cur.Text = append(cur.Text, t...) 517 left = left[n:] 518 case '\\': 519 left = left[1:] 520 if len(left) == 0 { 521 return nil, fmt.Errorf("query: lone \\ at end") 522 } 523 c := left[0] 524 cur.Text = append(cur.Text, '\\', c) 525 left = left[1:] 526 527 case ' ', '\n', '\t': 528 if parenCount > 0 { 529 foundSpace = true 530 } 531 break loop 532 default: 533 cur.Text = append(cur.Text, c) 534 left = left[1:] 535 } 536 } 537 538 if len(cur.Text) == 0 { 539 return nil, nil 540 } 541 542 if foundSpace && cur.Text[0] == '(' { 543 cur.Text = cur.Text[:1] 544 cur.Input = in[:1] 545 } else { 546 cur.Input = in[:len(in)-len(left)] 547 } 548 cur.setType() 549 return &cur, nil 550}