fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package query 16 17import ( 18 "bytes" 19 "fmt" 20 "log" 21 "regexp/syntax" 22 23 "github.com/grafana/regexp" 24 25 "github.com/sourcegraph/zoekt/internal/languages" 26) 27 28var _ = log.Printf 29 30// parseStringLiteral parses a string literal, consumes the starting 31// quote too. 32func parseStringLiteral(in []byte) (lit []byte, n int, err error) { 33 left := in[1:] 34 found := false 35 36loop: 37 for len(left) > 0 { 38 c := left[0] 39 left = left[1:] 40 switch c { 41 case '"': 42 found = true 43 break loop 44 case '\\': 45 // TODO - other escape sequences. 46 if len(left) == 0 { 47 return nil, 0, fmt.Errorf("query: missing char after \\") 48 } 49 c = left[0] 50 left = left[1:] 51 52 lit = append(lit, c) 53 default: 54 lit = append(lit, c) 55 } 56 } 57 if !found { 58 return nil, 0, fmt.Errorf("query: unterminated quoted string") 59 } 60 return lit, len(in) - len(left), nil 61} 62 63// orOperator is a placeholder intermediate so we can represent [A, 64// or, B] before we convert it to Or{A, B} 65type orOperator struct{} 66 67func (o *orOperator) String() string { 68 return "orOp" 69} 70 71func isSpace(c byte) bool { 72 return c == ' ' || c == '\t' 73} 74 75// Parse parses a string into a query. 76func Parse(qStr string) (Q, error) { 77 b := []byte(qStr) 78 79 qs, _, err := parseExprList(b) 80 if err != nil { 81 return nil, err 82 } 83 84 q, err := parseOperators(qs) 85 if err != nil { 86 return nil, err 87 } 88 89 return Simplify(q), nil 90} 91 92// parseExpr parses a single expression, returning the result, and the 93// number of bytes consumed. 94func parseExpr(in []byte) (Q, int, error) { 95 b := in[:] 96 var expr Q 97 for len(b) > 0 && isSpace(b[0]) { 98 b = b[1:] 99 } 100 101 tok, err := nextToken(b) 102 if err != nil { 103 return nil, 0, err 104 } 105 if tok == nil { 106 return nil, 0, nil 107 } 108 b = b[len(tok.Input):] 109 110 text := string(tok.Text) 111 switch tok.Type { 112 case tokCase: 113 switch text { 114 case "yes": 115 case "no": 116 case "auto": 117 default: 118 return nil, 0, fmt.Errorf("query: unknown case argument %q, want {yes,no,auto}", text) 119 } 120 expr = &caseQ{text} 121 case tokRepo: 122 r, err := regexp.Compile(text) 123 if err != nil { 124 return nil, 0, err 125 } 126 127 expr = &Repo{r} 128 case tokArchived: 129 switch text { 130 case "yes": 131 expr = RawConfig(RcOnlyArchived) 132 case "no": 133 expr = RawConfig(RcNoArchived) 134 default: 135 return nil, 0, fmt.Errorf("query: unknown archived argument %q, want {yes,no}", text) 136 } 137 case tokFork: 138 switch text { 139 case "yes": 140 expr = RawConfig(RcOnlyForks) 141 case "no": 142 expr = RawConfig(RcNoForks) 143 default: 144 return nil, 0, fmt.Errorf("query: unknown fork argument %q, want {yes,no}", text) 145 } 146 case tokPublic: 147 switch text { 148 case "yes": 149 expr = RawConfig(RcOnlyPublic) 150 case "no": 151 expr = RawConfig(RcOnlyPrivate) 152 default: 153 return nil, 0, fmt.Errorf("query: unknown public argument %q, want {yes,no}", text) 154 } 155 case tokBranch: 156 expr = &Branch{Pattern: text} 157 case tokText, tokRegex: 158 q, err := RegexpQuery(text, false, false) 159 if err != nil { 160 return nil, 0, err 161 } 162 expr = q 163 case tokFile: 164 q, err := RegexpQuery(text, false, true) 165 if err != nil { 166 return nil, 0, err 167 } 168 expr = q 169 case tokContent: 170 q, err := RegexpQuery(text, true, false) 171 if err != nil { 172 return nil, 0, err 173 } 174 expr = q 175 case tokLang: 176 canonical, ok := languages.GetLanguageByAlias(text) 177 if !ok { 178 expr = &Const{false} 179 } else { 180 expr = &Language{Language: canonical} 181 } 182 183 case tokSym: 184 if text == "" { 185 return nil, 0, fmt.Errorf("the sym: atom must have an argument") 186 } 187 188 q, err := RegexpQuery(text, false, false) 189 if err != nil { 190 return nil, 0, err 191 } 192 193 expr = &Symbol{q} 194 case tokParenClose: 195 // Caller must consume paren. 196 expr = nil 197 198 case tokParenOpen: 199 qs, n, err := parseExprList(b) 200 b = b[n:] 201 if err != nil { 202 return nil, 0, err 203 } 204 205 pTok, err := nextToken(b) 206 if err != nil { 207 return nil, 0, err 208 } 209 if pTok == nil || pTok.Type != tokParenClose { 210 return nil, 0, fmt.Errorf("query: missing close paren, got token %v", pTok) 211 } 212 213 b = b[len(pTok.Input):] 214 expr, err = parseOperators(qs) 215 if err != nil { 216 return nil, 0, err 217 } 218 case tokNegate: 219 subQ, n, err := parseExpr(b) 220 if err != nil { 221 return nil, 0, err 222 } 223 if subQ == nil { 224 return nil, 0, fmt.Errorf("query: '-' operator needs an argument") 225 } 226 b = b[n:] 227 expr = &Not{subQ} 228 229 case tokType: 230 var t uint8 231 switch text { 232 case "filematch": 233 t = TypeFileMatch 234 case "filename", "file": 235 t = TypeFileName 236 case "repo": 237 t = TypeRepo 238 default: 239 return nil, 0, fmt.Errorf("query: unknown type argument %q, want {filematch,filename,repo}", text) 240 } 241 // Later we will lift this into a root, like we do for caseQ 242 expr = &Type{Type: t, Child: nil} 243 case tokMeta: 244 // Split on ':' to separate field and value 245 parts := bytes.SplitN([]byte(text), []byte(":"), 2) 246 if len(parts) != 2 { 247 return nil, 0, fmt.Errorf("query: invalid meta field syntax %q", text) 248 } 249 field := string(parts[0]) 250 valuePattern := string(parts[1]) 251 re, err := regexp.Compile(valuePattern) 252 if err != nil { 253 return nil, 0, fmt.Errorf("query: invalid regexp in meta value: %v", err) 254 } 255 expr = &Meta{ 256 Field: field, 257 Value: re, 258 } 259 } 260 261 return expr, len(in) - len(b), nil 262} 263 264const regexpFlags syntax.Flags = syntax.ClassNL | syntax.PerlX | syntax.UnicodeGroups 265 266// RegexpQuery parses an atom into either a regular expression, or a 267// simple substring atom. 268func RegexpQuery(text string, content, file bool) (Q, error) { 269 var expr Q 270 271 r, err := syntax.Parse(text, regexpFlags) 272 if err != nil { 273 return nil, err 274 } 275 276 r = OptimizeRegexp(r, regexpFlags) 277 278 if r.Op == syntax.OpLiteral { 279 expr = &Substring{ 280 Pattern: string(r.Rune), 281 FileName: file, 282 Content: content, 283 } 284 } else { 285 expr = &Regexp{ 286 Regexp: r, 287 FileName: file, 288 Content: content, 289 } 290 } 291 292 return expr, nil 293} 294 295// parseOperators interprets the orOperator in a list of queries. 296func parseOperators(in []Q) (Q, error) { 297 top := &Or{} 298 cur := &And{} 299 300 seenOr := false 301 for _, q := range in { 302 if _, ok := q.(*orOperator); ok { 303 seenOr = true 304 if len(cur.Children) == 0 { 305 return nil, fmt.Errorf("query: OR operator should have operand") 306 } 307 top.Children = append(top.Children, cur) 308 cur = &And{} 309 } else { 310 cur.Children = append(cur.Children, q) 311 } 312 } 313 314 if seenOr && len(cur.Children) == 0 { 315 return nil, fmt.Errorf("query: OR operator should have operand") 316 } 317 top.Children = append(top.Children, cur) 318 return top, nil 319} 320 321// parseExprList parses a list of query expressions. It is the 322// workhorse of the Parse function. 323func parseExprList(in []byte) ([]Q, int, error) { 324 b := in[:] 325 var qs []Q 326 for len(b) > 0 { 327 for len(b) > 0 && isSpace(b[0]) { 328 b = b[1:] 329 } 330 tok, _ := nextToken(b) 331 if tok != nil && tok.Type == tokParenClose { 332 break 333 } else if tok != nil && tok.Type == tokOr { 334 qs = append(qs, &orOperator{}) 335 b = b[len(tok.Input):] 336 continue 337 } 338 339 q, n, err := parseExpr(b) 340 if err != nil { 341 return nil, 0, err 342 } 343 344 if q == nil { 345 // eof or a ')' 346 break 347 } 348 qs = append(qs, q) 349 b = b[n:] 350 } 351 352 setCase := "auto" 353 newQS := qs[:0] 354 typeT := uint8(100) 355 for _, q := range qs { 356 switch s := q.(type) { 357 case *caseQ: 358 setCase = s.Flavor 359 case *Type: 360 if s.Type < typeT { 361 typeT = s.Type 362 } 363 default: 364 newQS = append(newQS, q) 365 } 366 } 367 qs = mapQueryList(newQS, func(q Q) Q { 368 if sc, ok := q.(setCaser); ok { 369 sc.setCase(setCase) 370 } 371 return q 372 }) 373 if typeT != 100 { 374 qs = []Q{&Type{Type: typeT, Child: NewAnd(qs...)}} 375 } 376 return qs, len(in) - len(b), nil 377} 378 379type token struct { 380 Type int 381 // The value of the token 382 Text []byte 383 384 // The input that we consumed to form the token. 385 Input []byte 386} 387 388func (t *token) String() string { 389 return fmt.Sprintf("%s:%q", tokNames[t.Type], t.Text) 390} 391 392// token types. 393const ( 394 tokText = 0 395 tokFile = 1 396 tokRepo = 2 397 tokCase = 3 398 tokBranch = 4 399 tokParenOpen = 5 400 tokParenClose = 6 401 tokError = 7 402 tokNegate = 8 403 tokRegex = 9 404 tokOr = 10 405 tokContent = 11 406 tokLang = 12 407 tokSym = 13 408 tokType = 14 409 tokArchived = 15 410 tokPublic = 16 411 tokFork = 17 412 tokMeta = 18 413) 414 415var tokNames = map[int]string{ 416 tokArchived: "Archived", 417 tokBranch: "Branch", 418 tokCase: "Case", 419 tokError: "Error", 420 tokFile: "File", 421 tokFork: "Fork", 422 tokNegate: "Negate", 423 tokOr: "Or", 424 tokParenClose: "ParenClose", 425 tokParenOpen: "ParenOpen", 426 tokPublic: "Public", 427 tokRegex: "Regex", 428 tokRepo: "Repo", 429 tokText: "Text", 430 tokLang: "Language", 431 tokSym: "Symbol", 432 tokType: "Type", 433 tokMeta: "Meta", 434} 435 436var prefixes = map[string]int{ 437 "archived:": tokArchived, 438 "b:": tokBranch, 439 "branch:": tokBranch, 440 "c:": tokContent, 441 "case:": tokCase, 442 "content:": tokContent, 443 "f:": tokFile, 444 "file:": tokFile, 445 "fork:": tokFork, 446 "public:": tokPublic, 447 "r:": tokRepo, 448 "regex:": tokRegex, 449 "repo:": tokRepo, 450 "lang:": tokLang, 451 "sym:": tokSym, 452 "t:": tokType, 453 "type:": tokType, 454 "meta.": tokMeta, 455} 456 457var reservedWords = map[string]int{ 458 "or": tokOr, 459} 460 461func (t *token) setType() { 462 // After we consumed the input, we have to interpret some of the text, 463 // eg. to distinguish between ")" the text and ) the query grouping 464 // parenthesis. 465 if len(t.Text) == 1 && t.Text[0] == '(' { 466 t.Type = tokParenOpen 467 } 468 if len(t.Text) == 1 && t.Text[0] == ')' { 469 t.Type = tokParenClose 470 } 471 472 for w, typ := range reservedWords { 473 if string(t.Text) == w && string(t.Input) == w { 474 t.Type = typ 475 break 476 } 477 } 478 479 for pref, typ := range prefixes { 480 if !bytes.HasPrefix(t.Input, []byte(pref)) { 481 continue 482 } 483 484 t.Text = t.Text[len(pref):] 485 t.Type = typ 486 break 487 } 488} 489 490// nextToken returns the next token from the given input. 491func nextToken(in []byte) (*token, error) { 492 left := in[:] 493 parenCount := 0 494 var cur token 495 if len(left) == 0 { 496 return nil, nil 497 } 498 499 if left[0] == '-' { 500 return &token{ 501 Type: tokNegate, 502 Text: []byte{'-'}, 503 Input: in[:1], 504 }, nil 505 } 506 507 foundSpace := false 508 509loop: 510 for len(left) > 0 { 511 c := left[0] 512 switch c { 513 case '(': 514 parenCount++ 515 cur.Text = append(cur.Text, c) 516 left = left[1:] 517 case ')': 518 if parenCount == 0 { 519 if len(cur.Text) == 0 { 520 cur.Text = []byte{')'} 521 left = left[1:] 522 } 523 break loop 524 } 525 526 cur.Text = append(cur.Text, c) 527 left = left[1:] 528 parenCount-- 529 530 case '"': 531 t, n, err := parseStringLiteral(left) 532 if err != nil { 533 return nil, err 534 } 535 cur.Text = append(cur.Text, t...) 536 left = left[n:] 537 case '\\': 538 left = left[1:] 539 if len(left) == 0 { 540 return nil, fmt.Errorf("query: lone \\ at end") 541 } 542 c := left[0] 543 cur.Text = append(cur.Text, '\\', c) 544 left = left[1:] 545 546 case ' ', '\n', '\t': 547 if parenCount > 0 { 548 foundSpace = true 549 } 550 break loop 551 default: 552 cur.Text = append(cur.Text, c) 553 left = left[1:] 554 } 555 } 556 557 if len(cur.Text) == 0 { 558 return nil, nil 559 } 560 561 if foundSpace && cur.Text[0] == '(' { 562 cur.Text = cur.Text[:1] 563 cur.Input = in[:1] 564 } else { 565 cur.Input = in[:len(in)-len(left)] 566 } 567 cur.setType() 568 return &cur, nil 569}