fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

Add support for indexing and searching custom fields for repositories (#962)

At GitLab, we encountered limitations when searching within large namespaces
containing thousands of repositories. Specifically, we cannot pass a complete
list of RepoIDs due to size constraints.

This change introduces support for indexing and searching on custom repository
metadata by extending Repository to include an additional Metadata field.

All fields within Repository.Metadata are searchable using a regular
expression evaluator.

This enables more scalable filtering by allowing clients to express regular
expression prefix queries on metadata fields using the
`meta.<field>:<regexp>` query syntax, such as:

meta.traversal_ids:123-456-.*

Or any field really:

meta.haystack:nee.*le

+406 -22
+3
api.go
··· 587 587 // The repository URL. 588 588 URL string 589 589 590 + // Additional metadata about the repository. 591 + Metadata map[string]string 592 + 590 593 // The physical source where this repo came from, eg. full 591 594 // path to the zip filename or git repository directory. This 592 595 // will not be exposed in the UI, but can be used to detect
+14
cmd/zoekt-index/main.go
··· 16 16 package main 17 17 18 18 import ( 19 + "encoding/json" 19 20 "flag" 20 21 "fmt" 21 22 "log" ··· 62 63 func main() { 63 64 cpuProfile := flag.String("cpu_profile", "", "write cpu profile to file") 64 65 ignoreDirs := flag.String("ignore_dirs", ".git,.hg,.svn", "comma separated list of directories to ignore.") 66 + metaFile := flag.String("meta", "", "path to .meta JSON file with repository description") 65 67 flag.Parse() 66 68 67 69 if flag.NArg() == 0 { ··· 96 98 } 97 99 } 98 100 } 101 + 102 + if *metaFile != "" { 103 + // Read and parse the .meta JSON file into opts.RepositoryDescription 104 + data, err := os.ReadFile(*metaFile) 105 + if err != nil { 106 + log.Fatalf("failed to read .meta file %s: %v", *metaFile, err) 107 + } 108 + if err := json.Unmarshal(data, &opts.RepositoryDescription); err != nil { 109 + log.Fatalf("failed to decode .meta file %s: %v", *metaFile, err) 110 + } 111 + } 112 + 99 113 for _, arg := range flag.Args() { 100 114 opts.RepositoryDescription.Source = arg 101 115 if err := indexArg(arg, *opts, ignoreDirMap); err != nil {
+111 -20
grpc/protos/zoekt/webserver/v1/query.pb.go
··· 158 158 // *Q_Not 159 159 // *Q_Branch 160 160 // *Q_Boost 161 + // *Q_Meta 161 162 Query isQ_Query `protobuf_oneof:"query"` 162 163 } 163 164 ··· 326 327 return nil 327 328 } 328 329 330 + func (x *Q) GetMeta() *Meta { 331 + if x, ok := x.GetQuery().(*Q_Meta); ok { 332 + return x.Meta 333 + } 334 + return nil 335 + } 336 + 329 337 type isQ_Query interface { 330 338 isQ_Query() 331 339 } ··· 402 410 Boost *Boost `protobuf:"bytes,18,opt,name=boost,proto3,oneof"` 403 411 } 404 412 413 + type Q_Meta struct { 414 + Meta *Meta `protobuf:"bytes,19,opt,name=meta,proto3,oneof"` 415 + } 416 + 405 417 func (*Q_RawConfig) isQ_Query() {} 406 418 407 419 func (*Q_Regexp) isQ_Query() {} ··· 437 449 func (*Q_Branch) isQ_Query() {} 438 450 439 451 func (*Q_Boost) isQ_Query() {} 452 + 453 + func (*Q_Meta) isQ_Query() {} 440 454 441 455 // RawConfig filters repositories based on their encoded RawConfig map. 442 456 type RawConfig struct { ··· 1385 1399 return 0 1386 1400 } 1387 1401 1402 + // Meta allows filtering results by repo metadata. 
1403 + type Meta struct { 1404 + state protoimpl.MessageState 1405 + sizeCache protoimpl.SizeCache 1406 + unknownFields protoimpl.UnknownFields 1407 + 1408 + Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` 1409 + Value string `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"` 1410 + } 1411 + 1412 + func (x *Meta) Reset() { 1413 + *x = Meta{} 1414 + if protoimpl.UnsafeEnabled { 1415 + mi := &file_zoekt_webserver_v1_query_proto_msgTypes[19] 1416 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 1417 + ms.StoreMessageInfo(mi) 1418 + } 1419 + } 1420 + 1421 + func (x *Meta) String() string { 1422 + return protoimpl.X.MessageStringOf(x) 1423 + } 1424 + 1425 + func (*Meta) ProtoMessage() {} 1426 + 1427 + func (x *Meta) ProtoReflect() protoreflect.Message { 1428 + mi := &file_zoekt_webserver_v1_query_proto_msgTypes[19] 1429 + if protoimpl.UnsafeEnabled && x != nil { 1430 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 1431 + if ms.LoadMessageInfo() == nil { 1432 + ms.StoreMessageInfo(mi) 1433 + } 1434 + return ms 1435 + } 1436 + return mi.MessageOf(x) 1437 + } 1438 + 1439 + // Deprecated: Use Meta.ProtoReflect.Descriptor instead. 
1440 + func (*Meta) Descriptor() ([]byte, []int) { 1441 + return file_zoekt_webserver_v1_query_proto_rawDescGZIP(), []int{19} 1442 + } 1443 + 1444 + func (x *Meta) GetKey() string { 1445 + if x != nil { 1446 + return x.Key 1447 + } 1448 + return "" 1449 + } 1450 + 1451 + func (x *Meta) GetValue() string { 1452 + if x != nil { 1453 + return x.Value 1454 + } 1455 + return "" 1456 + } 1457 + 1388 1458 var File_zoekt_webserver_v1_query_proto protoreflect.FileDescriptor 1389 1459 1390 1460 var file_zoekt_webserver_v1_query_proto_rawDesc = []byte{ 1391 1461 0x0a, 0x1e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 1392 1462 0x72, 0x2f, 0x76, 0x31, 0x2f, 0x71, 0x75, 0x65, 0x72, 0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 1393 1463 0x12, 0x12, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 1394 - 0x72, 0x2e, 0x76, 0x31, 0x22, 0xe2, 0x07, 0x0a, 0x01, 0x51, 0x12, 0x3e, 0x0a, 0x0a, 0x72, 0x61, 1464 + 0x72, 0x2e, 0x76, 0x31, 0x22, 0x92, 0x08, 0x0a, 0x01, 0x51, 0x12, 0x3e, 0x0a, 0x0a, 0x72, 0x61, 1395 1465 0x77, 0x5f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 1396 1466 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 1397 1467 0x2e, 0x76, 0x31, 0x2e, 0x52, 0x61, 0x77, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x48, 0x00, 0x52, ··· 1453 1523 0x05, 0x62, 0x6f, 0x6f, 0x73, 0x74, 0x18, 0x12, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x7a, 1454 1524 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 1455 1525 0x31, 0x2e, 0x42, 0x6f, 0x6f, 0x73, 0x74, 0x48, 0x00, 0x52, 0x05, 0x62, 0x6f, 0x6f, 0x73, 0x74, 1526 + 0x12, 0x2e, 0x0a, 0x04, 0x6d, 0x65, 0x74, 0x61, 0x18, 0x13, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 1527 + 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 1528 + 0x2e, 0x76, 0x31, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x48, 0x00, 0x52, 0x04, 
0x6d, 0x65, 0x74, 0x61, 1456 1529 0x42, 0x07, 0x0a, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x22, 0xef, 0x01, 0x0a, 0x09, 0x52, 0x61, 1457 1530 0x77, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x38, 0x0a, 0x05, 0x66, 0x6c, 0x61, 0x67, 0x73, 1458 1531 0x18, 0x01, 0x20, 0x03, 0x28, 0x0e, 0x32, 0x22, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, ··· 1546 1619 0x20, 0x01, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 1547 1620 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x51, 0x52, 0x05, 0x63, 0x68, 0x69, 1548 1621 0x6c, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x62, 0x6f, 0x6f, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 1549 - 0x01, 0x52, 0x05, 0x62, 0x6f, 0x6f, 0x73, 0x74, 0x42, 0x3d, 0x5a, 0x3b, 0x67, 0x69, 0x74, 0x68, 1622 + 0x01, 0x52, 0x05, 0x62, 0x6f, 0x6f, 0x73, 0x74, 0x22, 0x2e, 0x0a, 0x04, 0x4d, 0x65, 0x74, 0x61, 1623 + 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 1624 + 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 1625 + 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x42, 0x3d, 0x5a, 0x3b, 0x67, 0x69, 0x74, 0x68, 1550 1626 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 1551 1627 0x70, 0x68, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 1552 1628 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, 0x77, 0x65, 0x62, 0x73, 0x65, ··· 1566 1642 } 1567 1643 1568 1644 var file_zoekt_webserver_v1_query_proto_enumTypes = make([]protoimpl.EnumInfo, 2) 1569 - var file_zoekt_webserver_v1_query_proto_msgTypes = make([]protoimpl.MessageInfo, 20) 1645 + var file_zoekt_webserver_v1_query_proto_msgTypes = make([]protoimpl.MessageInfo, 21) 1570 1646 var file_zoekt_webserver_v1_query_proto_goTypes = []interface{}{ 1571 1647 (RawConfig_Flag)(0), // 0: zoekt.webserver.v1.RawConfig.Flag 1572 1648 (Type_Kind)(0), // 1: 
zoekt.webserver.v1.Type.Kind ··· 1589 1665 (*Not)(nil), // 18: zoekt.webserver.v1.Not 1590 1666 (*Branch)(nil), // 19: zoekt.webserver.v1.Branch 1591 1667 (*Boost)(nil), // 20: zoekt.webserver.v1.Boost 1592 - nil, // 21: zoekt.webserver.v1.RepoSet.SetEntry 1668 + (*Meta)(nil), // 21: zoekt.webserver.v1.Meta 1669 + nil, // 22: zoekt.webserver.v1.RepoSet.SetEntry 1593 1670 } 1594 1671 var file_zoekt_webserver_v1_query_proto_depIdxs = []int32{ 1595 1672 3, // 0: zoekt.webserver.v1.Q.raw_config:type_name -> zoekt.webserver.v1.RawConfig ··· 1609 1686 18, // 14: zoekt.webserver.v1.Q.not:type_name -> zoekt.webserver.v1.Not 1610 1687 19, // 15: zoekt.webserver.v1.Q.branch:type_name -> zoekt.webserver.v1.Branch 1611 1688 20, // 16: zoekt.webserver.v1.Q.boost:type_name -> zoekt.webserver.v1.Boost 1612 - 0, // 17: zoekt.webserver.v1.RawConfig.flags:type_name -> zoekt.webserver.v1.RawConfig.Flag 1613 - 2, // 18: zoekt.webserver.v1.Symbol.expr:type_name -> zoekt.webserver.v1.Q 1614 - 10, // 19: zoekt.webserver.v1.BranchesRepos.list:type_name -> zoekt.webserver.v1.BranchRepos 1615 - 21, // 20: zoekt.webserver.v1.RepoSet.set:type_name -> zoekt.webserver.v1.RepoSet.SetEntry 1616 - 2, // 21: zoekt.webserver.v1.Type.child:type_name -> zoekt.webserver.v1.Q 1617 - 1, // 22: zoekt.webserver.v1.Type.type:type_name -> zoekt.webserver.v1.Type.Kind 1618 - 2, // 23: zoekt.webserver.v1.And.children:type_name -> zoekt.webserver.v1.Q 1619 - 2, // 24: zoekt.webserver.v1.Or.children:type_name -> zoekt.webserver.v1.Q 1620 - 2, // 25: zoekt.webserver.v1.Not.child:type_name -> zoekt.webserver.v1.Q 1621 - 2, // 26: zoekt.webserver.v1.Boost.child:type_name -> zoekt.webserver.v1.Q 1622 - 27, // [27:27] is the sub-list for method output_type 1623 - 27, // [27:27] is the sub-list for method input_type 1624 - 27, // [27:27] is the sub-list for extension type_name 1625 - 27, // [27:27] is the sub-list for extension extendee 1626 - 0, // [0:27] is the sub-list for field type_name 1689 + 21, // 17: 
zoekt.webserver.v1.Q.meta:type_name -> zoekt.webserver.v1.Meta 1690 + 0, // 18: zoekt.webserver.v1.RawConfig.flags:type_name -> zoekt.webserver.v1.RawConfig.Flag 1691 + 2, // 19: zoekt.webserver.v1.Symbol.expr:type_name -> zoekt.webserver.v1.Q 1692 + 10, // 20: zoekt.webserver.v1.BranchesRepos.list:type_name -> zoekt.webserver.v1.BranchRepos 1693 + 22, // 21: zoekt.webserver.v1.RepoSet.set:type_name -> zoekt.webserver.v1.RepoSet.SetEntry 1694 + 2, // 22: zoekt.webserver.v1.Type.child:type_name -> zoekt.webserver.v1.Q 1695 + 1, // 23: zoekt.webserver.v1.Type.type:type_name -> zoekt.webserver.v1.Type.Kind 1696 + 2, // 24: zoekt.webserver.v1.And.children:type_name -> zoekt.webserver.v1.Q 1697 + 2, // 25: zoekt.webserver.v1.Or.children:type_name -> zoekt.webserver.v1.Q 1698 + 2, // 26: zoekt.webserver.v1.Not.child:type_name -> zoekt.webserver.v1.Q 1699 + 2, // 27: zoekt.webserver.v1.Boost.child:type_name -> zoekt.webserver.v1.Q 1700 + 28, // [28:28] is the sub-list for method output_type 1701 + 28, // [28:28] is the sub-list for method input_type 1702 + 28, // [28:28] is the sub-list for extension type_name 1703 + 28, // [28:28] is the sub-list for extension extendee 1704 + 0, // [0:28] is the sub-list for field type_name 1627 1705 } 1628 1706 1629 1707 func init() { file_zoekt_webserver_v1_query_proto_init() } ··· 1860 1938 return nil 1861 1939 } 1862 1940 } 1941 + file_zoekt_webserver_v1_query_proto_msgTypes[19].Exporter = func(v interface{}, i int) interface{} { 1942 + switch v := v.(*Meta); i { 1943 + case 0: 1944 + return &v.state 1945 + case 1: 1946 + return &v.sizeCache 1947 + case 2: 1948 + return &v.unknownFields 1949 + default: 1950 + return nil 1951 + } 1952 + } 1863 1953 } 1864 1954 file_zoekt_webserver_v1_query_proto_msgTypes[0].OneofWrappers = []interface{}{ 1865 1955 (*Q_RawConfig)(nil), ··· 1880 1970 (*Q_Not)(nil), 1881 1971 (*Q_Branch)(nil), 1882 1972 (*Q_Boost)(nil), 1973 + (*Q_Meta)(nil), 1883 1974 } 1884 1975 type x struct{} 1885 1976 out := 
protoimpl.TypeBuilder{ ··· 1887 1978 GoPackagePath: reflect.TypeOf(x{}).PkgPath(), 1888 1979 RawDescriptor: file_zoekt_webserver_v1_query_proto_rawDesc, 1889 1980 NumEnums: 2, 1890 - NumMessages: 20, 1981 + NumMessages: 21, 1891 1982 NumExtensions: 0, 1892 1983 NumServices: 0, 1893 1984 },
+7
grpc/protos/zoekt/webserver/v1/query.proto
··· 24 24 Not not = 16; 25 25 Branch branch = 17; 26 26 Boost boost = 18; 27 + Meta meta = 19; 27 28 } 28 29 } 29 30 ··· 148 149 Q child = 1; 149 150 double boost = 2; 150 151 } 152 + 153 + // Meta allows filtering results by repo metadata. 154 + message Meta { 155 + string key = 1; 156 + string value = 2; 157 + }
+2
index/builder.go
··· 715 715 716 716 repository.LatestCommitDate = b.opts.RepositoryDescription.LatestCommitDate 717 717 718 + repository.Metadata = b.opts.RepositoryDescription.Metadata 719 + 718 720 tempPath, finalPath, err := JsonMarshalRepoMetaTemp(shard, repository) 719 721 if err != nil { 720 722 return fmt.Errorf("writing repository metadta for shard %q: %w", shard, err)
+3 -2
index/builder_test.go
··· 31 31 opts := Options{ 32 32 IndexDir: dir, 33 33 RepositoryDescription: zoekt.Repository{ 34 - Name: "repo", 35 - Source: "./testdata/repo/", 34 + Name: "repo", 35 + Source: "./testdata/repo/", 36 + Metadata: map[string]string{"foo": "bar"}, 36 37 }, 37 38 DisableCTags: true, 38 39 }
+11
index/eval.go
··· 118 118 if !has { 119 119 return &query.Const{Value: false} 120 120 } 121 + case *query.Meta: 122 + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { 123 + if repo.Metadata == nil { 124 + return false 125 + } 126 + v, ok := repo.Metadata[r.Field] 127 + if !ok { 128 + return false 129 + } 130 + return r.Value.MatchString(v) 131 + }) 121 132 } 122 133 return q 123 134 })
+25
index/eval_test.go
··· 463 463 } 464 464 } 465 465 } 466 + 467 + func TestSimplifyMeta(t *testing.T) { 468 + re := regexp.MustCompile("^stable$") 469 + d := compoundReposShard(t, "foo", "bar") 470 + 471 + // Inject metadata into the fake repos 472 + d.repoMetaData[0].Metadata = map[string]string{"release": "stable"} 473 + d.repoMetaData[1].Metadata = map[string]string{"release": "beta"} 474 + 475 + all := &query.Meta{Field: "release", Value: regexp.MustCompile(".*")} 476 + some := &query.Meta{Field: "release", Value: re} 477 + none := &query.Meta{Field: "release", Value: regexp.MustCompile("^nonexistent$")} 478 + 479 + if got := d.simplify(all); !reflect.DeepEqual(got, &query.Const{Value: true}) { 480 + t.Errorf("simplify(all): got %v, want Const(true)", got) 481 + } 482 + 483 + if got := d.simplify(some); got != some { 484 + t.Errorf("simplify(some): got %v, want unchanged", got) 485 + } 486 + 487 + if got := d.simplify(none); !reflect.DeepEqual(got, &query.Const{Value: false}) { 488 + t.Errorf("simplify(none): got %v, want Const(false)", got) 489 + } 490 + }
+33
index/index_test.go
··· 2143 2143 } 2144 2144 } 2145 2145 2146 + func TestRepoWithMetadata(t *testing.T) { 2147 + sb := newShardBuilder() 2148 + sb.repoList = []zoekt.Repository{ 2149 + { 2150 + Name: "repo1", 2151 + Metadata: map[string]string{"language": "go", "custom_key": "value"}, 2152 + }, 2153 + } 2154 + 2155 + var buf bytes.Buffer 2156 + if err := sb.Write(&buf); err != nil { 2157 + t.Fatalf("failed to write shard: %v", err) 2158 + } 2159 + 2160 + // Simulate reading the shard back 2161 + f := &memSeeker{buf.Bytes()} 2162 + repoMetaData, _, err := ReadMetadata(f) 2163 + if err != nil { 2164 + t.Fatalf("failed to read metadata: %v", err) 2165 + } 2166 + 2167 + // Verify the metadata 2168 + if len(repoMetaData) != 1 { 2169 + t.Fatalf("expected 1 repository, got %d", len(repoMetaData)) 2170 + } 2171 + if got, want := repoMetaData[0].Metadata["language"], "go"; got != want { 2172 + t.Errorf("expected metadata 'language' to be %q, got %q", want, got) 2173 + } 2174 + if got, want := repoMetaData[0].Metadata["custom_key"], "value"; got != want { 2175 + t.Errorf("expected metadata 'custom_key' to be %q, got %q", want, got) 2176 + } 2177 + } 2178 + 2146 2179 func TestOr(t *testing.T) { 2147 2180 b := testShardBuilder(t, nil, 2148 2181 Document{Name: "f1", Content: []byte("needle")},
+22
index/matchtree.go
··· 1053 1053 boost: s.Boost, 1054 1054 }, nil 1055 1055 1056 + case *query.Meta: 1057 + reposWant := make([]bool, len(d.repoMetaData)) 1058 + for repoIdx, r := range d.repoMetaData { 1059 + if r.Metadata != nil { 1060 + if val, ok := r.Metadata[s.Field]; ok && s.Value.MatchString(val) { 1061 + reposWant[repoIdx] = true 1062 + } 1063 + } 1064 + } 1065 + 1066 + return &docMatchTree{ 1067 + reason: "Meta", 1068 + numDocs: d.numDocs(), 1069 + predicate: func(docID uint32) bool { 1070 + repoIdx := d.repos[docID] 1071 + if int(repoIdx) >= len(reposWant) { 1072 + return false 1073 + } 1074 + return reposWant[repoIdx] 1075 + }, 1076 + }, nil 1077 + 1056 1078 case *query.Substring: 1057 1079 return d.newSubstringMatchTree(s) 1058 1080
+39
index/matchtree_test.go
··· 423 423 } 424 424 } 425 425 } 426 + 427 + func TestMetaQueryMatchTree(t *testing.T) { 428 + d := &indexData{ 429 + repoMetaData: []zoekt.Repository{ 430 + {Name: "r0", Metadata: map[string]string{"license": "Apache-2.0"}}, 431 + {Name: "r1", Metadata: map[string]string{"license": "MIT"}}, 432 + {Name: "r2"}, // no metadata 433 + {Name: "r3", Metadata: map[string]string{"haystack": "needle"}}, 434 + {Name: "r4", Metadata: map[string]string{"note": "test"}}, 435 + }, 436 + fileBranchMasks: []uint64{1, 1, 1, 1, 1}, // 5 docs 437 + repos: []uint16{0, 1, 2, 3, 4}, // map docIDs to repos 438 + } 439 + 440 + q := &query.Meta{ 441 + Field: "license", 442 + Value: regexp.MustCompile("M.T"), 443 + } 444 + 445 + mt, err := d.newMatchTree(q, matchTreeOpt{}) 446 + if err != nil { 447 + t.Fatalf("failed to build matchTree: %v", err) 448 + } 449 + 450 + var matched []uint32 451 + for { 452 + doc := mt.nextDoc() 453 + if doc == math.MaxUint32 { 454 + break 455 + } 456 + matched = append(matched, doc) 457 + mt.prepare(doc) 458 + } 459 + 460 + want := []uint32{1} // only doc from r1 should match 461 + if !reflect.DeepEqual(matched, want) { 462 + t.Errorf("meta match failed: got %v, want %v", matched, want) 463 + } 464 + }
+6
index/merge.go
··· 116 116 } 117 117 lastRepoID = repoID 118 118 119 + // Initialize repo metadata if it does not already exist. 120 + repo := d.repoMetaData[repoID] 121 + if repo.Metadata == nil { 122 + repo.Metadata = make(map[string]string) 123 + } 124 + 119 125 // TODO we are losing empty repos on merging since we only get here if 120 126 // there is an associated document. 121 127
+19
query/parse.go
··· 240 240 } 241 241 // Later we will lift this into a root, like we do for caseQ 242 242 expr = &Type{Type: t, Child: nil} 243 + case tokMeta: 244 + // Split on ':' to separate field and value 245 + parts := bytes.SplitN([]byte(text), []byte(":"), 2) 246 + if len(parts) != 2 { 247 + return nil, 0, fmt.Errorf("query: invalid meta field syntax %q", text) 248 + } 249 + field := string(parts[0]) 250 + valuePattern := string(parts[1]) 251 + re, err := regexp.Compile(valuePattern) 252 + if err != nil { 253 + return nil, 0, fmt.Errorf("query: invalid regexp in meta value: %v", err) 254 + } 255 + expr = &Meta{ 256 + Field: field, 257 + Value: re, 258 + } 243 259 } 244 260 245 261 return expr, len(in) - len(b), nil ··· 393 409 tokArchived = 15 394 410 tokPublic = 16 395 411 tokFork = 17 412 + tokMeta = 18 396 413 ) 397 414 398 415 var tokNames = map[int]string{ ··· 413 430 tokLang: "Language", 414 431 tokSym: "Symbol", 415 432 tokType: "Type", 433 + tokMeta: "Meta", 416 434 } 417 435 418 436 var prefixes = map[string]int{ ··· 433 451 "sym:": tokSym, 434 452 "t:": tokType, 435 453 "type:": tokType, 454 + "meta.": tokMeta, 436 455 } 437 456 438 457 var reservedWords = map[string]int{
+69
query/parse_test.go
··· 179 179 } 180 180 } 181 181 } 182 + 183 + func TestMetaQueryParsing(t *testing.T) { 184 + cases := []struct { 185 + input string 186 + field string 187 + pattern string 188 + err bool 189 + }{ 190 + { 191 + input: "meta.visibility_level:20", 192 + field: "visibility_level", 193 + pattern: "20", 194 + err: false, 195 + }, 196 + { 197 + input: "meta.needle:ha.*stack", 198 + field: "needle", 199 + pattern: "ha.*stack", 200 + err: false, 201 + }, 202 + { 203 + input: "meta.public:true", 204 + field: "public", 205 + pattern: "true", 206 + err: false, 207 + }, 208 + { 209 + input: "meta.language:go", 210 + field: "language", 211 + pattern: "go", 212 + err: false, 213 + }, 214 + { 215 + input: "meta.invalid_field:(", 216 + field: "invalid_field", 217 + pattern: "(", 218 + err: true, 219 + }, 220 + } 221 + 222 + for _, c := range cases { 223 + t.Run(c.input, func(t *testing.T) { 224 + q, err := Parse(c.input) 225 + if c.err { 226 + if err == nil { 227 + t.Errorf("expected error, got nil") 228 + } 229 + return 230 + } 231 + 232 + if err != nil { 233 + t.Errorf("unexpected error: %v", err) 234 + } 235 + 236 + meta, ok := q.(*Meta) 237 + if !ok || meta == nil { 238 + t.Errorf("expected *Meta, got %T", q) 239 + return 240 + } 241 + 242 + if meta.Field != c.field { 243 + t.Errorf("expected field %q, got %q", c.field, meta.Field) 244 + } 245 + if meta.Value == nil || meta.Value.String() != c.pattern { 246 + t.Errorf("expected pattern %q, got %v", c.pattern, meta.Value) 247 + } 248 + }) 249 + } 250 + }
+11
query/query.go
··· 509 509 return fmt.Sprintf("branch:%q", q.Pattern) 510 510 } 511 511 512 + // Meta represents a query for metadata fields. 513 + type Meta struct { 514 + Field string // The metadata field name 515 + Value *regexp.Regexp // The value to match 516 + } 517 + 518 + // String returns a string representation of the Meta query. 519 + func (m *Meta) String() string { 520 + return fmt.Sprintf("meta.%s:%s", m.Field, m.Value) 521 + } 522 + 512 523 func queryChildren(q Q) []Q { 513 524 switch s := q.(type) { 514 525 case *And:
+13
query/query_proto.go
··· 93 93 return BranchFromProto(v.Branch), nil 94 94 case *webserverv1.Q_Boost: 95 95 return BoostFromProto(v.Boost) 96 + case *webserverv1.Q_Meta: 97 + return MetaFromProto(v.Meta) 96 98 default: 97 99 panic(fmt.Sprintf("unknown query node %T", p.Query)) 98 100 } ··· 372 374 return &Boost{ 373 375 Child: child, 374 376 Boost: p.GetBoost(), 377 + }, nil 378 + } 379 + 380 + func MetaFromProto(p *webserverv1.Meta) (*Meta, error) { 381 + re, err := regexp.Compile(p.GetValue()) 382 + if err != nil { 383 + return nil, fmt.Errorf("invalid regexp in Meta.Value: %w", err) 384 + } 385 + return &Meta{ 386 + Field: p.GetKey(), 387 + Value: re, 375 388 }, nil 376 389 } 377 390
+10
query/query_test.go
··· 178 178 }) 179 179 } 180 180 } 181 + 182 + func TestMetaSimplify(t *testing.T) { 183 + re := regexp.MustCompile("v1") 184 + q := &Meta{Field: "version", Value: re} 185 + 186 + simplified := q.Simplify() 187 + if !reflect.DeepEqual(q, simplified) { 188 + t.Errorf("Meta.Simplify() = %v, want %v", simplified, q) 189 + } 190 + }
testdata/shards/repo2_v16.00000.zoekt

This is a binary file and will not be displayed.

testdata/shards/repo_v16.00000.zoekt

This is a binary file and will not be displayed.

+8
web/trace.go
··· 26 26 q query.Q, 27 27 opts *zoekt.SearchOptions, 28 28 ) (*zoekt.SearchResult, error) { 29 + if opts == nil { 30 + opts = &zoekt.SearchOptions{} 31 + } 32 + 29 33 ctx = trace.WithOpenTracingEnabled(ctx, opts.Trace) 30 34 spanContext := trace.SpanContextFromContext(ctx) 31 35 if opts.Trace && spanContext != nil { ··· 42 46 opts *zoekt.SearchOptions, 43 47 sender zoekt.Sender, 44 48 ) error { 49 + if opts == nil { 50 + opts = &zoekt.SearchOptions{} 51 + } 52 + 45 53 ctx = trace.WithOpenTracingEnabled(ctx, opts.Trace) 46 54 spanContext := trace.SpanContextFromContext(ctx) 47 55 if opts.Trace && spanContext != nil {