fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

syntax = "proto3";

package grpc.v1;

import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "query.proto";

option go_package = "github.com/sourcegraph/zoekt/grpc/v1";

// WebserverService exposes the search and repository-listing RPCs.
service WebserverService {
  // Search answers a SearchRequest with a single SearchResponse.
  rpc Search(SearchRequest) returns (SearchResponse) {}

  // StreamSearch answers a SearchRequest with a stream of SearchResponse
  // messages.
  rpc StreamSearch(SearchRequest) returns (stream SearchResponse) {}

  // List lists repositories. The query `q` can only contain
  // query.Repo atoms.
  rpc List(ListRequest) returns (ListResponse) {}
}

// SearchRequest is the request message for the Search and StreamSearch RPCs.
message SearchRequest {
  // The query to evaluate. Q is not declared in this file; presumably it
  // comes from the imported query.proto.
  Q query = 1;

  // Options controlling how the search is executed.
  SearchOptions opts = 2;
}

// SearchResponse is the response message for the Search and StreamSearch RPCs.
message SearchResponse {
  // Statistics about the search.
  Stats stats = 1;

  // Global progress of the running search query.
  Progress progress = 2;

  // The files that matched.
  repeated FileMatch files = 3;

  // RepoURLs holds a repo => template string map.
  map<string, string> repo_urls = 4;

  // line_fragments holds a repo => template string map, for
  // the line number fragment.
  map<string, string> line_fragments = 5;
}

// SearchOptions holds the per-request knobs for a search.
message SearchOptions {
  // Return an upper-bound estimate of eligible documents in
  // stats.ShardFilesConsidered.
  bool estimate_doc_count = 1;

  // Return the whole file.
  bool whole = 2;

  // Maximum number of matches: skip all processing an index
  // shard after we found this many non-overlapping matches.
  int64 shard_max_match_count = 3;

  // Maximum number of matches: stop looking for more matches
  // once we have this many matches across shards.
  int64 total_max_match_count = 4;

  // Maximum number of matches: skip processing documents for a repository in
  // a shard once we have found ShardRepoMaxMatchCount.
  //
  // A compound shard may contain multiple repositories. This will most often
  // be set to 1 to find all repositories containing a result.
  int64 shard_repo_max_match_count = 5;

  // Abort the search after this much time has passed.
  google.protobuf.Duration max_wall_time = 6;

  // FlushWallTime if non-zero will stop streaming behaviour at first and
  // instead will collate and sort results. At FlushWallTime the results will
  // be sent and then the behaviour will revert to the normal streaming.
  google.protobuf.Duration flush_wall_time = 7;

  // Trim the number of results after collating and sorting the
  // results.
  int64 max_doc_display_count = 8;

  // If set to a number greater than zero then up to this many number
  // of context lines will be added before and after each matched line.
  // Note that the included context lines might contain matches and
  // it's up to the consumer of the result to remove those lines.
  int64 num_context_lines = 9;

  // If true, ChunkMatches will be returned in each FileMatch rather than
  // LineMatches.
  // EXPERIMENTAL: the behavior of this flag may be changed in future versions.
  bool chunk_matches = 10;

  // EXPERIMENTAL. If true, document ranks are used as additional input for
  // sorting matches.
  bool use_document_ranks = 11;

  // EXPERIMENTAL. When UseDocumentRanks is enabled, this can be optionally
  // set to adjust their weight in the file match score. If the value is
  // <= 0.0, the default weight value will be used. This option is temporary
  // and is only exposed for testing/tuning purposes.
  double document_ranks_weight = 12;

  // Trace turns on opentracing for this request if true and if the Jaeger
  // address was provided as a command-line flag.
  bool trace = 13;

  // If set, the search results will contain debug information for scoring.
  bool debug_score = 14;

  // EXPERIMENTAL. If true, use keyword-style scoring instead of the default
  // scoring formula. Currently, this treats each match in a file as a term
  // and computes an approximation to BM25. When enabled, all other scoring
  // signals are ignored, including document ranks.
  bool use_keyword_scoring = 15;
}

// ListRequest is the request message for the List RPC.
message ListRequest {
  // The query selecting repositories. Q is not declared in this file;
  // presumably it comes from the imported query.proto.
  Q query = 1;

  // Options controlling which data is returned.
  ListOptions opts = 2;
}

// ListOptions holds the per-request knobs for a List call.
message ListOptions {
  // RepoListField selects which field of ListResponse is populated.
  enum RepoListField {
    REPO_LIST_FIELD_UNKNOWN = 0;
    REPO_LIST_FIELD_REPOS = 1;
    REPO_LIST_FIELD_MINIMAL = 2;
    REPO_LIST_FIELD_REPOS_MAP = 3;
  }

  // Field decides which field to populate in RepoList response.
  RepoListField field = 1;

  // Return only Minimal data per repo that Sourcegraph frontend needs.
  //
  // Deprecated: use Field
  bool minimal = 16 [deprecated = true];
}

// ListResponse is the response message for the List RPC.
message ListResponse {
  // Returned when ListOptions.Field is RepoListFieldRepos.
  repeated RepoListEntry repos = 1;

  // ReposMap is set when ListOptions.Field is RepoListFieldReposMap.
  map<uint32, MinimalRepoListEntry> repos_map = 2;

  // NOTE(review): presumably the number of search shards that crashed while
  // listing (compare Stats.crashes) -- confirm against the server code.
  int64 crashes = 3;

  // Stats response to a List request.
  // This is the aggregate RepoStats of all repos matching the input query.
  RepoStats stats = 4;

  // Returned when ListOptions.Field is RepoListFieldMinimal.
  //
  // Deprecated: use ReposMap.
  map<uint32, MinimalRepoListEntry> minimal = 5 [deprecated = true];
}

// RepoListEntry bundles a repository with its index metadata and statistics.
message RepoListEntry {
  Repository repository = 1;
  IndexMetadata index_metadata = 2;
  RepoStats stats = 3;
}

// Repository describes an indexed repository.
message Repository {
  // Sourcegraph's repository ID
  uint32 id = 1;

  // The repository name
  string name = 2;

  // The repository URL.
  string url = 3;

  // The physical source where this repo came from, eg. full
  // path to the zip filename or git repository directory. This
  // will not be exposed in the UI, but can be used to detect
  // orphaned index shards.
  string source = 4;

  // The branches indexed in this repo.
  repeated RepositoryBranch branches = 5;

  // Nil if this is not the super project.
  map<string, Repository> sub_repo_map = 6;

  // URL template to link to the commit of a branch
  string commit_url_template = 7;

  // The repository URL for getting to a file. Has access to
  // {{Branch}}, {{Path}}
  string file_url_template = 8;

  // The URL fragment to add to a file URL for line numbers. has
  // access to {{LineNumber}}. The fragment should include the
  // separator, generally '#' or ';'.
  string line_fragment_template = 9;

  // Perf optimization: priority is set when we load the shard. It
  // corresponds to the value of "priority" stored in RawConfig.
  double priority = 10;

  // All zoekt.* configuration settings.
  map<string, string> raw_config = 11;

  // Importance of the repository, bigger is more important
  uint32 rank = 12;

  // index_options is a hash of the options used to create the index for the
  // repo.
  string index_options = 13;

  // has_symbols is true if this repository has indexed ctags
  // output. Sourcegraph specific: This field is more appropriate for
  // IndexMetadata. However, we store it here since the Sourcegraph frontend
  // can read this structure but not IndexMetadata.
  bool has_symbols = 14;

  // tombstone is true if we are not allowed to search this repo.
  bool tombstone = 15;

  // latest_commit_date is the date of the latest commit among all indexed
  // Branches. The date might be time.Time's 0-value if the repository was
  // last indexed before this field was added.
  google.protobuf.Timestamp latest_commit_date = 16;

  // file_tombstones is a set of file paths that should be ignored across all
  // branches in this shard.
  //
  // NOTE(review): the field name is not lower_snake_case like its siblings.
  // It is kept as-is because renaming it would change the JSON/text-format
  // name and the generated accessors in some languages.
  repeated string FileTombstones = 17;
}

// IndexMetadata describes the index that was built for a repository.
message IndexMetadata {
  int64 index_format_version = 1;
  int64 index_feature_version = 2;
  int64 index_min_reader_version = 3;
  google.protobuf.Timestamp index_time = 4;
  bool plain_ascii = 5;
  map<string, uint32> language_map = 6;
  string zoekt_version = 7;
  string id = 8;
}

// MinimalRepoListEntry is the reduced per-repository entry used as the value
// type of ListResponse.repos_map and ListResponse.minimal.
message MinimalRepoListEntry {
  bool has_symbols = 1;
  repeated RepositoryBranch branches = 2;
}

// RepositoryBranch describes an indexed branch, which is a name
// combined with a version.
message RepositoryBranch {
  string name = 1;
  string version = 2;
}

// RepoStats is a collection of statistics for a set of repositories.
message RepoStats {
  // repos is used for aggregating the number of repositories.
  int64 repos = 1;

  // shards is the total number of search shards.
  int64 shards = 2;

  // documents holds the number of documents or files.
  int64 documents = 3;

  // index_bytes is the amount of RAM used for index overhead.
  int64 index_bytes = 4;

  // content_bytes is the amount of RAM used for raw content.
  int64 content_bytes = 5;

  // Sourcegraph specific stats below. These are not as efficient to calculate
  // as the above statistics. We experimentally measured about a 10% slower
  // shard load time. However, we find these values very useful to track and
  // computing them outside of load time introduces a lot of complexity.

  // new_lines_count is the number of newlines "\n" that appear in the zoekt
  // indexed documents. This is not exactly the same as line count, since it
  // will not include lines not terminated by "\n" (eg a file with no "\n", or
  // a final line without "\n"). Note: Zoekt deduplicates documents across
  // branches, so if a path has the same contents on multiple branches, there
  // is only one document for it. As such, that document's newlines are only
  // counted once. See default_branch_new_lines_count and
  // other_branches_new_lines_count for counts which do not deduplicate.
  uint64 new_lines_count = 6;

  // default_branch_new_lines_count is the number of newlines "\n" in the
  // default branch.
  uint64 default_branch_new_lines_count = 7;

  // other_branches_new_lines_count is the number of newlines "\n" in all
  // branches except the default branch.
  uint64 other_branches_new_lines_count = 8;
}

// Stats holds the statistics reported for a search.
message Stats {
  // Amount of I/O for reading contents.
  int64 content_bytes_loaded = 1;

  // Amount of I/O for reading from index.
  int64 index_bytes_loaded = 2;

  // Number of search shards that had a crash.
  int64 crashes = 3;

  // Wall clock time for this search
  google.protobuf.Duration duration = 4;

  // Number of files containing a match.
  int64 file_count = 5;

  // Number of files in shards that we considered.
  int64 shard_files_considered = 6;

  // Files that we evaluated. Equivalent to files for which all
  // atom matches (including negations) evaluated to true.
  int64 files_considered = 7;

  // Files for which we loaded file content to verify substring matches
  int64 files_loaded = 8;

  // Candidate files whose contents weren't examined because we
  // gathered enough matches.
  int64 files_skipped = 9;

  // Shards that we scanned to find matches.
  int64 shards_scanned = 10;

  // Shards that we did not process because a query was canceled.
  int64 shards_skipped = 11;

  // Shards that we did not process because the query was rejected by the
  // ngram filter indicating it had no matches.
  int64 shards_skipped_filter = 12;

  // Number of non-overlapping matches
  int64 match_count = 13;

  // Number of candidate matches as a result of searching ngrams.
  int64 ngram_matches = 14;

  // Wall clock time for queued search.
  google.protobuf.Duration wait = 15;

  // Number of times regexp was called on files that we evaluated.
  int64 regexps_considered = 16;

  // FlushReason explains why results were flushed.
  FlushReason flush_reason = 17;
}

// FlushReason enumerates the reasons results can be flushed; see
// Stats.flush_reason.
//
// NOTE(review): the values are not prefixed with the enum name, so they share
// the package scope with every other top-level enum value. They are kept
// unchanged because renaming them would break generated code.
enum FlushReason {
  UNKNOWN = 0;
  TIMER_EXPIRED = 1;
  FINAL_FLUSH = 2;
  MAX_SIZE = 3;
}

// Progress contains information about the global progress of the running
// search query. This is used by the frontend to reorder results and emit them
// when stable. Sourcegraph specific: this is used when querying multiple
// zoekt-webserver instances.
message Progress {
  // Priority of the shard that was searched.
  double priority = 1;

  // max_pending_priority is the maximum priority of pending result that is
  // being searched in parallel. This is used to reorder results when the
  // result set is known to be stable -- that is, when a result's Priority is
  // greater than the max(MaxPendingPriority) from the latest results of each
  // backend, it can be returned to the user.
  //
  // max_pending_priority decreases monotonically in each SearchResponse.
  double max_pending_priority = 2;
}

// FileMatch contains all the matches within a file.
message FileMatch {
  // Ranking; the higher, the better.
  double score = 1;

  // For debugging. Needs DebugScore set, but public so tests in
  // other packages can print some diagnostics.
  string debug = 2;

  string file_name = 3;

  // Repository is the globally unique name of the repo of the
  // match
  string repository = 4;
  repeated string branches = 5;

  // One of line_matches or chunk_matches will be returned depending on whether
  // the SearchOptions.ChunkMatches is set.
  repeated LineMatch line_matches = 6;
  repeated ChunkMatch chunk_matches = 7;

  // repository_id is a Sourcegraph extension. This is the ID of Repository in
  // Sourcegraph.
  uint32 repository_id = 8;

  double repository_priority = 9;

  // Only set if requested
  bytes content = 10;

  // Checksum of the content.
  bytes checksum = 11;

  // Detected language of the result.
  string language = 12;

  // sub_repository_name is the globally unique name of the repo,
  // if it came from a subrepository
  string sub_repository_name = 13;

  // sub_repository_path holds the prefix where the subrepository
  // was mounted.
  string sub_repository_path = 14;

  // Commit SHA1 (hex) of the (sub)repo holding the file.
  string version = 15;
}

// LineMatch is a match reported at line granularity; see
// FileMatch.line_matches.
message LineMatch {
  bytes line = 1;
  int64 line_start = 2;
  int64 line_end = 3;
  int64 line_number = 4;

  // before and after are only set when SearchOptions.NumContextLines is > 0
  bytes before = 5;
  bytes after = 6;

  // If set, this was a match on the filename.
  bool file_name = 7;

  // The higher the better. Only ranks the quality of the match
  // within the file, does not take rank of file into account
  double score = 8;
  string debug_score = 9;

  repeated LineFragmentMatch line_fragments = 10;
}

// LineFragmentMatch is one matching fragment within a LineMatch.
message LineFragmentMatch {
  // Offset within the line, in bytes.
  int64 line_offset = 1;

  // Offset from file start, in bytes.
  uint32 offset = 2;

  // Number bytes that match.
  int64 match_length = 3;

  optional SymbolInfo symbol_info = 4;
}

// SymbolInfo carries symbol information attached to a match; see
// LineFragmentMatch.symbol_info and ChunkMatch.symbol_info.
message SymbolInfo {
  string sym = 1;
  string kind = 2;
  string parent = 3;
  string parent_kind = 4;
}

// ChunkMatch is a match reported as a chunk of complete lines; see
// FileMatch.chunk_matches.
message ChunkMatch {
  // A contiguous range of complete lines that fully contains Ranges.
  bytes content = 1;

  // The location (inclusive) of the beginning of content
  // relative to the beginning of the file. It will always be at the
  // beginning of a line (Column will always be 1).
  Location content_start = 2;

  // True if this match is a match on the file name, in
  // which case Content will contain the file name.
  bool file_name = 3;

  // A set of matching ranges within this chunk. Each range is relative
  // to the beginning of the file (not the beginning of Content).
  repeated Range ranges = 4;

  // The symbol information associated with Ranges. If it is non-nil,
  // its length will equal that of Ranges. Any of its elements may be nil.
  repeated SymbolInfo symbol_info = 5;

  double score = 6;
  string debug_score = 7;
}

// Range is a half-open span between two Locations.
message Range {
  // The inclusive beginning of the range.
  Location start = 1;

  // The exclusive end of the range.
  Location end = 2;
}

// Location identifies a position within a file.
message Location {
  // 0-based byte offset from the beginning of the file
  uint32 byte_offset = 1;

  // 1-based line number from the beginning of the file
  uint32 line_number = 2;

  // 1-based column number (in runes) from the beginning of line
  uint32 column = 3;
}