// Source: grpc/v1/webserver.proto at commit
// ae9c94df0be52d0089fccfe138d80552b1b42676 (boltless.me/zoekt), a fork of
// https://github.com/sourcegraph/zoekt.
// Last change: "Fix bazel build (#590)" by Camden Cheek.
  1syntax = "proto3";
  2
  3package grpc.v1;
  4
  5import "google/protobuf/duration.proto";
  6import "google/protobuf/timestamp.proto";
  7import "query.proto";
  8
  9option go_package = "github.com/sourcegraph/zoekt/grpc/v1";
 10
 11service WebserverService {
 12  rpc Search(SearchRequest) returns (SearchResponse) {}
 13
 14  rpc StreamSearch(SearchRequest) returns (stream SearchResponse) {}
 15
 16  // List lists repositories. The query `q` can only contain
 17  // query.Repo atoms.
 18  rpc List(ListRequest) returns (ListResponse) {}
 19}
 20
 21message SearchRequest {
 22  Q query = 1;
 23  SearchOptions opts = 2;
 24}
 25
 26message SearchResponse {
 27  Stats stats = 1;
 28  Progress progress = 2;
 29
 30  repeated FileMatch files = 3;
 31
 32  // RepoURLs holds a repo => template string map.
 33  map<string, string> repo_urls = 4;
 34
 35  // FragmentNames holds a repo => template string map, for
 36  // the line number fragment.
 37  map<string, string> line_fragments = 5;
 38}
 39
 40message SearchOptions {
 41  // Return an upper-bound estimate of eligible documents in
 42  // stats.ShardFilesConsidered.
 43  bool estimate_doc_count = 1;
 44
 45  // Return the whole file.
 46  bool whole = 2;
 47
 48  // Maximum number of matches: skip all processing an index
 49  // shard after we found this many non-overlapping matches.
 50  int64 shard_max_match_count = 3;
 51
 52  // Maximum number of matches: stop looking for more matches
 53  // once we have this many matches across shards.
 54  int64 total_max_match_count = 4;
 55
 56  // Maximum number of matches: skip processing documents for a repository in
 57  // a shard once we have found ShardRepoMaxMatchCount.
 58  //
 59  // A compound shard may contain multiple repositories. This will most often
 60  // be set to 1 to find all repositories containing a result.
 61  int64 shard_repo_max_match_count = 5;
 62
 63  // Abort the search after this much time has passed.
 64  google.protobuf.Duration max_wall_time = 6;
 65
 66  // FlushWallTime if non-zero will stop streaming behaviour at first and
 67  // instead will collate and sort results. At FlushWallTime the results will
 68  // be sent and then the behaviour will revert to the normal streaming.
 69  google.protobuf.Duration flush_wall_time = 7;
 70
 71  // Trim the number of results after collating and sorting the
 72  // results
 73  int64 max_doc_display_count = 8;
 74
 75  // If set to a number greater than zero then up to this many number
 76  // of context lines will be added before and after each matched line.
 77  // Note that the included context lines might contain matches and
 78  // it's up to the consumer of the result to remove those lines.
 79  int64 num_context_lines = 9;
 80
 81  // If true, ChunkMatches will be returned in each FileMatch rather than LineMatches
 82  // EXPERIMENTAL: the behavior of this flag may be changed in future versions.
 83  bool chunk_matches = 10;
 84
 85  // EXPERIMENTAL. If true, document ranks are used as additional input for
 86  // sorting matches.
 87  bool use_document_ranks = 11;
 88
 89  // EXPERIMENTAL. When UseDocumentRanks is enabled, this can be optionally set to adjust
 90  // their weight in the file match score. If the value is <= 0.0, the default weight value
 91  // will be used. This option is temporary and is only exposed for testing/ tuning purposes.
 92  double document_ranks_weight = 12;
 93
 94  // Trace turns on opentracing for this request if true and if the Jaeger address was provided as
 95  // a command-line flag
 96  bool trace = 13;
 97
 98  // If set, the search results will contain debug information for scoring.
 99  bool debug_score = 14;
100
101  // EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula.
102  // Currently, this treats each match in a file as a term and computes an approximation to BM25.
103  // When enabled, all other scoring signals are ignored, including document ranks.
104  bool use_keyword_scoring = 15;
105}
106
// ListRequest is the input of the List RPC.
message ListRequest {
  // Selects the repositories to list.
  Q query = 1;

  // Options for this listing; see ListOptions.
  ListOptions opts = 2;
}
111
// ListOptions selects which part of the List response is populated.
message ListOptions {
  // RepoListField names the response field a caller wants populated.
  enum RepoListField {
    // Zero value: no field was explicitly selected.
    REPO_LIST_FIELD_UNKNOWN = 0;
    // Populate the `repos` field of the response.
    REPO_LIST_FIELD_REPOS = 1;
    // Populate the `minimal` field of the response.
    REPO_LIST_FIELD_MINIMAL = 2;
    // Populate the `repos_map` field of the response.
    REPO_LIST_FIELD_REPOS_MAP = 3;
  }

  // Field decides which field to populate in the RepoList response.
  RepoListField field = 1;

  // Return only the minimal data per repo that the Sourcegraph frontend
  // needs.
  //
  // Deprecated: use `field` instead. The field option makes the deprecation
  // visible to code generators and linters; the wire format is unchanged.
  bool minimal = 16 [deprecated = true];
}
128
// ListResponse is the output of the List RPC.
message ListResponse {
  // Returned when ListOptions.Field is RepoListFieldRepos.
  repeated RepoListEntry repos = 1;

  // Set when ListOptions.Field is RepoListFieldReposMap.
  // NOTE(review): the uint32 key is presumably the repository ID - confirm.
  map<uint32, MinimalRepoListEntry> repos_map = 2;

  // NOTE(review): presumably the number of shards that crashed while serving
  // the request - confirm.
  int64 crashes = 3;

  // Stats response to a List request: the aggregate RepoStats of all repos
  // matching the input query.
  RepoStats stats = 4;

  // Returned when ListOptions.Field is RepoListFieldMinimal.
  //
  // Deprecated: use `repos_map` instead. The field option makes the
  // deprecation visible to code generators and linters; the wire format is
  // unchanged.
  map<uint32, MinimalRepoListEntry> minimal = 5 [deprecated = true];
}
147
// RepoListEntry bundles a repository with its index metadata and statistics.
message RepoListEntry {
  // The repository this entry describes.
  Repository repository = 1;

  // Metadata of the index for this repository.
  IndexMetadata index_metadata = 2;

  // Statistics for this repository; see RepoStats.
  RepoStats stats = 3;
}
153
// Repository describes a repository held in the index.
message Repository {
  // ID of the repository in Sourcegraph.
  uint32 id = 1;

  // Name of the repository.
  string name = 2;

  // URL of the repository.
  string url = 3;

  // Physical origin of this repo, e.g. the full path to the zip file or to
  // the git repository directory. It is not shown in the UI, but can be used
  // to detect orphaned index shards.
  string source = 4;

  // Branches of this repo that are indexed.
  repeated RepositoryBranch branches = 5;

  // Sub-repositories; not populated unless this is the super project.
  map<string, Repository> sub_repo_map = 6;

  // URL template for linking to the commit of a branch.
  string commit_url_template = 7;

  // URL template for reaching a file in the repository. The template has
  // access to {{Branch}} and {{Path}}.
  string file_url_template = 8;

  // URL fragment appended to a file URL to address a line number. The
  // template has access to {{LineNumber}} and should include the separator,
  // generally '#' or ';'.
  string line_fragment_template = 9;

  // Perf optimization: set when the shard is loaded, mirroring the value of
  // "priority" stored in RawConfig.
  double priority = 10;

  // Every zoekt.* configuration setting.
  map<string, string> raw_config = 11;

  // Importance of the repository; a bigger value means more important.
  uint32 rank = 12;

  // Hash of the options that were used to create the index for the repo.
  string index_options = 13;

  // True if this repository has indexed ctags output. Sourcegraph specific:
  // the field would fit better in IndexMetadata, but it is stored here
  // because the Sourcegraph frontend can read this structure and cannot read
  // IndexMetadata.
  bool has_symbols = 14;

  // True if searching this repo is not allowed.
  bool tombstone = 15;

  // Date of the latest commit among all indexed Branches. It may be the zero
  // time (time.Time's 0-value) if the repository was last indexed before this
  // field existed.
  google.protobuf.Timestamp latest_commit_date = 16;

  // Set of file paths that should be ignored across all branches in this
  // shard.
  // NOTE(review): unlike every other field here this name is not
  // lower_snake_case. It is kept because a rename would change the JSON name
  // and generated accessors in some languages.
  repeated string FileTombstones = 17;
}
220
// IndexMetadata carries metadata about an index. It is attached to each
// RepoListEntry.
// NOTE(review): the source has no field documentation; the notes below are
// inferred from field names and need confirmation.
message IndexMetadata {
  // Presumably the version of the on-disk index format.
  int64 index_format_version = 1;

  // Presumably the feature version of the index.
  int64 index_feature_version = 2;

  // Presumably the minimum reader version able to read the index.
  int64 index_min_reader_version = 3;

  // Presumably the time the index was created.
  google.protobuf.Timestamp index_time = 4;

  // Presumably true if the indexed content is plain ASCII.
  bool plain_ascii = 5;

  // Map from a string key (presumably a language name) to a uint32 value.
  map<string, uint32> language_map = 6;

  // Presumably the zoekt version that produced the index.
  string zoekt_version = 7;

  // Presumably an identifier of the index.
  string id = 8;
}
231
// MinimalRepoListEntry is the reduced per-repository record used as the value
// type of the ListResponse maps.
message MinimalRepoListEntry {
  // NOTE(review): presumably mirrors Repository.has_symbols - confirm.
  bool has_symbols = 1;

  // NOTE(review): presumably the indexed branches of the repo - confirm.
  repeated RepositoryBranch branches = 2;
}
236
// RepositoryBranch describes one indexed branch: a branch name paired with a
// version.
message RepositoryBranch {
  // Name of the branch.
  string name = 1;

  // Version the branch was indexed at.
  string version = 2;
}
243
// RepoStats collects statistics for a set of repositories.
message RepoStats {
  // Used for aggregating the number of repositories.
  int64 repos = 1;

  // Total number of search shards.
  int64 shards = 2;

  // Number of documents (files).
  int64 documents = 3;

  // Amount of RAM used for index overhead.
  int64 index_bytes = 4;

  // Amount of RAM used for raw content.
  int64 content_bytes = 5;

  // The statistics below are Sourcegraph specific and are less efficient to
  // calculate than those above: shard load time was experimentally measured
  // to be about 10% slower. They are nevertheless very useful to track, and
  // computing them outside of load time introduces a lot of complexity.

  // Number of newlines "\n" appearing in the documents zoekt indexed. This
  // differs from a line count because lines not terminated by "\n" are not
  // included (e.g. a file with no "\n", or a final line without "\n").
  // Note: zoekt deduplicates documents across branches, so a path with the
  // same contents on multiple branches yields a single document whose
  // newlines are counted once. See default_branch_new_lines_count and
  // other_branches_new_lines_count for counts that do not deduplicate.
  uint64 new_lines_count = 6;

  // Number of newlines "\n" in the default branch.
  uint64 default_branch_new_lines_count = 7;

  // Number of newlines "\n" in all branches except the default branch.
  uint64 other_branches_new_lines_count = 8;
}
284
// Stats reports counters and timings for a search.
message Stats {
  // I/O spent reading contents.
  int64 content_bytes_loaded = 1;

  // I/O spent reading from the index.
  int64 index_bytes_loaded = 2;

  // How many search shards had a crash.
  int64 crashes = 3;

  // Wall clock time of this search.
  google.protobuf.Duration duration = 4;

  // How many files contain a match.
  int64 file_count = 5;

  // How many files, in the shards considered, were considered.
  int64 shard_files_considered = 6;

  // Files that were evaluated; equivalently, files for which all atom
  // matches (negations included) evaluated to true.
  int64 files_considered = 7;

  // Files whose content was loaded in order to verify substring matches.
  int64 files_loaded = 8;

  // Candidate files whose contents were not examined because enough matches
  // had already been gathered.
  int64 files_skipped = 9;

  // Shards scanned to find matches.
  int64 shards_scanned = 10;

  // Shards left unprocessed because a query was canceled.
  int64 shards_skipped = 11;

  // Shards left unprocessed because the ngram filter rejected the query,
  // indicating the shard had no matches.
  int64 shards_skipped_filter = 12;

  // How many non-overlapping matches were found.
  int64 match_count = 13;

  // How many candidate matches resulted from searching ngrams.
  int64 ngram_matches = 14;

  // Wall clock time the search spent queued.
  google.protobuf.Duration wait = 15;

  // How many times regexp was called on the evaluated files.
  int64 regexps_considered = 16;

  // Explains why results were flushed.
  FlushReason flush_reason = 17;
}
340
// FlushReason is the type of Stats.flush_reason, which explains why results
// were flushed.
// NOTE(review): the values carry no FLUSH_REASON_ prefix. Enum value names
// share the scope of the enclosing package, so they could collide with values
// of other enums in grpc.v1. They are kept because a rename would change
// generated identifiers.
enum FlushReason {
  // Zero value.
  UNKNOWN = 0;
  // NOTE(review): presumably the flush timer fired - confirm.
  TIMER_EXPIRED = 1;
  // NOTE(review): presumably the last flush of a search - confirm.
  FINAL_FLUSH = 2;
  // NOTE(review): presumably a size limit was reached - confirm.
  MAX_SIZE = 3;
}
347
// Progress describes the global progress of the running search query. The
// frontend uses it to reorder results and to emit them once they are stable.
// Sourcegraph specific: it is used when several zoekt-webserver instances are
// queried.
message Progress {
  // Priority of the shard that was searched.
  double priority = 1;

  // Maximum priority among the pending results being searched in parallel.
  // It allows results to be reordered once the result set is known to be
  // stable: a result can be returned to the user when its Priority is
  // greater than the max(MaxPendingPriority) taken over the latest results
  // of each backend.
  //
  // The value decreases monotonically from one search result to the next.
  double max_pending_priority = 2;
}
362
// FileMatch groups every match found within one file.
message FileMatch {
  // Ranking score; higher is better.
  double score = 1;

  // Debugging output. Requires DebugScore to be set; it is public so that
  // tests in other packages can print some diagnostics.
  string debug = 2;

  // NOTE(review): presumably the path of the matched file - confirm.
  string file_name = 3;

  // Globally unique name of the repo the match belongs to.
  string repository = 4;

  // NOTE(review): presumably the branches on which the file matched -
  // confirm.
  repeated string branches = 5;

  // Either line_matches or chunk_matches is returned, depending on whether
  // SearchOptions.ChunkMatches is set.
  repeated LineMatch line_matches = 6;
  repeated ChunkMatch chunk_matches = 7;

  // Sourcegraph extension: the ID of the Repository in Sourcegraph.
  uint32 repository_id = 8;

  // NOTE(review): presumably the priority of the repository - confirm.
  double repository_priority = 9;

  // File content; only set if requested.
  bytes content = 10;

  // Checksum of the content.
  bytes checksum = 11;

  // Language detected for the result.
  string language = 12;

  // Globally unique name of the repo, if the file came from a subrepository.
  string sub_repository_name = 13;

  // Prefix at which the subrepository was mounted.
  string sub_repository_path = 14;

  // Commit SHA1 (hex) of the (sub)repo that holds the file.
  string version = 15;
}
410
// LineMatch is one entry of FileMatch.line_matches.
// NOTE(review): the source leaves line, line_start, line_end and line_number
// undocumented; their exact meaning and units need confirmation.
message LineMatch {
  bytes line = 1;
  int64 line_start = 2;
  int64 line_end = 3;
  int64 line_number = 4;

  // Context around the match; populated only when
  // SearchOptions.NumContextLines is > 0.
  bytes before = 5;
  bytes after = 6;

  // When set, the match was on the filename.
  bool file_name = 7;

  // Higher is better. It ranks only the quality of the match within the
  // file and does not take the rank of the file into account.
  double score = 8;

  // NOTE(review): presumably scoring debug text, see
  // SearchOptions.debug_score - confirm.
  string debug_score = 9;

  // The fragments of this line that matched; see LineFragmentMatch.
  repeated LineFragmentMatch line_fragments = 10;
}
431
// LineFragmentMatch locates one matching fragment inside a LineMatch.
message LineFragmentMatch {
  // Byte offset of the fragment within the line.
  int64 line_offset = 1;

  // Byte offset of the fragment from the start of the file.
  uint32 offset = 2;

  // Count of matching bytes.
  int64 match_length = 3;

  // Symbol information for the fragment; declared optional, so it may be
  // absent.
  optional SymbolInfo symbol_info = 4;
}
444
// SymbolInfo carries symbol information attached to a match (see
// LineFragmentMatch.symbol_info and ChunkMatch.symbol_info).
// NOTE(review): the source has no field documentation; the notes below are
// inferred from field names and need confirmation.
message SymbolInfo {
  // Presumably the symbol name.
  string sym = 1;

  // Presumably the kind of the symbol.
  string kind = 2;

  // Presumably the name of the enclosing (parent) symbol.
  string parent = 3;

  // Presumably the kind of the parent symbol.
  string parent_kind = 4;
}
451
// ChunkMatch is one entry of FileMatch.chunk_matches.
message ChunkMatch {
  // Contiguous run of complete lines that fully contains Ranges.
  bytes content = 1;

  // Inclusive location where content begins, relative to the beginning of
  // the file. It is always at the beginning of a line (Column is always 1).
  Location content_start = 2;

  // True if the match is on the file name; Content then holds the file
  // name.
  bool file_name = 3;

  // The matching ranges within this chunk. Every range is relative to the
  // beginning of the file, not to the beginning of Content.
  repeated Range ranges = 4;

  // Symbol information associated with Ranges. When present its length
  // equals that of Ranges.
  // NOTE(review): the original comment says elements "may be nil"; how an
  // absent element is represented in this repeated field is not shown here -
  // confirm.
  repeated SymbolInfo symbol_info = 5;

  // NOTE(review): presumably a ranking score like LineMatch.score - confirm.
  double score = 6;

  // NOTE(review): presumably scoring debug text, see
  // SearchOptions.debug_score - confirm.
  string debug_score = 7;
}
475
// Range is a half-open span between two Locations.
message Range {
  // Beginning of the range (inclusive).
  Location start = 1;

  // End of the range (exclusive).
  Location end = 2;
}
482
// Location identifies a position in a file.
message Location {
  // Byte offset from the beginning of the file, 0-based.
  uint32 byte_offset = 1;

  // Line number from the beginning of the file, 1-based.
  uint32 line_number = 2;

  // Column number from the beginning of the line, 1-based, counted in runes.
  uint32 column = 3;
}
Configure Feed

Configure Feed