// Fork of https://github.com/sourcegraph/zoekt
1syntax = "proto3";
2
3package grpc.v1;
4
5import "google/protobuf/duration.proto";
6import "google/protobuf/timestamp.proto";
7import "query.proto";
8
9option go_package = "github.com/sourcegraph/zoekt/grpc/v1";
10
// WebserverService exposes the search and repository-listing RPCs.
service WebserverService {
  // Search takes a SearchRequest and returns a single SearchResponse.
  rpc Search(SearchRequest) returns (SearchResponse);

  // StreamSearch takes a SearchRequest and returns a stream of
  // SearchResponse messages.
  rpc StreamSearch(SearchRequest) returns (stream SearchResponse);

  // List lists repositories. The query `q` may contain only query.Repo
  // atoms.
  rpc List(ListRequest) returns (ListResponse);
}
20
// SearchRequest is the request message of the Search and StreamSearch RPCs.
message SearchRequest {
  // The query to evaluate. Q is not declared in this file; presumably it
  // comes from the imported query.proto.
  Q query = 1;

  // Options that apply to this search.
  SearchOptions opts = 2;
}
25
// SearchResponse is the response message of the Search and StreamSearch RPCs.
message SearchResponse {
  // Statistics for the search.
  Stats stats = 1;

  // Progress information for the running search query.
  Progress progress = 2;

  // The file matches of this response.
  repeated FileMatch files = 3;

  // repo_urls holds a repo => template string map.
  map<string, string> repo_urls = 4;

  // line_fragments holds a repo => template string map, for the line number
  // fragment.
  map<string, string> line_fragments = 5;
}
39
// SearchOptions holds the per-request options of a search.
message SearchOptions {
  // When set, an upper-bound estimate of the eligible documents is returned
  // in Stats.shard_files_considered.
  bool estimate_doc_count = 1;

  // When set, the whole file is returned.
  bool whole = 2;

  // Match limit per shard: all processing of an index shard is skipped after
  // this many non-overlapping matches have been found in it.
  int64 shard_max_match_count = 3;

  // Match limit across shards: the search stops looking for more matches
  // once it has this many matches in total.
  int64 total_max_match_count = 4;

  // Match limit per repository within a shard: processing of documents for a
  // repository in a shard is skipped once shard_repo_max_match_count matches
  // have been found.
  //
  // A compound shard may contain multiple repositories. This will most often
  // be set to 1 to find all repositories containing a result.
  int64 shard_repo_max_match_count = 5;

  // The search is aborted after this much time has passed.
  google.protobuf.Duration max_wall_time = 6;

  // If non-zero, streaming behaviour is suspended at first and results are
  // collated and sorted instead. Once flush_wall_time is reached the results
  // are sent, and behaviour then reverts to normal streaming.
  google.protobuf.Duration flush_wall_time = 7;

  // The number of results is trimmed to this value after collating and
  // sorting the results.
  int64 max_doc_display_count = 8;

  // If greater than zero, up to this many context lines are added before and
  // after each matched line. The included context lines might themselves
  // contain matches; removing those lines is up to the consumer of the
  // result.
  int64 num_context_lines = 9;

  // If true, each FileMatch carries chunk_matches rather than line_matches.
  // EXPERIMENTAL: the behavior of this flag may be changed in future
  // versions.
  bool chunk_matches = 10;

  // EXPERIMENTAL. If true, document ranks are used as an additional input
  // for sorting matches.
  bool use_document_ranks = 11;

  // EXPERIMENTAL. When use_document_ranks is enabled, this can optionally be
  // set to adjust the weight of document ranks in the file match score. A
  // value <= 0.0 selects the default weight. This option is temporary and is
  // only exposed for testing/tuning purposes.
  double document_ranks_weight = 12;

  // If true, opentracing is turned on for this request, provided the Jaeger
  // address was given as a command-line flag.
  bool trace = 13;

  // If set, the search results contain debug information for scoring.
  bool debug_score = 14;

  // EXPERIMENTAL. If true, keyword-style scoring replaces the default
  // scoring formula. Currently, this treats each match in a file as a term
  // and computes an approximation to BM25. When enabled, all other scoring
  // signals are ignored, including document ranks.
  bool use_keyword_scoring = 15;
}
106
// ListRequest is the request message of the List RPC.
message ListRequest {
  // The query selecting repositories. Q is not declared in this file;
  // presumably it comes from the imported query.proto.
  Q query = 1;

  // Options that apply to this list call.
  ListOptions opts = 2;
}
111
// ListOptions holds the options of a List request.
message ListOptions {
  // RepoListField names the ListResponse field that should be populated.
  enum RepoListField {
    // Zero value.
    REPO_LIST_FIELD_UNKNOWN = 0;
    // ListResponse.repos is returned.
    REPO_LIST_FIELD_REPOS = 1;
    // ListResponse.minimal is returned.
    REPO_LIST_FIELD_MINIMAL = 2;
    // ListResponse.repos_map is set.
    REPO_LIST_FIELD_REPOS_MAP = 3;
  }

  // field decides which field to populate in the ListResponse.
  RepoListField field = 1;

  // Return only minimal data per repo that Sourcegraph frontend needs.
  //
  // Deprecated: use field. The field option below surfaces the deprecation
  // in generated code; the wire format is unaffected.
  bool minimal = 16 [deprecated = true];
}
128
// ListResponse is the response message of the List RPC.
message ListResponse {
  // Returned when ListOptions.field is REPO_LIST_FIELD_REPOS.
  repeated RepoListEntry repos = 1;

  // Set when ListOptions.field is REPO_LIST_FIELD_REPOS_MAP.
  map<uint32, MinimalRepoListEntry> repos_map = 2;

  // NOTE(review): undocumented in the original; presumably the number of
  // shards that crashed while serving this request (compare Stats.crashes)
  // -- confirm.
  int64 crashes = 3;

  // Stats response to a List request.
  // This is the aggregate RepoStats of all repos matching the input query.
  RepoStats stats = 4;

  // Returned when ListOptions.field is REPO_LIST_FIELD_MINIMAL.
  //
  // Deprecated: use repos_map. The field option below surfaces the
  // deprecation in generated code; the wire format is unaffected.
  map<uint32, MinimalRepoListEntry> minimal = 5 [deprecated = true];
}
147
// RepoListEntry is one element of ListResponse.repos. It groups a
// repository with its index metadata and its statistics.
message RepoListEntry {
  // The repository this entry describes.
  Repository repository = 1;

  // Metadata of the index for this entry.
  IndexMetadata index_metadata = 2;

  // Statistics for this entry.
  RepoStats stats = 3;
}
153
// Repository describes an indexed repository.
message Repository {
  // Sourcegraph's repository ID.
  uint32 id = 1;

  // The repository name.
  string name = 2;

  // The repository URL.
  string url = 3;

  // The physical source where this repo came from, e.g. full path to the zip
  // filename or git repository directory. This will not be exposed in the
  // UI, but can be used to detect orphaned index shards.
  string source = 4;

  // The branches indexed in this repo.
  repeated RepositoryBranch branches = 5;

  // Not populated if this is not the super project.
  map<string, Repository> sub_repo_map = 6;

  // URL template to link to the commit of a branch.
  string commit_url_template = 7;

  // The repository URL for getting to a file. Has access to {{Branch}} and
  // {{Path}}.
  string file_url_template = 8;

  // The URL fragment to add to a file URL for line numbers. Has access to
  // {{LineNumber}}. The fragment should include the separator, generally '#'
  // or ';'.
  string line_fragment_template = 9;

  // Perf optimization: priority is set when we load the shard. It
  // corresponds to the value of "priority" stored in raw_config.
  double priority = 10;

  // All zoekt.* configuration settings.
  map<string, string> raw_config = 11;

  // Importance of the repository; bigger is more important.
  uint32 rank = 12;

  // index_options is a hash of the options used to create the index for the
  // repo.
  string index_options = 13;

  // has_symbols is true if this repository has indexed ctags output.
  // Sourcegraph specific: this field is more appropriate for IndexMetadata.
  // However, we store it here since the Sourcegraph frontend can read this
  // structure but not IndexMetadata.
  bool has_symbols = 14;

  // tombstone is true if we are not allowed to search this repo.
  bool tombstone = 15;

  // latest_commit_date is the date of the latest commit among all indexed
  // branches. The date might be time.Time's 0-value if the repository was
  // last indexed before this field was added.
  google.protobuf.Timestamp latest_commit_date = 16;

  // file_tombstones is a set of file paths that should be ignored across all
  // branches in this shard.
  //
  // The field was previously declared as `FileTombstones`. The field number
  // is unchanged, so the wire format is identical, and json_name pins the
  // JSON key to the one the old name produced.
  repeated string file_tombstones = 17 [json_name = "FileTombstones"];
}
220
// IndexMetadata carries metadata about an index: its format, feature and
// minimum-reader versions, the index time, a language map, the zoekt version
// and an ID.
//
// NOTE(review): the fields are undocumented in the original; their exact
// semantics cannot be determined from this file alone.
message IndexMetadata {
  int64 index_format_version = 1;

  int64 index_feature_version = 2;

  int64 index_min_reader_version = 3;

  google.protobuf.Timestamp index_time = 4;

  bool plain_ascii = 5;

  map<string, uint32> language_map = 6;

  string zoekt_version = 7;

  string id = 8;
}
231
// MinimalRepoListEntry is the value type of ListResponse.repos_map and
// ListResponse.minimal. It carries a reduced set of per-repository data.
message MinimalRepoListEntry {
  // See Repository.has_symbols, which uses the same name.
  bool has_symbols = 1;

  // The branches of the repository; each pairs a name with a version.
  repeated RepositoryBranch branches = 2;
}
236
// RepositoryBranch describes an indexed branch: a name paired with a
// version.
message RepositoryBranch {
  // The branch name.
  string name = 1;

  // The version the name is combined with.
  string version = 2;
}
243
// RepoStats is a collection of statistics for a set of repositories.
message RepoStats {
  // repos is used for aggregating the number of repositories.
  int64 repos = 1;

  // shards is the total number of search shards.
  int64 shards = 2;

  // documents holds the number of documents or files.
  int64 documents = 3;

  // index_bytes is the amount of RAM used for index overhead.
  int64 index_bytes = 4;

  // content_bytes is the amount of RAM used for raw content.
  int64 content_bytes = 5;

  // Sourcegraph specific stats below. These are not as efficient to
  // calculate as the above statistics. We experimentally measured about a
  // 10% slower shard load time. However, we find these values very useful to
  // track and computing them outside of load time introduces a lot of
  // complexity.

  // new_lines_count is the number of newlines "\n" that appear in the zoekt
  // indexed documents. This is not exactly the same as line count, since it
  // will not include lines not terminated by "\n" (e.g. a file with no "\n",
  // or a final line without "\n").
  //
  // Note: Zoekt deduplicates documents across branches, so if a path has the
  // same contents on multiple branches, there is only one document for it.
  // As such, that document's newlines are only counted once. See
  // default_branch_new_lines_count and other_branches_new_lines_count for
  // counts which do not deduplicate.
  uint64 new_lines_count = 6;

  // default_branch_new_lines_count is the number of newlines "\n" in the
  // default branch.
  uint64 default_branch_new_lines_count = 7;

  // other_branches_new_lines_count is the number of newlines "\n" in all
  // branches except the default branch.
  uint64 other_branches_new_lines_count = 8;
}
284
// Stats holds the statistics reported for a search in SearchResponse.stats.
message Stats {
  // Amount of I/O for reading contents.
  int64 content_bytes_loaded = 1;

  // Amount of I/O for reading from index.
  int64 index_bytes_loaded = 2;

  // Number of search shards that had a crash.
  int64 crashes = 3;

  // Wall clock time for this search.
  google.protobuf.Duration duration = 4;

  // Number of files containing a match.
  int64 file_count = 5;

  // Number of files in shards that we considered.
  int64 shard_files_considered = 6;

  // Files that we evaluated. Equivalent to files for which all atom matches
  // (including negations) evaluated to true.
  int64 files_considered = 7;

  // Files for which we loaded file content to verify substring matches.
  int64 files_loaded = 8;

  // Candidate files whose contents weren't examined because we gathered
  // enough matches.
  int64 files_skipped = 9;

  // Shards that we scanned to find matches.
  int64 shards_scanned = 10;

  // Shards that we did not process because a query was canceled.
  int64 shards_skipped = 11;

  // Shards that we did not process because the query was rejected by the
  // ngram filter indicating it had no matches.
  int64 shards_skipped_filter = 12;

  // Number of non-overlapping matches.
  int64 match_count = 13;

  // Number of candidate matches as a result of searching ngrams.
  int64 ngram_matches = 14;

  // Wall clock time for queued search.
  google.protobuf.Duration wait = 15;

  // Number of times regexp was called on files that we evaluated.
  int64 regexps_considered = 16;

  // flush_reason explains why results were flushed.
  FlushReason flush_reason = 17;
}
340
// FlushReason is the type of Stats.flush_reason, which explains why results
// were flushed.
//
// NOTE(review): the values are not prefixed with the enum name, so they
// occupy bare names such as UNKNOWN in the enclosing package scope. Renaming
// them would change generated identifiers, so they are left as-is here.
enum FlushReason {
  // Zero value.
  UNKNOWN = 0;

  TIMER_EXPIRED = 1;

  FINAL_FLUSH = 2;

  MAX_SIZE = 3;
}
347
// Progress contains information about the global progress of the running
// search query. The frontend uses it to reorder results and emit them when
// stable.
// Sourcegraph specific: this is used when querying multiple zoekt-webserver
// instances.
message Progress {
  // Priority of the shard that was searched.
  double priority = 1;

  // max_pending_priority is the maximum priority of pending results that are
  // being searched in parallel. It is used to reorder results when the
  // result set is known to be stable -- that is, when a result's priority is
  // greater than the max(max_pending_priority) from the latest results of
  // each backend, it can be returned to the user.
  //
  // max_pending_priority decreases monotonically in each SearchResult.
  double max_pending_priority = 2;
}
362
// FileMatch contains all the matches within a file.
message FileMatch {
  // Ranking; the higher, the better.
  double score = 1;

  // For debugging. Needs SearchOptions.debug_score set, but public so tests
  // in other packages can print some diagnostics.
  string debug = 2;

  // Name of the matched file.
  string file_name = 3;

  // repository is the globally unique name of the repo of the match.
  string repository = 4;

  // NOTE(review): undocumented in the original; presumably the branches the
  // file was matched on -- confirm.
  repeated string branches = 5;

  // One of line_matches or chunk_matches will be returned, depending on
  // whether SearchOptions.chunk_matches is set.
  repeated LineMatch line_matches = 6;
  repeated ChunkMatch chunk_matches = 7;

  // repository_id is a Sourcegraph extension. This is the ID of the
  // repository in Sourcegraph.
  uint32 repository_id = 8;

  // NOTE(review): undocumented in the original; presumably mirrors
  // Repository.priority -- confirm.
  double repository_priority = 9;

  // Only set if requested.
  bytes content = 10;

  // Checksum of the content.
  bytes checksum = 11;

  // Detected language of the result.
  string language = 12;

  // sub_repository_name is the globally unique name of the repo, if it came
  // from a subrepository.
  string sub_repository_name = 13;

  // sub_repository_path holds the prefix where the subrepository was
  // mounted.
  string sub_repository_path = 14;

  // Commit SHA1 (hex) of the (sub)repo holding the file.
  string version = 15;
}
410
// LineMatch is the element type of FileMatch.line_matches.
//
// NOTE(review): line, line_start, line_end and line_number are undocumented
// in the original; their exact semantics (units, 0- or 1-based) cannot be
// determined from this file.
message LineMatch {
  bytes line = 1;

  int64 line_start = 2;

  int64 line_end = 3;

  int64 line_number = 4;

  // before and after are only set when SearchOptions.num_context_lines
  // is > 0.
  bytes before = 5;
  bytes after = 6;

  // If set, this was a match on the filename.
  bool file_name = 7;

  // The higher the better. Only ranks the quality of the match within the
  // file; does not take the rank of the file into account.
  double score = 8;

  // NOTE(review): undocumented in the original; presumably scoring debug
  // output tied to SearchOptions.debug_score -- confirm.
  string debug_score = 9;

  // The fragments of this line match.
  repeated LineFragmentMatch line_fragments = 10;
}
431
// LineFragmentMatch is the element type of LineMatch.line_fragments.
message LineFragmentMatch {
  // Offset within the line, in bytes.
  int64 line_offset = 1;

  // Offset from file start, in bytes.
  uint32 offset = 2;

  // Number of bytes that match.
  int64 match_length = 3;

  // Symbol information for this fragment; may be absent.
  optional SymbolInfo symbol_info = 4;
}
444
// SymbolInfo is the symbol information attached to a LineFragmentMatch or to
// the ranges of a ChunkMatch.
//
// NOTE(review): the fields are undocumented in the original; presumably they
// name the symbol, its kind, and the name and kind of its parent -- confirm.
message SymbolInfo {
  string sym = 1;

  string kind = 2;

  string parent = 3;

  string parent_kind = 4;
}
451
// ChunkMatch is the element type of FileMatch.chunk_matches.
message ChunkMatch {
  // A contiguous range of complete lines that fully contains ranges.
  bytes content = 1;

  // The location (inclusive) of the beginning of content relative to the
  // beginning of the file. It will always be at the beginning of a line
  // (column will always be 1).
  Location content_start = 2;

  // True if this match is a match on the file name, in which case content
  // will contain the file name.
  bool file_name = 3;

  // A set of matching ranges within this chunk. Each range is relative to
  // the beginning of the file (not the beginning of content).
  repeated Range ranges = 4;

  // The symbol information associated with ranges. If it is non-nil, its
  // length will equal that of ranges. Any of its elements may be nil.
  repeated SymbolInfo symbol_info = 5;

  // NOTE(review): score and debug_score are undocumented in the original;
  // presumably analogous to LineMatch.score and LineMatch.debug_score --
  // confirm.
  double score = 6;
  string debug_score = 7;
}
475
// Range is a half-open span between two Locations: start is inclusive, end
// is exclusive.
message Range {
  // The inclusive beginning of the range.
  Location start = 1;

  // The exclusive end of the range.
  Location end = 2;
}
482
// Location identifies a position in a file by byte offset, line number and
// column.
message Location {
  // 0-based byte offset from the beginning of the file.
  uint32 byte_offset = 1;

  // 1-based line number from the beginning of the file.
  uint32 line_number = 2;

  // 1-based column number (in runes) from the beginning of the line.
  uint32 column = 3;
}
490}