fork of https://github.com/sourcegraph/zoekt
1package index
2
3import (
4 "errors"
5
6 "github.com/go-enry/go-enry/v2"
7)
8
// FileCategory represents the category of a file, as determined by go-enry. It is non-exhaustive
// but tries to cover the major cases, like whether the file is a test, generated, etc.
//
// A file's category is used in search scoring to determine the weight of a file match.
type FileCategory byte
14
const (
	// FileCategoryMissing is a sentinel value that indicates we never computed the file category during indexing
	// (which means we're reading from an old index version). This value can never be written to the index.
	FileCategoryMissing FileCategory = iota
	// FileCategoryDefault is assigned by DetermineFileCategory when none of the go-enry checks match.
	FileCategoryDefault
	// FileCategoryTest marks files for which enry.IsTest reports true. Considered low priority.
	FileCategoryTest
	// FileCategoryVendored marks files for which enry.IsVendor reports true. Considered low priority.
	FileCategoryVendored
	// FileCategoryGenerated marks files for which enry.IsGenerated reports true. Considered low priority.
	FileCategoryGenerated
	// FileCategoryConfig marks files for which enry.IsConfiguration reports true.
	FileCategoryConfig
	// FileCategoryDotFile marks files for which enry.IsDotFile reports true.
	FileCategoryDotFile
	// FileCategoryDocumentation marks files for which enry.IsDocumentation reports true.
	FileCategoryDocumentation
)
27
28func DetermineFileCategory(doc *Document) {
29 name := doc.Name
30 content := doc.Content
31
32 // If this document has been skipped, it's likely very large. In this case, we just guess the category based
33 // on the filename to avoid examining the contents. Note: passing nil content is allowed by the go-enry contract.
34 if doc.SkipReason != "" {
35 content = nil
36 }
37
38 category := FileCategoryDefault
39 if enry.IsTest(name) {
40 category = FileCategoryTest
41 } else if enry.IsDotFile(name) {
42 category = FileCategoryDotFile
43 } else if enry.IsVendor(name) {
44 category = FileCategoryVendored
45 } else if enry.IsGenerated(name, content) {
46 category = FileCategoryGenerated
47 } else if enry.IsConfiguration(name) {
48 category = FileCategoryConfig
49 } else if enry.IsDocumentation(name) {
50 category = FileCategoryDocumentation
51 }
52
53 doc.Category = category
54}
55
56// lowPriority returns true if this file category is considered 'low priority'. This is used
57// in search scoring to down-weight matches in these files.
58func (c FileCategory) lowPriority() bool {
59 return c == FileCategoryTest || c == FileCategoryVendored || c == FileCategoryGenerated
60}
61
62func (c FileCategory) encode() (byte, error) {
63 switch c {
64 case FileCategoryMissing:
65 return 0, errors.New("cannot encode missing file category")
66 case FileCategoryDefault:
67 return 1, nil
68 case FileCategoryTest:
69 return 2, nil
70 case FileCategoryVendored:
71 return 3, nil
72 case FileCategoryGenerated:
73 return 4, nil
74 case FileCategoryConfig:
75 return 5, nil
76 case FileCategoryDotFile:
77 return 6, nil
78 case FileCategoryDocumentation:
79 return 7, nil
80 default:
81 return 0, errors.New("unrecognized file category")
82 }
83}
84
85func decodeCategory(c byte) (FileCategory, error) {
86 switch c {
87 case 1:
88 return FileCategoryDefault, nil
89 case 2:
90 return FileCategoryTest, nil
91 case 3:
92 return FileCategoryVendored, nil
93 case 4:
94 return FileCategoryGenerated, nil
95 case 5:
96 return FileCategoryConfig, nil
97 case 6:
98 return FileCategoryDotFile, nil
99 case 7:
100 return FileCategoryDocumentation, nil
101 default:
102 return FileCategoryMissing, errors.New("unrecognized file category")
103 }
104}