fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// zoekt-test compares the search engine results with raw substring search
16package main
17
18import (
19 "bufio"
20 "bytes"
21 "context"
22 "flag"
23 "fmt"
24 "log"
25 "os"
26 "path/filepath"
27 "reflect"
28 "runtime"
29 "runtime/pprof"
30 "sort"
31 "strings"
32 "time"
33
34 "github.com/sourcegraph/zoekt"
35 "github.com/sourcegraph/zoekt/build"
36 "github.com/sourcegraph/zoekt/query"
37 "github.com/sourcegraph/zoekt/shards"
38)
39
// readTree loads every regular file below dir into memory.
//
// The returned map is keyed by the file path with the leading directory
// prefix removed and holds the raw file contents. Entries that are not
// regular files are skipped. Any error reported while walking the tree or
// reading a file aborts the load and is returned to the caller.
func readTree(dir string) (map[string][]byte, error) {
	// filepath.Walk hands out child paths in cleaned form, so strip the
	// cleaned root followed by exactly one separator instead of the raw
	// argument; otherwise roots such as "repo/" or "./repo" would keep
	// their prefix in the map keys.
	prefix := filepath.Clean(dir)
	if sep := string(filepath.Separator); !strings.HasSuffix(prefix, sep) {
		prefix += sep
	}

	res := map[string][]byte{}
	visit := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// info can be nil when err is set (e.g. the root does not
			// exist), so propagate the error instead of dereferencing it.
			return err
		}
		if !info.Mode().IsRegular() {
			return nil
		}

		content, err := os.ReadFile(path)
		if err != nil {
			return err
		}
		res[strings.TrimPrefix(path, prefix)] = content
		return nil
	}
	if err := filepath.Walk(dir, visit); err != nil {
		return nil, err
	}
	return res, nil
}
67
68func compare(dir, patfile string, caseSensitive bool) error {
69 indexDir, err := os.MkdirTemp("", "")
70 if err != nil {
71 return err
72 }
73 defer os.RemoveAll(indexDir)
74
75 var opts build.Options
76 opts.SetDefaults()
77 opts.IndexDir = indexDir
78
79 fileContents, err := readTree(dir)
80 if err != nil {
81 return err
82 }
83 if len(fileContents) == 0 {
84 return fmt.Errorf("no contents")
85 }
86
87 builder, err := build.NewBuilder(opts)
88 if err != nil {
89 return err
90 }
91 for k, v := range fileContents {
92 if err := builder.AddFile(k, v); err != nil {
93 return err
94 }
95 }
96 if err := builder.Finish(); err != nil {
97 return err
98 }
99
100 if !caseSensitive {
101 for k, v := range fileContents {
102 fileContents[k] = toLower(v)
103 }
104 }
105
106 f, err := os.Open(patfile)
107 if err != nil {
108 return err
109 }
110 searcher, err := shards.NewDirectorySearcher(indexDir)
111 if err != nil {
112 return err
113 }
114
115 scanner := bufio.NewScanner(f)
116 for scanner.Scan() {
117 t := scanner.Text()
118 if len(t) < 3 {
119 continue
120 }
121 q := &query.Substring{
122 Pattern: t,
123 CaseSensitive: caseSensitive,
124 }
125
126 zFiles := map[string]struct{}{}
127 rFiles := map[string]struct{}{}
128
129 // search engine results
130 var opts zoekt.SearchOptions
131 res, err := searcher.Search(context.Background(), q, &opts)
132 if err != nil {
133 return err
134 }
135
136 for _, f := range res.Files {
137 zFiles[f.FileName] = struct{}{}
138 }
139
140 // raw search
141 needle := []byte(t)
142 if !caseSensitive {
143 needle = toLower(needle)
144 }
145
146 for k, v := range fileContents {
147 if bytes.Contains(v, needle) {
148 rFiles[k] = struct{}{}
149 }
150 }
151
152 if !reflect.DeepEqual(zFiles, rFiles) {
153 var add, del []string
154 for k := range zFiles {
155 if _, ok := rFiles[k]; !ok {
156 del = append(del, k)
157 }
158 }
159 for k := range rFiles {
160 if _, ok := zFiles[k]; !ok {
161 add = append(add, k)
162 }
163 }
164 sort.Strings(add)
165 sort.Strings(del)
166 log.Printf("pattern %q, add %v, del %v", t, add, del)
167 }
168 }
169 return nil
170}
171
// Profiling flags: memprofile is consulted by testLoadIndexDir and cpuprofile
// by main. Both are empty by default, which disables the respective profile.
var (
	memprofile = flag.String("memprofile", "", "write memory profile to `file`")
	cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
)
176
177func testLoadIndexDir(indexDir string) {
178 var a, b runtime.MemStats
179 runtime.GC()
180 runtime.ReadMemStats(&a)
181 start := time.Now()
182 s, err := shards.NewDirectorySearcher(indexDir)
183 if err != nil {
184 return
185 }
186 duration := time.Since(start)
187 runtime.GC()
188 runtime.ReadMemStats(&b)
189 log.Printf("%s loaded in %d ms, additional memory consumption: %d MiB", s.String(), duration.Milliseconds(), (b.Alloc-a.Alloc)/1024/1024)
190
191 if *memprofile != "" {
192 f, err := os.Create(*memprofile)
193 if err != nil {
194 log.Fatal("could not create memory profile: ", err)
195 }
196 defer f.Close() // error handling omitted for example
197 runtime.GC() // get up-to-date statistics
198 if err := pprof.WriteHeapProfile(f); err != nil {
199 log.Fatal("could not write memory profile: ", err)
200 }
201 }
202}
203
204func main() {
205 repo := flag.String("repo", "", "repository to search")
206 indexDir := flag.String("indexDir", "", "indexDir to load and exit")
207 caseSensitive := flag.Bool("case", false, "case sensitive")
208
209 flag.Parse()
210
211 if *cpuprofile != "" {
212 f, err := os.Create(*cpuprofile)
213 if err != nil {
214 log.Fatal("could not create CPU profile: ", err)
215 }
216 defer f.Close() // error handling omitted for example
217 if err := pprof.StartCPUProfile(f); err != nil {
218 log.Fatal("could not start CPU profile: ", err)
219 }
220 defer pprof.StopCPUProfile()
221 }
222
223 if *indexDir != "" {
224 testLoadIndexDir(*indexDir)
225 return
226 }
227 if len(flag.Args()) == 0 {
228 fmt.Fprintf(os.Stderr, "pattern file is missing.\n")
229 flag.Usage()
230 os.Exit(2)
231 }
232 input := flag.Arg(0)
233
234 if err := compare(*repo, input, *caseSensitive); err != nil {
235 log.Fatal(err)
236 }
237}
238
// toLower returns a newly allocated copy of in in which the ASCII letters
// 'A' through 'Z' are replaced by their lowercase forms. Every other byte is
// copied unchanged, and in itself is not modified.
func toLower(in []byte) []byte {
	lowered := make([]byte, len(in))
	copy(lowered, in)
	for idx := range lowered {
		if b := lowered[idx]; 'A' <= b && b <= 'Z' {
			lowered[idx] = b + ('a' - 'A')
		}
	}
	return lowered
}