fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Command zoekt-index indexes a directory of files.
16package main
17
18import (
19 "encoding/json"
20 "flag"
21 "fmt"
22 "log"
23 "os"
24 "path/filepath"
25 "runtime/pprof"
26 "strings"
27
28 "go.uber.org/automaxprocs/maxprocs"
29
30 "github.com/sourcegraph/zoekt/cmd"
31 "github.com/sourcegraph/zoekt/index"
32)
33
// fileInfo describes one regular file found while walking a directory tree.
// Values are produced by fileAggregator.add and consumed by indexArg.
type fileInfo struct {
	// name is the path of the file exactly as reported by filepath.Walk.
	name string
	// size is the file size in bytes as reported by os.FileInfo.Size.
	size int64
}
38
// fileAggregator is the state behind the filepath.Walk callback (see add):
// it filters out ignored directories and forwards every regular file to sink.
type fileAggregator struct {
	// ignoreDirs is the set of directory base names whose subtrees are skipped.
	ignoreDirs map[string]struct{}
	// sizeMax is populated from the index options by indexArg. The add method
	// does not read it; the size check is performed by the consumer of sink.
	sizeMax int64
	// sink receives one fileInfo per regular file encountered during the walk.
	sink chan fileInfo
}
44
45func (a *fileAggregator) add(path string, info os.FileInfo, err error) error {
46 if err != nil {
47 return err
48 }
49
50 if info.IsDir() {
51 base := filepath.Base(path)
52 if _, ok := a.ignoreDirs[base]; ok {
53 return filepath.SkipDir
54 }
55 }
56
57 if info.Mode().IsRegular() {
58 a.sink <- fileInfo{path, info.Size()}
59 }
60 return nil
61}
62
63func main() {
64 cpuProfile := flag.String("cpu_profile", "", "write cpu profile to file")
65 ignoreDirs := flag.String("ignore_dirs", ".git,.hg,.svn", "comma separated list of directories to ignore.")
66 metaFile := flag.String("meta", "", "path to .meta JSON file with repository description")
67 flag.Parse()
68
69 if flag.NArg() == 0 {
70 fmt.Fprintf(flag.CommandLine.Output(), "USAGE: %s [options] PATHS...\n", filepath.Base(os.Args[0]))
71 fmt.Fprintln(flag.CommandLine.Output(), "Options:")
72 flag.PrintDefaults()
73 os.Exit(1)
74 }
75
76 // Tune GOMAXPROCS to match Linux container CPU quota.
77 _, _ = maxprocs.Set()
78
79 opts := cmd.OptionsFromFlags()
80 if *cpuProfile != "" {
81 f, err := os.Create(*cpuProfile)
82 if err != nil {
83 log.Fatal(err)
84 }
85 if err := pprof.StartCPUProfile(f); err != nil {
86 log.Fatal(err)
87 }
88 defer pprof.StopCPUProfile()
89 }
90
91 ignoreDirMap := map[string]struct{}{}
92 if *ignoreDirs != "" {
93 dirs := strings.Split(*ignoreDirs, ",")
94 for _, d := range dirs {
95 d = strings.TrimSpace(d)
96 if d != "" {
97 ignoreDirMap[d] = struct{}{}
98 }
99 }
100 }
101
102 if *metaFile != "" {
103 // Read and parse the .meta JSON file into opts.RepositoryDescription
104 data, err := os.ReadFile(*metaFile)
105 if err != nil {
106 log.Fatalf("failed to read .meta file %s: %v", *metaFile, err)
107 }
108 if err := json.Unmarshal(data, &opts.RepositoryDescription); err != nil {
109 log.Fatalf("failed to decode .meta file %s: %v", *metaFile, err)
110 }
111 }
112
113 for _, arg := range flag.Args() {
114 opts.RepositoryDescription.Source = arg
115 if err := indexArg(arg, *opts, ignoreDirMap); err != nil {
116 log.Fatal(err)
117 }
118 }
119}
120
121func indexArg(arg string, opts index.Options, ignore map[string]struct{}) error {
122 dir, err := filepath.Abs(filepath.Clean(arg))
123 if err != nil {
124 return err
125 }
126
127 opts.RepositoryDescription.Name = filepath.Base(dir)
128 builder, err := index.NewBuilder(opts)
129 if err != nil {
130 return err
131 }
132 // we don't need to check error, since we either already have an error, or
133 // we returning the first call to builder.Finish.
134 defer builder.Finish() // nolint:errcheck
135
136 comm := make(chan fileInfo, 100)
137 agg := fileAggregator{
138 ignoreDirs: ignore,
139 sink: comm,
140 sizeMax: int64(opts.SizeMax),
141 }
142
143 go func() {
144 if err := filepath.Walk(dir, agg.add); err != nil {
145 log.Fatal(err)
146 }
147 close(comm)
148 }()
149
150 for f := range comm {
151 displayName := strings.TrimPrefix(f.name, dir+"/")
152 if f.size > int64(opts.SizeMax) && !opts.IgnoreSizeMax(displayName) {
153 if err := builder.Add(index.Document{
154 Name: displayName,
155 SkipReason: index.SkipReasonTooLarge,
156 }); err != nil {
157 return err
158 }
159 continue
160 }
161 content, err := os.ReadFile(f.name)
162 if err != nil {
163 return err
164 }
165
166 if err := builder.AddFile(displayName, content); err != nil {
167 return err
168 }
169 }
170
171 return builder.Finish()
172}