fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Command zoekt-index indexes a directory of files.
16package main
17
18import (
19 "flag"
20 "fmt"
21 "log"
22 "os"
23 "path/filepath"
24 "runtime/pprof"
25 "strings"
26
27 "go.uber.org/automaxprocs/maxprocs"
28
29 "github.com/sourcegraph/zoekt/cmd"
30 "github.com/sourcegraph/zoekt/index"
31)
32
// fileInfo describes one regular file discovered while walking a directory
// tree.
type fileInfo struct {
	// name is the path of the file exactly as reported by the walk.
	name string
	// size is the file size in bytes, taken from os.FileInfo.Size.
	size int64
}
37
38type fileAggregator struct {
39 ignoreDirs map[string]struct{}
40 sizeMax int64
41 sink chan fileInfo
42}
43
44func (a *fileAggregator) add(path string, info os.FileInfo, err error) error {
45 if err != nil {
46 return err
47 }
48
49 if info.IsDir() {
50 base := filepath.Base(path)
51 if _, ok := a.ignoreDirs[base]; ok {
52 return filepath.SkipDir
53 }
54 }
55
56 if info.Mode().IsRegular() {
57 a.sink <- fileInfo{path, info.Size()}
58 }
59 return nil
60}
61
62func main() {
63 cpuProfile := flag.String("cpu_profile", "", "write cpu profile to file")
64 ignoreDirs := flag.String("ignore_dirs", ".git,.hg,.svn", "comma separated list of directories to ignore.")
65 flag.Parse()
66
67 if flag.NArg() == 0 {
68 fmt.Fprintf(flag.CommandLine.Output(), "USAGE: %s [options] PATHS...\n", filepath.Base(os.Args[0]))
69 fmt.Fprintln(flag.CommandLine.Output(), "Options:")
70 flag.PrintDefaults()
71 os.Exit(1)
72 }
73
74 // Tune GOMAXPROCS to match Linux container CPU quota.
75 _, _ = maxprocs.Set()
76
77 opts := cmd.OptionsFromFlags()
78 if *cpuProfile != "" {
79 f, err := os.Create(*cpuProfile)
80 if err != nil {
81 log.Fatal(err)
82 }
83 if err := pprof.StartCPUProfile(f); err != nil {
84 log.Fatal(err)
85 }
86 defer pprof.StopCPUProfile()
87 }
88
89 ignoreDirMap := map[string]struct{}{}
90 if *ignoreDirs != "" {
91 dirs := strings.Split(*ignoreDirs, ",")
92 for _, d := range dirs {
93 d = strings.TrimSpace(d)
94 if d != "" {
95 ignoreDirMap[d] = struct{}{}
96 }
97 }
98 }
99 for _, arg := range flag.Args() {
100 opts.RepositoryDescription.Source = arg
101 if err := indexArg(arg, *opts, ignoreDirMap); err != nil {
102 log.Fatal(err)
103 }
104 }
105}
106
107func indexArg(arg string, opts index.Options, ignore map[string]struct{}) error {
108 dir, err := filepath.Abs(filepath.Clean(arg))
109 if err != nil {
110 return err
111 }
112
113 opts.RepositoryDescription.Name = filepath.Base(dir)
114 builder, err := index.NewBuilder(opts)
115 if err != nil {
116 return err
117 }
118 // we don't need to check error, since we either already have an error, or
119 // we returning the first call to builder.Finish.
120 defer builder.Finish() // nolint:errcheck
121
122 comm := make(chan fileInfo, 100)
123 agg := fileAggregator{
124 ignoreDirs: ignore,
125 sink: comm,
126 sizeMax: int64(opts.SizeMax),
127 }
128
129 go func() {
130 if err := filepath.Walk(dir, agg.add); err != nil {
131 log.Fatal(err)
132 }
133 close(comm)
134 }()
135
136 for f := range comm {
137 displayName := strings.TrimPrefix(f.name, dir+"/")
138 if f.size > int64(opts.SizeMax) && !opts.IgnoreSizeMax(displayName) {
139 if err := builder.Add(index.Document{
140 Name: displayName,
141 SkipReason: index.SkipReasonTooLarge,
142 }); err != nil {
143 return err
144 }
145 continue
146 }
147 content, err := os.ReadFile(f.name)
148 if err != nil {
149 return err
150 }
151
152 if err := builder.AddFile(displayName, content); err != nil {
153 return err
154 }
155 }
156
157 return builder.Finish()
158}