// Fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Command zoekt-index indexes a directory of files.
16package main
17
18import (
19 "flag"
20 "fmt"
21 "log"
22 "os"
23 "path/filepath"
24 "runtime/pprof"
25 "strings"
26
27 "github.com/sourcegraph/zoekt/cmd"
28 "github.com/sourcegraph/zoekt/index"
29 "go.uber.org/automaxprocs/maxprocs"
30)
31
// fileInfo describes one regular file discovered while walking a directory
// tree. Field order matters: values may be built with positional literals.
type fileInfo struct {
	// name is the path of the file exactly as the walk reported it.
	name string

	// size is the file size in bytes, taken from os.FileInfo.Size.
	size int64
}
36
37type fileAggregator struct {
38 ignoreDirs map[string]struct{}
39 sizeMax int64
40 sink chan fileInfo
41}
42
43func (a *fileAggregator) add(path string, info os.FileInfo, err error) error {
44 if err != nil {
45 return err
46 }
47
48 if info.IsDir() {
49 base := filepath.Base(path)
50 if _, ok := a.ignoreDirs[base]; ok {
51 return filepath.SkipDir
52 }
53 }
54
55 if info.Mode().IsRegular() {
56 a.sink <- fileInfo{path, info.Size()}
57 }
58 return nil
59}
60
61func main() {
62 cpuProfile := flag.String("cpu_profile", "", "write cpu profile to file")
63 ignoreDirs := flag.String("ignore_dirs", ".git,.hg,.svn", "comma separated list of directories to ignore.")
64 flag.Parse()
65
66 if flag.NArg() == 0 {
67 fmt.Fprintf(flag.CommandLine.Output(), "USAGE: %s [options] PATHS...\n", filepath.Base(os.Args[0]))
68 fmt.Fprintln(flag.CommandLine.Output(), "Options:")
69 flag.PrintDefaults()
70 os.Exit(1)
71 }
72
73 // Tune GOMAXPROCS to match Linux container CPU quota.
74 _, _ = maxprocs.Set()
75
76 opts := cmd.OptionsFromFlags()
77 if *cpuProfile != "" {
78 f, err := os.Create(*cpuProfile)
79 if err != nil {
80 log.Fatal(err)
81 }
82 if err := pprof.StartCPUProfile(f); err != nil {
83 log.Fatal(err)
84 }
85 defer pprof.StopCPUProfile()
86 }
87
88 ignoreDirMap := map[string]struct{}{}
89 if *ignoreDirs != "" {
90 dirs := strings.Split(*ignoreDirs, ",")
91 for _, d := range dirs {
92 d = strings.TrimSpace(d)
93 if d != "" {
94 ignoreDirMap[d] = struct{}{}
95 }
96 }
97 }
98 for _, arg := range flag.Args() {
99 opts.RepositoryDescription.Source = arg
100 if err := indexArg(arg, *opts, ignoreDirMap); err != nil {
101 log.Fatal(err)
102 }
103 }
104}
105
106func indexArg(arg string, opts index.Options, ignore map[string]struct{}) error {
107 dir, err := filepath.Abs(filepath.Clean(arg))
108 if err != nil {
109 return err
110 }
111
112 opts.RepositoryDescription.Name = filepath.Base(dir)
113 builder, err := index.NewBuilder(opts)
114 if err != nil {
115 return err
116 }
117 // we don't need to check error, since we either already have an error, or
118 // we returning the first call to builder.Finish.
119 defer builder.Finish() // nolint:errcheck
120
121 comm := make(chan fileInfo, 100)
122 agg := fileAggregator{
123 ignoreDirs: ignore,
124 sink: comm,
125 sizeMax: int64(opts.SizeMax),
126 }
127
128 go func() {
129 if err := filepath.Walk(dir, agg.add); err != nil {
130 log.Fatal(err)
131 }
132 close(comm)
133 }()
134
135 for f := range comm {
136 displayName := strings.TrimPrefix(f.name, dir+"/")
137 if f.size > int64(opts.SizeMax) && !opts.IgnoreSizeMax(displayName) {
138 if err := builder.Add(index.Document{
139 Name: displayName,
140 SkipReason: index.SkipReasonTooLarge,
141 }); err != nil {
142 return err
143 }
144 continue
145 }
146 content, err := os.ReadFile(f.name)
147 if err != nil {
148 return err
149 }
150
151 if err := builder.AddFile(displayName, content); err != nil {
152 return err
153 }
154 }
155
156 return builder.Finish()
157}