···11+// Copyright 2016 Google Inc. All rights reserved.
22+//
33+// Licensed under the Apache License, Version 2.0 (the "License");
44+// you may not use this file except in compliance with the License.
55+// You may obtain a copy of the License at
66+//
77+// http://www.apache.org/licenses/LICENSE-2.0
88+//
99+// Unless required by applicable law or agreed to in writing, software
1010+// distributed under the License is distributed on an "AS IS" BASIS,
1111+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212+// See the License for the specific language governing permissions and
1313+// limitations under the License.
1414+1515+package main
1616+1717+import (
1818+ "bytes"
1919+ "flag"
2020+ "fmt"
2121+ "io/ioutil"
2222+ "log"
2323+ "os"
2424+ "path/filepath"
2525+ "runtime/pprof"
2626+ "sync"
2727+2828+ "github.com/hanwen/codesearch"
2929+)
// fileAggregator collects file paths handed to it by a directory walk and
// emits them in batches on chunks. A batch is emitted as soon as the summed
// size of its files exceeds shardMax.
type fileAggregator struct {
	chunks   chan<- []string // receives each completed batch of paths
	files    []string        // paths collected for the batch in progress
	total    int64           // summed size in bytes of the files in files
	shardMax int64           // a batch is emitted once total exceeds this
	sizeMax  int64           // files larger than this many bytes are skipped
}

// flush emits the batch in progress, if it holds any files, and then closes
// chunks. The aggregator must not be used after flush returns.
func (a *fileAggregator) flush() {
	// Do not emit an empty batch: the consumer would turn it into a shard
	// that contains no files.
	if len(a.files) > 0 {
		a.chunks <- a.files
		a.files = nil
		a.total = 0
	}
	close(a.chunks)
}

// add has the signature of a filepath.WalkFunc. It records path when info
// describes a regular file of at most sizeMax bytes, and emits the current
// batch once its total size exceeds shardMax.
//
// A non-nil err means the walk could not stat or read path; info may be nil
// in that case, so the entry is logged and skipped instead of inspected.
// add always returns nil so that one unreadable entry does not stop the walk.
func (a *fileAggregator) add(path string, info os.FileInfo, err error) error {
	if err != nil {
		log.Printf("skipping %s: %v", path, err)
		return nil
	}

	sz := info.Size()
	if sz > a.sizeMax || !info.Mode().IsRegular() {
		return nil
	}

	a.files = append(a.files, path)
	a.total += sz

	if a.total > a.shardMax {
		a.chunks <- a.files
		a.files = nil
		a.total = 0
	}
	return nil
}
6161+6262+func main() {
6363+ var cpuProfile = flag.String("cpuprofile", "", "write cpu profile to file")
6464+ var sizeMax = flag.Int("file_limit", 128*1024, "maximum file size")
6565+ var shardLimit = flag.Int("shard_limit", 100<<20, "maximum corpus size for a shard")
6666+ var parallelism = flag.Int("parallelism", 4, "maximum number of parallel indexing processes.")
6767+6868+ index := flag.String("index", ".csindex.%05d", "index file to use")
6969+7070+ flag.Parse()
7171+7272+ if *cpuProfile != "" {
7373+ f, err := os.Create(*cpuProfile)
7474+ if err != nil {
7575+ log.Fatal(err)
7676+ }
7777+ pprof.StartCPUProfile(f)
7878+ defer pprof.StopCPUProfile()
7979+ }
8080+8181+ chunks := make(chan []string, 10)
8282+ agg := fileAggregator{
8383+ chunks: chunks,
8484+ sizeMax: int64(*sizeMax),
8585+ shardMax: int64(*shardLimit),
8686+ }
8787+8888+ shardNum := 0
8989+ go func() {
9090+ for _, a := range flag.Args() {
9191+ if err := filepath.Walk(a, agg.add); err != nil {
9292+ log.Fatal(err)
9393+ }
9494+ }
9595+ agg.flush()
9696+ }()
9797+9898+ var wg sync.WaitGroup
9999+ errors := make(chan error, 10)
100100+ throttle := make(chan int, *parallelism)
101101+102102+ for names := range chunks {
103103+ fn := fmt.Sprintf(*index, shardNum)
104104+ shardNum++
105105+ wg.Add(1)
106106+ go func(nm []string) {
107107+ throttle <- 1
108108+ errors <- buildShard(fn, nm)
109109+ <-throttle
110110+ wg.Done()
111111+ }(names)
112112+ }
113113+114114+ go func() {
115115+ wg.Wait()
116116+ close(errors)
117117+ }()
118118+119119+ for err := range errors {
120120+ if err != nil {
121121+ log.Fatal(err)
122122+ }
123123+ }
124124+}
125125+126126+func buildShard(shardName string, files []string) error {
127127+ f, err := os.OpenFile(
128128+ shardName, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0600)
129129+ if err != nil {
130130+ return err
131131+ }
132132+133133+ b := codesearch.NewIndexBuilder()
134134+ total := 0
135135+ for _, a := range files {
136136+ c, err := ioutil.ReadFile(a)
137137+ if bytes.IndexByte(c, 0) != -1 {
138138+ // skip binary
139139+ continue
140140+ }
141141+ total += len(c)
142142+ if err != nil {
143143+ log.Println(err)
144144+ } else {
145145+ b.AddFile(a, c)
146146+ }
147147+ }
148148+149149+ if err := b.Write(f); err != nil {
150150+ log.Println("Write", err)
151151+ }
152152+ if err := f.Close(); err != nil {
153153+ log.Println("Write", err)
154154+ }
155155+ log.Printf("%s: indexed %d bytes\n", shardName, total)
156156+157157+ return nil
158158+}
+62
cmd/search/main.go
···11+// Copyright 2016 Google Inc. All rights reserved.
22+//
33+// Licensed under the Apache License, Version 2.0 (the "License");
44+// you may not use this file except in compliance with the License.
55+// You may obtain a copy of the License at
66+//
77+// http://www.apache.org/licenses/LICENSE-2.0
88+//
99+// Unless required by applicable law or agreed to in writing, software
1010+// distributed under the License is distributed on an "AS IS" BASIS,
1111+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212+// See the License for the specific language governing permissions and
1313+// limitations under the License.
1414+1515+package main
1616+1717+import (
1818+ "flag"
1919+ "fmt"
2020+ "log"
2121+2222+ "github.com/hanwen/codesearch"
2323+)
// lastIndex reports the position of the final occurrence of byte c in b,
// or -1 when c does not occur.
//
// The original note here reads "go1.4"; presumably this is hand-written so
// the file builds with Go 1.4 -- confirm before swapping in a library call.
func lastIndex(b string, c byte) int {
	pos := len(b)
	for pos > 0 {
		pos--
		if b[pos] == c {
			return pos
		}
	}
	return -1
}
const (
	// CONTEXT is not referenced by the code visible in this file.
	// NOTE(review): presumably an amount of context to show around a
	// match -- confirm the intended use and unit.
	CONTEXT = 20
)
3636+3737+func displayMatches(matches []codesearch.Match, pat string) {
3838+ for _, m := range matches {
3939+ fmt.Printf("%s:%d:%s\n", m.Name, m.LineNum, m.Line)
4040+ }
4141+}
4242+4343+func main() {
4444+ index := flag.String("index", ".csindex.*", "index file glob to use")
4545+ flag.Parse()
4646+4747+ searcher, err := codesearch.NewShardedSearcher(*index)
4848+ if err != nil {
4949+ log.Fatal(err)
5050+ }
5151+5252+ if len(flag.Args()) == 0 {
5353+ log.Fatal("needs argument")
5454+ }
5555+ pat := flag.Arg(0)
5656+ ms, err := searcher.Search(pat)
5757+ if err != nil {
5858+ log.Fatal(err)
5959+ }
6060+6161+ displayMatches(ms, pat)
6262+}