fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

Debug: write memory profile if heap exceeds threshold (#819)

This PR adds a debugging flag to periodically check memory usage against a
threshold. If it exceeds the threshold, then a memory profile like
`indexmemory.prof.1` is written to disk. No more than 10 profiles will be
written.

I've already found this more useful than the existing `-memprofile` flag, so I
removed that. It's hard to get insights using that flag, since it only takes a
single profile per shard, forces GC, and forces parallelism to 1.

+71 -34
+48 -28
build/builder.go
··· 37 37 "time" 38 38 39 39 "github.com/bmatcuk/doublestar" 40 + "github.com/dustin/go-humanize" 40 41 "github.com/go-enry/go-enry/v2" 41 42 "github.com/rs/xid" 42 43 ··· 88 89 // If set, ctags must succeed. 89 90 CTagsMustSucceed bool 90 91 91 - // Write memory profiles to this file. 92 - MemProfile string 93 - 94 92 // LargeFiles is a slice of glob patterns, including ** for any number 95 93 // of directories, where matching file paths should be indexed 96 94 // regardless of their size. The full pattern syntax is here: ··· 120 118 // ShardMerging is true if builder should respect compound shards. This is a 121 119 // Sourcegraph specific option. 122 120 ShardMerging bool 121 + 122 + // HeapProfileTriggerBytes is the heap usage in bytes that will trigger a memory profile. If 0, no memory profile will be triggered. 123 + // Profiles will be written to files named `index-memory.prof.n` in the index directory. No more than 10 files are written. 124 + // 125 + // Note: heap checking is "best effort", and it's possible for the process to OOM without triggering the heap profile. 126 + HeapProfileTriggerBytes uint64 123 127 } 124 128 125 129 // HashOptions contains only the options in Options that upon modification leads to IndexState of IndexStateMismatch during the next index building. ··· 194 198 fs.StringVar(&o.IndexDir, "index", x.IndexDir, "directory for search indices") 195 199 fs.BoolVar(&o.CTagsMustSucceed, "require_ctags", x.CTagsMustSucceed, "If set, ctags calls must succeed.") 196 200 fs.Var(largeFilesFlag{o}, "large_file", "A glob pattern where matching files are to be index regardless of their size. You can add multiple patterns by setting this more than once.") 197 - fs.StringVar(&o.MemProfile, "memprofile", "", "write memory profile(s) to `file.shardnum`. 
Note: sets parallelism to 1.") 198 201 199 202 // Sourcegraph specific 200 203 fs.BoolVar(&o.DisableCTags, "disable_ctags", x.DisableCTags, "If set, ctags will not be called.") ··· 269 272 270 273 // indexTime is set by tests for doing reproducible builds. 271 274 indexTime time.Time 275 + 276 + // heapProfileMu is used to ensure that only one memory profile is written at a time 277 + heapProfileMu sync.Mutex 278 + heapProfileNum int 272 279 273 280 // a sortable 20 chars long id. 274 281 id string ··· 835 842 shard := b.nextShardNum 836 843 b.nextShardNum++ 837 844 838 - if b.opts.Parallelism > 1 && b.opts.MemProfile == "" { 845 + if b.opts.Parallelism > 1 { 839 846 b.building.Add(1) 840 847 b.throttle <- 1 841 848 go func() { ··· 860 867 if err == nil { 861 868 b.finishedShards[done.temp] = done.final 862 869 } 863 - if b.opts.MemProfile != "" { 864 - // drop memory, and profile. 865 - todo = nil 866 - b.writeMemProfile(b.opts.MemProfile) 867 - } 868 870 869 871 return b.buildError 870 872 } 871 873 872 874 return nil 873 - } 874 - 875 - var profileNumber int 876 - 877 - func (b *Builder) writeMemProfile(name string) { 878 - nm := fmt.Sprintf("%s.%d", name, profileNumber) 879 - profileNumber++ 880 - f, err := os.Create(nm) 881 - if err != nil { 882 - log.Fatal("could not create memory profile: ", err) 883 - } 884 - runtime.GC() // get up-to-date statistics 885 - if err := pprof.WriteHeapProfile(f); err != nil { 886 - log.Fatal("could not write memory profile: ", err) 887 - } 888 - f.Close() 889 - log.Printf("wrote mem profile %q", nm) 890 875 } 891 876 892 877 // map [0,inf) to [0,1) monotonically ··· 1011 996 1012 997 sortDocuments(todo) 1013 998 1014 - for _, t := range todo { 999 + for idx, t := range todo { 1015 1000 if err := shardBuilder.Add(*t); err != nil { 1016 1001 return nil, err 1017 1002 } 1003 + 1004 + if idx%10_000 == 0 { 1005 + b.CheckMemoryUsage() 1006 + } 1018 1007 } 1019 1008 1020 1009 return b.writeShard(name, shardBuilder) 1010 + } 1011 + 
1012 + // CheckMemoryUsage checks the memory usage of the process and writes a memory profile if the heap usage exceeds the 1013 + // configured threshold. NOTE: this method is expensive and should only be used for debugging. 1014 + func (b *Builder) CheckMemoryUsage() { 1015 + // Don't check memory if heap profiling is disabled, or we've already written 10 profiles 1016 + if b.opts.HeapProfileTriggerBytes <= 0 || b.heapProfileNum >= 10 { 1017 + return 1018 + } 1019 + 1020 + var m runtime.MemStats 1021 + runtime.ReadMemStats(&m) 1022 + 1023 + if m.HeapAlloc > b.opts.HeapProfileTriggerBytes && b.heapProfileMu.TryLock() { 1024 + defer b.heapProfileMu.Unlock() 1025 + 1026 + log.Printf("writing memory profile, heap usage: %s", humanize.Bytes(m.HeapAlloc)) 1027 + name := filepath.Join(b.opts.IndexDir, fmt.Sprintf("indexmemory.prof.%d", b.heapProfileNum)) 1028 + f, err := os.Create(name) 1029 + if err != nil { 1030 + log.Printf("failed to create memory profile file: %v", err) 1031 + return 1032 + } 1033 + 1034 + err = pprof.WriteHeapProfile(f) 1035 + if err != nil { 1036 + log.Printf("failed to write memory profile: %v", err) 1037 + } 1038 + 1039 + b.heapProfileNum++ 1040 + } 1021 1041 } 1022 1042 1023 1043 func (b *Builder) newShardBuilder() (*zoekt.IndexBuilder, error) {
+15 -4
cmd/zoekt-git-index/main.go
··· 22 22 "runtime/pprof" 23 23 "strings" 24 24 25 + "github.com/dustin/go-humanize" 25 26 "github.com/sourcegraph/zoekt/internal/profiler" 26 27 "go.uber.org/automaxprocs/maxprocs" 27 28 ··· 31 32 ) 32 33 33 34 func run() int { 34 - cpuprofile := flag.String("cpuprofile", "", "write cpu profile to `file`") 35 - 36 35 allowMissing := flag.Bool("allow_missing_branches", false, "allow missing branches.") 37 36 submodules := flag.Bool("submodules", true, "if set to false, do not recurse into submodules") 38 37 branchesStr := flag.String("branches", "HEAD", "git branches to index.") ··· 47 46 offlineRanking := flag.String("offline_ranking", "", "the name of the file that contains the ranking info.") 48 47 offlineRankingVersion := flag.String("offline_ranking_version", "", "a version string identifying the contents in offline_ranking.") 49 48 languageMap := flag.String("language_map", "", "a mapping between a language and its ctags processor (a:0,b:3).") 49 + 50 + cpuProfile := flag.String("cpuprofile", "", "write cpu profile to `file`") 51 + 50 52 flag.Parse() 51 53 52 54 // Tune GOMAXPROCS to match Linux container CPU quota. 53 55 _, _ = maxprocs.Set() 54 56 55 - if *cpuprofile != "" { 56 - f, err := os.Create(*cpuprofile) 57 + if *cpuProfile != "" { 58 + f, err := os.Create(*cpuProfile) 57 59 if err != nil { 58 60 log.Fatal("could not create CPU profile: ", err) 59 61 } ··· 107 109 continue 108 110 } 109 111 opts.LanguageMap[m[0]] = ctags.StringToParser(m[1]) 112 + } 113 + 114 + if heapProfileTrigger := os.Getenv("ZOEKT_HEAP_PROFILE_TRIGGER"); heapProfileTrigger != "" { 115 + trigger, err := humanize.ParseBytes(heapProfileTrigger) 116 + if err != nil { 117 + log.Printf("invalid value for ZOEKT_HEAP_PROFILE_TRIGGER: %v", err) 118 + } else { 119 + opts.HeapProfileTriggerBytes = trigger 120 + } 110 121 } 111 122 112 123 profiler.Init("zoekt-git-index")
+8 -2
gitindex/index.go
··· 514 514 return false, fmt.Errorf("build.NewBuilder: %w", err) 515 515 } 516 516 517 + // Preparing the build can consume substantial memory, so check usage before starting to index. 518 + builder.CheckMemoryUsage() 519 + 517 520 var ranks repoPathRanks 518 521 var meanRank float64 519 522 if opts.BuildOptions.DocumentRanksPath != "" { ··· 562 565 names = uniq(names) 563 566 564 567 log.Printf("attempting to index %d total files", totalFiles) 565 - for _, name := range names { 568 + for idx, name := range names { 566 569 keys := fileKeys[name] 567 570 568 571 for _, key := range keys { ··· 574 577 if err := builder.Add(doc); err != nil { 575 578 return false, fmt.Errorf("error adding document with name %s: %w", key.FullPath(), err) 576 579 } 580 + 581 + if idx%10_000 == 0 { 582 + builder.CheckMemoryUsage() 583 + } 577 584 } 578 585 } 579 - 580 586 return true, builder.Finish() 581 587 } 582 588