fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1package zoekt 2 3import ( 4 "crypto/sha1" 5 "fmt" 6 "io" 7 "log" 8 "net/url" 9 "os" 10 "path/filepath" 11 "runtime" 12 "sort" 13) 14 15// Merge files into a compound shard in dstDir. Merge returns tmpName and a 16// dstName. It is the responsibility of the caller to delete the input shards and 17// rename the temporary compound shard from tmpName to dstName. 18func Merge(dstDir string, files ...IndexFile) (tmpName, dstName string, _ error) { 19 var ds []*indexData 20 for _, f := range files { 21 searcher, err := NewSearcher(f) 22 if err != nil { 23 return "", "", err 24 } 25 ds = append(ds, searcher.(*indexData)) 26 } 27 28 ib, err := merge(ds...) 29 if err != nil { 30 return "", "", err 31 } 32 33 hasher := sha1.New() 34 for _, d := range ds { 35 for i, md := range d.repoMetaData { 36 if d.repoMetaData[i].Tombstone { 37 continue 38 } 39 hasher.Write([]byte(md.Name)) 40 hasher.Write([]byte{0}) 41 } 42 } 43 44 dstName = filepath.Join(dstDir, fmt.Sprintf("compound-%x_v%d.%05d.zoekt", hasher.Sum(nil), NextIndexFormatVersion, 0)) 45 tmpName = dstName + ".tmp" 46 if err := builderWriteAll(tmpName, ib); err != nil { 47 return "", "", err 48 } 49 return tmpName, dstName, nil 50} 51 52func builderWriteAll(fn string, ib *IndexBuilder) error { 53 dir := filepath.Dir(fn) 54 if err := os.MkdirAll(dir, 0o700); err != nil { 55 return err 56 } 57 58 f, err := os.CreateTemp(dir, filepath.Base(fn)+".*.tmp") 59 if err != nil { 60 return err 61 } 62 if runtime.GOOS != "windows" { 63 // umask? 64 if err := f.Chmod(0o666); err != nil { 65 return err 66 } 67 } 68 69 defer f.Close() 70 if err := ib.Write(f); err != nil { 71 return err 72 } 73 fi, err := f.Stat() 74 if err != nil { 75 return err 76 } 77 if err := f.Close(); err != nil { 78 return err 79 } 80 81 if err := os.Rename(f.Name(), fn); err != nil { 82 return err 83 } 84 85 log.Printf("finished shard %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), 86 float64(fi.Size())/float64(ib.ContentSize()+1)) 87 88 return nil 89} 90 91func merge(ds ...*indexData) (*IndexBuilder, error) { 92 if len(ds) == 0 { 93 return nil, fmt.Errorf("need 1 or more indexData to merge") 94 } 95 96 sort.Slice(ds, func(i, j int) bool { 97 return ds[i].repoMetaData[0].priority > ds[j].repoMetaData[0].priority 98 }) 99 100 ib := newIndexBuilder() 101 ib.indexFormatVersion = NextIndexFormatVersion 102 103 for _, d := range ds { 104 lastRepoID := -1 105 for docID := uint32(0); int(docID) < len(d.fileBranchMasks); docID++ { 106 repoID := int(d.repos[docID]) 107 108 if d.repoMetaData[repoID].Tombstone { 109 continue 110 } 111 112 if repoID != lastRepoID { 113 if lastRepoID > repoID { 114 return nil, fmt.Errorf("non-contiguous repo ids in %s for document %d: old=%d current=%d", d.String(), docID, lastRepoID, repoID) 115 } 116 lastRepoID = repoID 117 118 // TODO we are losing empty repos on merging since we only get here if 119 // there is an associated document. 120 121 if err := ib.setRepository(&d.repoMetaData[repoID]); err != nil { 122 return nil, err 123 } 124 } 125 126 if err := addDocument(d, ib, repoID, docID); err != nil { 127 return nil, err 128 } 129 } 130 } 131 132 return ib, nil 133} 134 135// Explode takes an IndexFile f and creates 1 simple shard per repository 136// contained in f. Explode returns a map of tmpName -> dstName. It is the 137// responsibility of the caller to rename the temporary shard(s) and delete the 138// input shard. 139func Explode(dstDir string, f IndexFile) (map[string]string, error) { 140 return explode(dstDir, f) 141} 142 143type indexBuilderFunc func(ib *IndexBuilder) 144 145// explode offers a richer signature compared to Explode for testing. You 146// probably want to call Explode instead. 147func explode(dstDir string, f IndexFile, ibFuncs ...indexBuilderFunc) (map[string]string, error) { 148 searcher, err := NewSearcher(f) 149 if err != nil { 150 return nil, err 151 } 152 d := searcher.(*indexData) 153 154 shardNames := make(map[string]string, len(d.repoMetaData)) 155 156 writeShard := func(ib *IndexBuilder) error { 157 if len(ib.repoList) != 1 { 158 return fmt.Errorf("expected ib to contain exactly 1 repository") 159 } 160 for _, ibFunc := range ibFuncs { 161 ibFunc(ib) 162 } 163 fn := filepath.Join(dstDir, shardName(ib.repoList[0].Name, ib.indexFormatVersion, 0)) 164 fnTmp := fn + ".tmp" 165 shardNames[fnTmp] = fn 166 return builderWriteAll(fnTmp, ib) 167 } 168 169 var ib *IndexBuilder 170 lastRepoID := -1 171 for docID := uint32(0); int(docID) < len(d.fileBranchMasks); docID++ { 172 repoID := int(d.repos[docID]) 173 174 if d.repoMetaData[repoID].Tombstone { 175 continue 176 } 177 178 if repoID != lastRepoID { 179 if lastRepoID > repoID { 180 return shardNames, fmt.Errorf("non-contiguous repo ids in %s for document %d: old=%d current=%d", d.String(), docID, lastRepoID, repoID) 181 } 182 lastRepoID = repoID 183 184 if ib != nil { 185 if err := writeShard(ib); err != nil { 186 return shardNames, err 187 } 188 } 189 190 ib = newIndexBuilder() 191 ib.indexFormatVersion = IndexFormatVersion 192 if err := ib.setRepository(&d.repoMetaData[repoID]); err != nil { 193 return shardNames, err 194 } 195 } 196 197 err := addDocument(d, ib, repoID, docID) 198 if err != nil { 199 return shardNames, err 200 } 201 } 202 203 if ib != nil { 204 if err := writeShard(ib); err != nil { 205 return shardNames, err 206 } 207 } 208 209 return shardNames, nil 210} 211 212func addDocument(d *indexData, ib *IndexBuilder, repoID int, docID uint32) error { 213 doc := Document{ 214 Name: string(d.fileName(docID)), 215 // Content set below since it can return an error 216 // Branches set below since it requires lookups 217 SubRepositoryPath: d.subRepoPaths[repoID][d.subRepos[docID]], 218 Language: d.languageMap[d.getLanguage(docID)], 219 // SkipReason not set, will be part of content from original indexer. 220 } 221 222 var err error 223 if doc.Content, err = d.readContents(docID); err != nil { 224 return err 225 } 226 227 if doc.Symbols, _, err = d.readDocSections(docID, nil); err != nil { 228 return err 229 } 230 231 doc.SymbolsMetaData = make([]*Symbol, len(doc.Symbols)) 232 for i := range doc.SymbolsMetaData { 233 doc.SymbolsMetaData[i] = d.symbols.data(d.fileEndSymbol[docID] + uint32(i)) 234 } 235 236 // calculate branches 237 { 238 mask := d.fileBranchMasks[docID] 239 id := uint32(1) 240 for mask != 0 { 241 if mask&0x1 != 0 { 242 doc.Branches = append(doc.Branches, d.branchNames[repoID][uint(id)]) 243 } 244 id <<= 1 245 mask >>= 1 246 } 247 } 248 return ib.Add(doc) 249} 250 251// copied from builder package to avoid circular imports. 252func hashString(s string) string { 253 h := sha1.New() 254 _, _ = io.WriteString(h, s) 255 return fmt.Sprintf("%x", h.Sum(nil)) 256} 257 258// copied from builder package to avoid circular imports. 259func shardName(name string, version, n int) string { 260 abs := url.QueryEscape(name) 261 if len(abs) > 200 { 262 abs = abs[:200] + hashString(abs)[:8] 263 } 264 return fmt.Sprintf("%s_v%d.%05d.zoekt", abs, version, n) 265}