fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1package zoekt 2 3import ( 4 "crypto/sha1" 5 "fmt" 6 "io" 7 "log" 8 "net/url" 9 "os" 10 "path/filepath" 11 "runtime" 12 "sort" 13 14 "github.com/sourcegraph/zoekt/internal/tenant" 15) 16 17// Merge files into a compound shard in dstDir. Merge returns tmpName and a 18// dstName. It is the responsibility of the caller to delete the input shards and 19// rename the temporary compound shard from tmpName to dstName. 20func Merge(dstDir string, files ...IndexFile) (tmpName, dstName string, _ error) { 21 var ds []*indexData 22 for _, f := range files { 23 searcher, err := NewSearcher(f) 24 if err != nil { 25 return "", "", err 26 } 27 ds = append(ds, searcher.(*indexData)) 28 } 29 30 ib, err := merge(ds...) 31 if err != nil { 32 return "", "", err 33 } 34 35 hasher := sha1.New() 36 for _, d := range ds { 37 for i, md := range d.repoMetaData { 38 if d.repoMetaData[i].Tombstone { 39 continue 40 } 41 hasher.Write([]byte(md.Name)) 42 hasher.Write([]byte{0}) 43 } 44 } 45 46 dstName = filepath.Join(dstDir, fmt.Sprintf("compound-%x_v%d.%05d.zoekt", hasher.Sum(nil), NextIndexFormatVersion, 0)) 47 tmpName = dstName + ".tmp" 48 if err := builderWriteAll(tmpName, ib); err != nil { 49 return "", "", err 50 } 51 return tmpName, dstName, nil 52} 53 54func builderWriteAll(fn string, ib *IndexBuilder) error { 55 dir := filepath.Dir(fn) 56 if err := os.MkdirAll(dir, 0o700); err != nil { 57 return err 58 } 59 60 f, err := os.CreateTemp(dir, filepath.Base(fn)+".*.tmp") 61 if err != nil { 62 return err 63 } 64 if runtime.GOOS != "windows" { 65 // umask? 66 if err := f.Chmod(0o666); err != nil { 67 return err 68 } 69 } 70 71 defer f.Close() 72 if err := ib.Write(f); err != nil { 73 return err 74 } 75 fi, err := f.Stat() 76 if err != nil { 77 return err 78 } 79 if err := f.Close(); err != nil { 80 return err 81 } 82 83 if err := os.Rename(f.Name(), fn); err != nil { 84 return err 85 } 86 87 log.Printf("finished shard %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), 88 float64(fi.Size())/float64(ib.ContentSize()+1)) 89 90 return nil 91} 92 93func merge(ds ...*indexData) (*IndexBuilder, error) { 94 if len(ds) == 0 { 95 return nil, fmt.Errorf("need 1 or more indexData to merge") 96 } 97 98 sort.Slice(ds, func(i, j int) bool { 99 return ds[i].repoMetaData[0].priority > ds[j].repoMetaData[0].priority 100 }) 101 102 ib := newIndexBuilder() 103 ib.indexFormatVersion = NextIndexFormatVersion 104 105 for _, d := range ds { 106 lastRepoID := -1 107 for docID := uint32(0); int(docID) < len(d.fileBranchMasks); docID++ { 108 repoID := int(d.repos[docID]) 109 110 if d.repoMetaData[repoID].Tombstone { 111 continue 112 } 113 114 if repoID != lastRepoID { 115 if lastRepoID > repoID { 116 return nil, fmt.Errorf("non-contiguous repo ids in %s for document %d: old=%d current=%d", d.String(), docID, lastRepoID, repoID) 117 } 118 lastRepoID = repoID 119 120 // TODO we are losing empty repos on merging since we only get here if 121 // there is an associated document. 122 123 if err := ib.setRepository(&d.repoMetaData[repoID]); err != nil { 124 return nil, err 125 } 126 } 127 128 if err := addDocument(d, ib, repoID, docID); err != nil { 129 return nil, err 130 } 131 } 132 } 133 134 return ib, nil 135} 136 137// Explode takes an IndexFile f and creates 1 simple shard per repository 138// contained in f. Explode returns a map of tmpName -> dstName. It is the 139// responsibility of the caller to rename the temporary shard(s) and delete the 140// input shard. 141func Explode(dstDir string, f IndexFile) (map[string]string, error) { 142 return explode(dstDir, f) 143} 144 145type indexBuilderFunc func(ib *IndexBuilder) 146 147// explode offers a richer signature compared to Explode for testing. You 148// probably want to call Explode instead. 149func explode(dstDir string, f IndexFile, ibFuncs ...indexBuilderFunc) (map[string]string, error) { 150 searcher, err := NewSearcher(f) 151 if err != nil { 152 return nil, err 153 } 154 d := searcher.(*indexData) 155 156 shardNames := make(map[string]string, len(d.repoMetaData)) 157 158 writeShard := func(ib *IndexBuilder) error { 159 if len(ib.repoList) != 1 { 160 return fmt.Errorf("expected ib to contain exactly 1 repository") 161 } 162 for _, ibFunc := range ibFuncs { 163 ibFunc(ib) 164 } 165 166 prefix := "" 167 if tenant.EnforceTenant() { 168 prefix = tenant.SrcPrefix(ib.repoList[0].TenantID, ib.repoList[0].ID) 169 } else { 170 prefix = ib.repoList[0].Name 171 } 172 173 fn := filepath.Join(dstDir, shardName(prefix, ib.indexFormatVersion, 0)) 174 fnTmp := fn + ".tmp" 175 shardNames[fnTmp] = fn 176 return builderWriteAll(fnTmp, ib) 177 } 178 179 var ib *IndexBuilder 180 lastRepoID := -1 181 for docID := uint32(0); int(docID) < len(d.fileBranchMasks); docID++ { 182 repoID := int(d.repos[docID]) 183 184 if d.repoMetaData[repoID].Tombstone { 185 continue 186 } 187 188 if repoID != lastRepoID { 189 if lastRepoID > repoID { 190 return shardNames, fmt.Errorf("non-contiguous repo ids in %s for document %d: old=%d current=%d", d.String(), docID, lastRepoID, repoID) 191 } 192 lastRepoID = repoID 193 194 if ib != nil { 195 if err := writeShard(ib); err != nil { 196 return shardNames, err 197 } 198 } 199 200 ib = newIndexBuilder() 201 ib.indexFormatVersion = IndexFormatVersion 202 if err := ib.setRepository(&d.repoMetaData[repoID]); err != nil { 203 return shardNames, err 204 } 205 } 206 207 err := addDocument(d, ib, repoID, docID) 208 if err != nil { 209 return shardNames, err 210 } 211 } 212 213 if ib != nil { 214 if err := writeShard(ib); err != nil { 215 return shardNames, err 216 } 217 } 218 219 return shardNames, nil 220} 221 222func addDocument(d *indexData, ib *IndexBuilder, repoID int, docID uint32) error { 223 doc := Document{ 224 Name: string(d.fileName(docID)), 225 // Content set below since it can return an error 226 // Branches set below since it requires lookups 227 SubRepositoryPath: d.subRepoPaths[repoID][d.subRepos[docID]], 228 Language: d.languageMap[d.getLanguage(docID)], 229 // SkipReason not set, will be part of content from original indexer. 230 } 231 232 var err error 233 if doc.Content, err = d.readContents(docID); err != nil { 234 return err 235 } 236 237 if doc.Symbols, _, err = d.readDocSections(docID, nil); err != nil { 238 return err 239 } 240 241 doc.SymbolsMetaData = make([]*Symbol, len(doc.Symbols)) 242 for i := range doc.SymbolsMetaData { 243 doc.SymbolsMetaData[i] = d.symbols.data(d.fileEndSymbol[docID] + uint32(i)) 244 } 245 246 // calculate branches 247 { 248 mask := d.fileBranchMasks[docID] 249 id := uint32(1) 250 for mask != 0 { 251 if mask&0x1 != 0 { 252 doc.Branches = append(doc.Branches, d.branchNames[repoID][uint(id)]) 253 } 254 id <<= 1 255 mask >>= 1 256 } 257 } 258 return ib.Add(doc) 259} 260 261// copied from builder package to avoid circular imports. 262func hashString(s string) string { 263 h := sha1.New() 264 _, _ = io.WriteString(h, s) 265 return fmt.Sprintf("%x", h.Sum(nil)) 266} 267 268// copied from builder package to avoid circular imports. 269func shardName(prefix string, version, n int) string { 270 abs := url.QueryEscape(prefix) 271 if len(abs) > 200 { 272 abs = abs[:200] + hashString(abs)[:8] 273 } 274 return fmt.Sprintf("%s_v%d.%05d.zoekt", abs, version, n) 275}