fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1package zoekt 2 3import ( 4 "crypto/sha1" 5 "fmt" 6 "io" 7 "log" 8 "os" 9 "path/filepath" 10 "runtime" 11 "sort" 12 13 "github.com/sourcegraph/zoekt/internal/tenant" 14) 15 16// Merge files into a compound shard in dstDir. Merge returns tmpName and a 17// dstName. It is the responsibility of the caller to delete the input shards and 18// rename the temporary compound shard from tmpName to dstName. 19func Merge(dstDir string, files ...IndexFile) (tmpName, dstName string, _ error) { 20 var ds []*indexData 21 for _, f := range files { 22 searcher, err := NewSearcher(f) 23 if err != nil { 24 return "", "", err 25 } 26 ds = append(ds, searcher.(*indexData)) 27 } 28 29 ib, err := merge(ds...) 30 if err != nil { 31 return "", "", err 32 } 33 34 hasher := sha1.New() 35 for _, d := range ds { 36 for i, md := range d.repoMetaData { 37 if d.repoMetaData[i].Tombstone { 38 continue 39 } 40 hasher.Write([]byte(md.Name)) 41 hasher.Write([]byte{0}) 42 } 43 } 44 45 dstName = filepath.Join(dstDir, fmt.Sprintf("compound-%x_v%d.%05d.zoekt", hasher.Sum(nil), NextIndexFormatVersion, 0)) 46 tmpName = dstName + ".tmp" 47 if err := builderWriteAll(tmpName, ib); err != nil { 48 return "", "", err 49 } 50 return tmpName, dstName, nil 51} 52 53func builderWriteAll(fn string, ib *IndexBuilder) error { 54 dir := filepath.Dir(fn) 55 if err := os.MkdirAll(dir, 0o700); err != nil { 56 return err 57 } 58 59 f, err := os.CreateTemp(dir, filepath.Base(fn)+".*.tmp") 60 if err != nil { 61 return err 62 } 63 if runtime.GOOS != "windows" { 64 // umask? 65 if err := f.Chmod(0o666); err != nil { 66 return err 67 } 68 } 69 70 defer f.Close() 71 if err := ib.Write(f); err != nil { 72 return err 73 } 74 fi, err := f.Stat() 75 if err != nil { 76 return err 77 } 78 if err := f.Close(); err != nil { 79 return err 80 } 81 82 if err := os.Rename(f.Name(), fn); err != nil { 83 return err 84 } 85 86 log.Printf("finished shard %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), 87 float64(fi.Size())/float64(ib.ContentSize()+1)) 88 89 return nil 90} 91 92func merge(ds ...*indexData) (*IndexBuilder, error) { 93 if len(ds) == 0 { 94 return nil, fmt.Errorf("need 1 or more indexData to merge") 95 } 96 97 sort.Slice(ds, func(i, j int) bool { 98 return ds[i].repoMetaData[0].priority > ds[j].repoMetaData[0].priority 99 }) 100 101 ib := newIndexBuilder() 102 ib.indexFormatVersion = NextIndexFormatVersion 103 104 for _, d := range ds { 105 lastRepoID := -1 106 for docID := uint32(0); int(docID) < len(d.fileBranchMasks); docID++ { 107 repoID := int(d.repos[docID]) 108 109 if d.repoMetaData[repoID].Tombstone { 110 continue 111 } 112 113 if repoID != lastRepoID { 114 if lastRepoID > repoID { 115 return nil, fmt.Errorf("non-contiguous repo ids in %s for document %d: old=%d current=%d", d.String(), docID, lastRepoID, repoID) 116 } 117 lastRepoID = repoID 118 119 // TODO we are losing empty repos on merging since we only get here if 120 // there is an associated document. 121 122 if err := ib.setRepository(&d.repoMetaData[repoID]); err != nil { 123 return nil, err 124 } 125 } 126 127 if err := addDocument(d, ib, repoID, docID); err != nil { 128 return nil, err 129 } 130 } 131 } 132 133 return ib, nil 134} 135 136// Explode takes an IndexFile f and creates 1 simple shard per repository 137// contained in f. Explode returns a map of tmpName -> dstName. It is the 138// responsibility of the caller to rename the temporary shard(s) and delete the 139// input shard. 140func Explode(dstDir string, f IndexFile) (map[string]string, error) { 141 return explode(dstDir, f) 142} 143 144type indexBuilderFunc func(ib *IndexBuilder) 145 146// explode offers a richer signature compared to Explode for testing. You 147// probably want to call Explode instead. 148func explode(dstDir string, f IndexFile, ibFuncs ...indexBuilderFunc) (map[string]string, error) { 149 searcher, err := NewSearcher(f) 150 if err != nil { 151 return nil, err 152 } 153 d := searcher.(*indexData) 154 155 shardNames := make(map[string]string, len(d.repoMetaData)) 156 157 writeShard := func(ib *IndexBuilder) error { 158 if len(ib.repoList) != 1 { 159 return fmt.Errorf("expected ib to contain exactly 1 repository") 160 } 161 for _, ibFunc := range ibFuncs { 162 ibFunc(ib) 163 } 164 165 prefix := "" 166 if tenant.EnforceTenant() { 167 prefix = tenant.SrcPrefix(ib.repoList[0].TenantID, ib.repoList[0].ID) 168 } else { 169 prefix = ib.repoList[0].Name 170 } 171 172 shardName := ShardName(dstDir, prefix, ib.indexFormatVersion, 0) 173 shardNameTmp := shardName + ".tmp" 174 shardNames[shardNameTmp] = shardName 175 return builderWriteAll(shardNameTmp, ib) 176 } 177 178 var ib *IndexBuilder 179 lastRepoID := -1 180 for docID := uint32(0); int(docID) < len(d.fileBranchMasks); docID++ { 181 repoID := int(d.repos[docID]) 182 183 if d.repoMetaData[repoID].Tombstone { 184 continue 185 } 186 187 if repoID != lastRepoID { 188 if lastRepoID > repoID { 189 return shardNames, fmt.Errorf("non-contiguous repo ids in %s for document %d: old=%d current=%d", d.String(), docID, lastRepoID, repoID) 190 } 191 lastRepoID = repoID 192 193 if ib != nil { 194 if err := writeShard(ib); err != nil { 195 return shardNames, err 196 } 197 } 198 199 ib = newIndexBuilder() 200 ib.indexFormatVersion = IndexFormatVersion 201 if err := ib.setRepository(&d.repoMetaData[repoID]); err != nil { 202 return shardNames, err 203 } 204 } 205 206 err := addDocument(d, ib, repoID, docID) 207 if err != nil { 208 return shardNames, err 209 } 210 } 211 212 if ib != nil { 213 if err := writeShard(ib); err != nil { 214 return shardNames, err 215 } 216 } 217 218 return shardNames, nil 219} 220 221func addDocument(d *indexData, ib *IndexBuilder, repoID int, docID uint32) error { 222 doc := Document{ 223 Name: string(d.fileName(docID)), 224 // Content set below since it can return an error 225 // Branches set below since it requires lookups 226 SubRepositoryPath: d.subRepoPaths[repoID][d.subRepos[docID]], 227 Language: d.languageMap[d.getLanguage(docID)], 228 // SkipReason not set, will be part of content from original indexer. 229 } 230 231 var err error 232 if doc.Content, err = d.readContents(docID); err != nil { 233 return err 234 } 235 236 if doc.Symbols, _, err = d.readDocSections(docID, nil); err != nil { 237 return err 238 } 239 240 doc.SymbolsMetaData = make([]*Symbol, len(doc.Symbols)) 241 for i := range doc.SymbolsMetaData { 242 doc.SymbolsMetaData[i] = d.symbols.data(d.fileEndSymbol[docID] + uint32(i)) 243 } 244 245 // calculate branches 246 { 247 mask := d.fileBranchMasks[docID] 248 id := uint32(1) 249 for mask != 0 { 250 if mask&0x1 != 0 { 251 doc.Branches = append(doc.Branches, d.branchNames[repoID][uint(id)]) 252 } 253 id <<= 1 254 mask >>= 1 255 } 256 } 257 return ib.Add(doc) 258} 259 260// copied from builder package to avoid circular imports. 261func hashString(s string) string { 262 h := sha1.New() 263 _, _ = io.WriteString(h, s) 264 return fmt.Sprintf("%x", h.Sum(nil)) 265}