···3535 "syscall"
3636 "time"
37373838+ "github.com/sourcegraph/mountinfo"
3939+3840 "github.com/sourcegraph/zoekt"
3941 "github.com/sourcegraph/zoekt/build"
4042 "github.com/sourcegraph/zoekt/debugserver"
4141- "github.com/sourcegraph/zoekt/internal/mountinfo"
4243 "github.com/sourcegraph/zoekt/internal/profiler"
4344 "github.com/sourcegraph/zoekt/internal/tracer"
4445 "github.com/sourcegraph/zoekt/query"
···184185 metricsLogger := sglog.Scoped("metricsRegistration", "")
185186186187 mustRegisterMemoryMapMetrics(metricsLogger)
187187- mountinfo.MustRegisterNewMountPointInfoMetric(metricsLogger, mountinfo.MountPointInfoOpts{Namespace: "zoekt_webserver"}, map[string]string{"indexDir": *index})
188188+189189+ opts := mountinfo.CollectorOpts{Namespace: "zoekt_webserver"}
190190+ c := mountinfo.NewCollector(metricsLogger, opts, map[string]string{"indexDir": *index})
191191+192192+ prometheus.DefaultRegisterer.MustRegister(c)
188193189194 // Do not block on loading shards so we can become partially available
190195 // sooner. Otherwise on large instances zoekt can be unavailable on the
···11-package mountinfo
22-33-import (
44- "errors"
55- "fmt"
66- "os"
77- "path/filepath"
88- "runtime"
99- "strings"
1010-1111- "github.com/prometheus/client_golang/prometheus"
1212- "github.com/prometheus/client_golang/prometheus/promauto"
1313- sglog "github.com/sourcegraph/log"
1414- "golang.org/x/sys/unix"
1515-)
1616-1717-// defaultSysMountPoint is the common mount point for the sysfs pseudo-filesystem.
1818-const defaultSysMountPoint = "/sys"
// MountPointInfoOpts configures the metric that
// MustRegisterNewMountPointInfoMetric creates.
type MountPointInfoOpts struct {
	// Namespace, when non-empty, is placed in front of the "mount_point_info"
	// metric name, separated from it by an underscore ("_").
	Namespace string
}
2727-2828-// MustRegisterNewMountPointInfoMetric registers a Prometheus metric named "mount_point_info" that
2929-// contains the names of the block storage devices that back each of the requested mounts.
3030-//
3131-// Mounts is a set of name -> file path mappings (example: {"indexDir": "/home/.zoekt"}).
3232-//
3333-// The metric "mount_point_info" has a constant value of 1 and two labels:
3434-// - mount_name: caller-provided name for the given mount (example: "indexDir")
3535-// - device: name of the block device that backs the given mount file path (example: "sdb")
3636-//
3737-// This metric only works on Linux-based operating systems that have access to the sysfs pseudo-filesystem.
3838-// On all other operating systems, this metric will not emit any values.
3939-func MustRegisterNewMountPointInfoMetric(logger sglog.Logger, opts MountPointInfoOpts, mounts map[string]string) {
4040- logger = logger.Scoped("mountPointInfo", "registration logic for mount_point_info Prometheus metric")
4141-4242- metric := promauto.NewGaugeVec(prometheus.GaugeOpts{
4343- Namespace: opts.Namespace,
4444- Name: "mount_point_info",
4545- Help: "An info metric with a constant '1' value that contains mount_name, device mappings",
4646- }, []string{"mount_name", "device"})
4747-4848- // This device discovery logic relies on the sysfs pseudo-filesystem, which only exists
4949- // on linux.
5050- //
5151- // See https://en.wikipedia.org/wiki/Sysfs for more information.
5252- if runtime.GOOS != "linux" {
5353- return
5454- }
5555-5656- for name, filePath := range mounts {
5757- // for each <mountName>:<mountFilePath> pairing,
5858- // discover the name of the block device that stores <mountFilePath>.
5959- discoveryLogger := logger.Scoped("deviceNameDiscovery", "").With(
6060- sglog.String("mountName", name),
6161- sglog.String("mountFilePath", filePath),
6262- )
6363-6464- device, err := discoverDeviceName(discoveryLogger, discoverDeviceNameOpts{}, filePath)
6565- if err != nil {
6666- discoveryLogger.Warn("skipping metric registration",
6767- sglog.String("reason", "failed to discover device name"),
6868- sglog.Error(err),
6969- )
7070-7171- continue
7272- }
7373-7474- discoveryLogger.Debug("discovered device name",
7575- sglog.String("deviceName", device),
7676- )
7777-7878- metric.WithLabelValues(name, device).Set(1)
7979- }
8080-}
// discoverDeviceNameOpts carries the optional overrides accepted by
// discoverDeviceName.
type discoverDeviceNameOpts struct {
	// sysfsMountPoint is where the sysfs filesystem is mounted.
	// When empty, defaultSysMountPoint is used.
	sysfsMountPoint string

	// getDeviceNumber, when non-nil, is called to look up the number of the
	// block device that stores the specified file.
	// When nil, mountinfo.getDeviceNumber is used.
	getDeviceNumber func(filePath string) (major uint32, minor uint32, err error)
}
9292-9393-// discoverDeviceName returns the name of the block device that filePath is
9494-// stored on.
9595-func discoverDeviceName(logger sglog.Logger, opts discoverDeviceNameOpts, filePath string) (string, error) {
9696- // Note: It's quite involved to implement the device discovery logic for
9797- // every possible kind of storage device (e.x. logical volumes, NFS, etc.) See
9898- // https://unix.stackexchange.com/a/11312 for more information.
9999- //
100100- // As a result, this logic will only work correctly for filePaths that are either:
101101- // - stored directly on a block device
102102- // - stored on a block device's partition
103103- //
104104- // For all other device types, this logic will either:
105105- // - return an incorrect device name
106106- // - return an error
107107- //
108108- // This logic was implemented from information gathered from the following sources (amongst others):
109109- // - "The Linux Programming Interface" by Michael Kerrisk: Chapter 14
110110- // - "Linux Kernel Development" by Robert Love: Chapters 13, 17
111111- // - https://man7.org/linux/man-pages/man5/sysfs.5.html
112112- // - https://en.wikipedia.org/wiki/Sysfs
113113- // - https://unix.stackexchange.com/a/11312
114114- // - https://www.kernel.org/doc/ols/2005/ols2005v1-pages-321-334.pdf
115115-116116- getDeviceNumber := getDeviceNumber
117117- if opts.getDeviceNumber != nil {
118118- getDeviceNumber = opts.getDeviceNumber
119119- }
120120-121121- sysfsMountPoint := defaultSysMountPoint
122122- if opts.sysfsMountPoint != "" {
123123- sysfsMountPoint = opts.sysfsMountPoint
124124- }
125125-126126- sysfsMountPoint = filepath.Clean(sysfsMountPoint)
127127-128128- // the provided sysfs mountpoint could itself be a symlink, so we
129129- // resolve it immediately so that future file path
130130- // evaluations / massaging doesn't break
131131- sysfsMountPoint, err := filepath.EvalSymlinks(sysfsMountPoint)
132132- if err != nil {
133133- return "", fmt.Errorf("verifying sysfs mountpoint %q: failed to resolve symlink %w", sysfsMountPoint, err)
134134- }
135135-136136- major, minor, err := getDeviceNumber(filePath)
137137- if err != nil {
138138- return "", fmt.Errorf("discovering device number: %w", err)
139139- }
140140-141141- // Represent the number in <major>:<minor> format.
142142- deviceNumber := fmt.Sprintf("%d:%d", major, minor)
143143-144144- logger.Debug(
145145- "discovered device number",
146146- sglog.String("deviceNumber", deviceNumber),
147147- )
148148-149149- // /sys/dev/block/<device_number> symlinks to /sys/devices/.../block/.../<deviceName>
150150- symlink := filepath.Join(sysfsMountPoint, "dev", "block", deviceNumber)
151151-152152- devicePath, err := filepath.EvalSymlinks(symlink)
153153- if err != nil {
154154- return "", fmt.Errorf("discovering device path: failed to evaluate sysfs symlink %q: %w", symlink, err)
155155- }
156156-157157- devicePath, err = filepath.Abs(devicePath)
158158- if err != nil {
159159- return "", fmt.Errorf("discovering device path: failed to massage device path %q to absolute path: %w", devicePath, err)
160160- }
161161-162162- logger.Debug("discovered device path",
163163- sglog.String("devicePath", devicePath),
164164- )
165165-166166- // Check to see if devicePath points to a disk partition. If so, we need to find the parent
167167- // device.
168168-169169- // massage the sysfs folder name to ensure that it always ends in a '/'
170170- // so that strings.HasPrefix does what we expect when checking to see if
171171- // we're still under the /sys sub-folder
172172- sysFolderPrefix := strings.TrimSuffix(sysfsMountPoint, string(os.PathSeparator))
173173- sysFolderPrefix = sysFolderPrefix + string(os.PathSeparator)
174174-175175- for {
176176- if !strings.HasPrefix(devicePath, sysFolderPrefix) {
177177- // ensure that we're still under the /sys/ sub-folder
178178- return "", fmt.Errorf("validating device path: device path %q isn't a subpath of %q", devicePath, sysFolderPrefix)
179179- }
180180-181181- _, err := os.Stat(filepath.Join(devicePath, "partition"))
182182- if errors.Is(err, os.ErrNotExist) {
183183- break
184184- }
185185-186186- parent := filepath.Dir(devicePath)
187187-188188- logger.Debug("changing device path",
189189- sglog.String("reason", "oldDevicePath represents a disk partition"),
190190-191191- sglog.String("oldDevicePath", devicePath),
192192- sglog.String("newDevicePath", parent),
193193- )
194194-195195- devicePath = parent
196196- }
197197-198198- // If this device is a block device, its device path should have a symlink
199199- // to the block subsystem.
200200-201201- subsystemPath, err := filepath.EvalSymlinks(filepath.Join(devicePath, "subsystem"))
202202- if err != nil {
203203- return "", fmt.Errorf("validating device path: failed to discover subsystem that device (path %q) is part of: %w", devicePath, err)
204204- }
205205-206206- if filepath.Base(subsystemPath) != "block" {
207207- return "", fmt.Errorf("validating device path: device (path %q) is not part of the block subsystem", devicePath)
208208- }
209209-210210- device := filepath.Base(devicePath)
211211- return filepath.Base(device), nil
212212-}
213213-214214-func getDeviceNumber(filePath string) (major uint32, minor uint32, err error) {
215215- var stat unix.Stat_t
216216- err = unix.Stat(filePath, &stat)
217217- if err != nil {
218218- return 0, 0, fmt.Errorf("failed to stat %q: %w", filePath, err)
219219- }
220220-221221- major, minor = unix.Major(uint64(stat.Dev)), unix.Minor(uint64(stat.Dev))
222222- return major, minor, nil
223223-}
-29
internal/mountinfo/mountinfo_linux_test.go
···11-//go:build linux
22-33-package mountinfo
44-55-import (
66- "log"
77- "os"
88- "testing"
99-1010- "github.com/sourcegraph/log/logtest"
1111-)
1212-1313-func Test_DeviceName_SmokeTest(t *testing.T) {
1414- // A simple smoke test to verify that we can find the storage device
1515- // for the current working directory.
1616- logger := logtest.Scoped(t)
1717-1818- filePath, err := os.Getwd()
1919- if err != nil {
2020- log.Fatalf("getting current working directory: %s", err)
2121- }
2222-2323- device, err := discoverDeviceName(logger, discoverDeviceNameOpts{}, filePath)
2424- if err != nil {
2525- t.Fatalf("discovering device name for file path %q: %s", filePath, err)
2626- }
2727-2828- t.Logf("discovered device name %q for file path %q", device, filePath)
2929-}
-196
internal/mountinfo/mountinfo_test.go
···11-package mountinfo
22-33-import (
44- "archive/tar"
55- "compress/gzip"
66- "io"
77- "os"
88- "path/filepath"
99- "testing"
1010-1111- "github.com/google/go-cmp/cmp"
1212- "github.com/sourcegraph/log/logtest"
1313-)
1414-1515-func Test_DeviceName_Snapshots(t *testing.T) {
1616- // This test uses sysfs snapshots from real linux machines to ensure
1717- // that the device discovery logic returns the expected device name.
1818-1919- for _, test := range []struct {
2020- name string
2121-2222- sysfsTarballFile string
2323-2424- deviceMajor uint32
2525- deviceMinor uint32
2626-2727- expectedDeviceName string
2828- }{
2929- {
3030- name: "should find the name of the block device that backs a partition (vda1 -> vda)",
3131-3232- // ( lsblk output from the snapshotted machine)
3333- // ~ # lsblk
3434- // NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
3535- // nbd0 43:0 0 0B 0 disk
3636- // nbd1 43:32 0 0B 0 disk
3737- // nbd2 43:64 0 0B 0 disk
3838- // nbd3 43:96 0 0B 0 disk
3939- // nbd4 43:128 0 0B 0 disk
4040- // nbd5 43:160 0 0B 0 disk
4141- // nbd6 43:192 0 0B 0 disk
4242- // nbd7 43:224 0 0B 0 disk
4343- // vda 254:0 0 59.6G 0 disk
4444- // └─vda1 254:1 0 59.6G 0 part /etc/hosts # test targets this partition
4545- // /etc/hostname
4646- // /etc/resolv.conf
4747- // /data/index
4848- // nbd8 43:256 0 0B 0 disk
4949- // nbd9 43:288 0 0B 0 disk
5050- // nbd10 43:320 0 0B 0 disk
5151- // nbd11 43:352 0 0B 0 disk
5252- // nbd12 43:384 0 0B 0 disk
5353- // nbd13 43:416 0 0B 0 disk
5454- // nbd14 43:448 0 0B 0 disk
5555- // nbd15 43:480 0 0B 0 disk
5656-5757- sysfsTarballFile: "sysfs.vda1.tar.gz",
5858-5959- deviceMajor: 254, // points to vda1 partition
6060- deviceMinor: 1,
6161-6262- expectedDeviceName: "vda",
6363- },
6464- {
6565- name: "should find the device name for a lvm volume backed by a single disk",
6666-6767- // ( lsblk output from the snapshotted machine)
6868- // ~ # lsblk
6969- // NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
7070- // sda 8:0 0 7.3T 0 disk
7171- // └─sda1 8:1 0 1024G 0 part /var/lib/plex
7272- // nvme0n1 259:0 0 1.8T 0 disk
7373- // ├─nvme0n1p1 259:1 0 529M 0 part
7474- // ├─nvme0n1p2 259:2 0 99M 0 part
7575- // ├─nvme0n1p3 259:3 0 16M 0 part
7676- // ├─nvme0n1p4 259:4 0 293G 0 part
7777- // ├─nvme0n1p5 259:5 0 512M 0 part /boot
7878- // └─nvme0n1p6 259:6 0 1.5T 0 part
7979- // └─pool-nixos 254:0 0 600G 0 lvm /nix/store
8080- // / # test targets this device
8181-8282- sysfsTarballFile: "sysfs.lvm.dm-0.tar.gz",
8383-8484- deviceMajor: 254, // points to dm-0 device
8585- deviceMinor: 0,
8686-8787- // TODO@ggilmore: technically, dm-0 is a lvm volume backed by a partition stored on the nvme device.
8888- // For consistency with the other test case, we should be returning nvme0n1 (the parent disk device) as the
8989- // device name. I'll revisit this later, as I need to figure out how to programmatically determine
9090- // the nvme01n1 <-> dm-0 translation.
9191- expectedDeviceName: "dm-0",
9292- },
9393- } {
9494- test := test
9595-9696- t.Run(t.Name(), func(t *testing.T) {
9797- t.Parallel()
9898-9999- // provide a custom sysfs location so that we can point the test
100100- // at our sysfs snapshot
101101- mockSysFSDir := filepath.Join(t.TempDir(), "sys")
102102-103103- // unpack sysfs tarball
104104- tarball := filepath.Join("testdata", test.sysfsTarballFile)
105105- decompressSysFSTarball(t, tarball, mockSysFSDir)
106106-107107- logger := logtest.Scoped(t)
108108-109109- mockGetDeviceNumber := func(_ string) (major uint32, minor uint32, err error) {
110110- return test.deviceMajor, test.deviceMinor, nil
111111- }
112112- fakeFilePath := "doesn't matter" // the file path itself doesn't matter since we hard-code the device number
113113-114114- // execute the test with our injected mocks
115115- actualDeviceName, err := discoverDeviceName(
116116- logger,
117117- discoverDeviceNameOpts{
118118- sysfsMountPoint: mockSysFSDir,
119119- getDeviceNumber: mockGetDeviceNumber,
120120- },
121121- fakeFilePath,
122122- )
123123-124124- if err != nil {
125125- t.Fatalf("discovering device name for file path %q: %s", fakeFilePath, err)
126126- }
127127-128128- // verify that the discovered device name is the one that we expect
129129-130130- if diff := cmp.Diff(test.expectedDeviceName, actualDeviceName); diff != "" {
131131- t.Fatalf("recieved unexpected device name (-want +got):\n%s", diff)
132132- }
133133- })
134134- }
135135-}
// decompressSysFSTarball unpacks the gzip-compressed tar archive at tarball
// into outputFolder, failing the test immediately on any error.
//
// Directories, symlinks and regular files are recreated with the names (and,
// for directories and files, the modes) recorded in the archive. Any other
// entry type fails the test.
func decompressSysFSTarball(t *testing.T, tarball, outputFolder string) {
	t.Helper()

	file, err := os.Open(tarball)
	if err != nil {
		t.Fatalf("opening tarball %q: %s", tarball, err)
	}

	defer file.Close()

	gz, err := gzip.NewReader(file)
	if err != nil {
		t.Fatalf("initialzing gzip reader: %s", err)
	}

	defer gz.Close()

	reader := tar.NewReader(gz)

	for {
		header, err := reader.Next()
		if err == io.EOF {
			// end of archive
			break
		}

		if err != nil {
			t.Fatalf("reading next tar entry: %s", err)
		}

		outputFile := filepath.Join(outputFolder, header.Name)

		switch header.Typeflag {
		case tar.TypeDir:
			err := os.MkdirAll(outputFile, os.FileMode(header.Mode))
			if err != nil {
				t.Fatalf("creating directory %q: %s", outputFile, err)
			}

		case tar.TypeSymlink:
			err := os.Symlink(header.Linkname, outputFile)
			if err != nil {
				t.Fatalf("creating symlink (%q -> %q): %s", outputFile, header.Linkname, err)
			}

		case tar.TypeReg:
			writeSysFSFile(t, outputFile, os.FileMode(header.Mode), reader)

		default:
			t.Fatalf("encounted unknown file header type (%d) for file %q", header.Typeflag, header.Name)
		}
	}
}

// writeSysFSFile creates the file at path with the given mode and fills it
// with everything that can be read from src, failing the test on any error.
func writeSysFSFile(t *testing.T, path string, mode os.FileMode, src io.Reader) {
	t.Helper()

	f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, mode)
	if err != nil {
		t.Fatalf("creating file %q: %s", path, err)
	}

	// Close before inspecting the copy result so that the file handle is
	// released even when the copy failed.
	_, copyErr := io.Copy(f, src)
	closeErr := f.Close()

	if copyErr != nil {
		t.Fatalf("writing file %q: %s", path, copyErr)
	}

	if closeErr != nil {
		t.Fatalf("closing file %q: %s", path, closeErr)
	}
}
-48
internal/mountinfo/testdata/snapshot.sh
#!/usr/bin/env bash

# Create a tarball of this system's sysfs filesystem + place it at the given output path.
# A relative output path is resolved against the current working directory.
#
# (This special logic is necessary since /sys is a pseudo-filesystem that exposes kernel variables.
# The files in /sys and their sizes will frequently change in between read()'s, which can break naive tar invocations.)
#
# Usage: ./snapshot.sh sysfs.tar.gz

set -euo pipefail

# Validate the argument up front so that a missing output path is reported
# immediately instead of surfacing as a tar failure after the whole copy.
if [ "$#" -ne 1 ] || [ -z "$1" ]; then
  echo "Usage: $0 <output tarball, e.g. sysfs.tar.gz>" >&2
  exit 1
fi

set -x

# Keep absolute output paths as-is; anchor relative ones to the directory the
# script was invoked from (we cd into the temp folder before running tar).
case "$1" in
/*) dst="$1" ;;
*) dst="$PWD/$1" ;;
esac

tmp=$(mktemp -d -t sysfs_snapshot_XXXXXXX)

cleanup() {
  rm -rf "$tmp"
}
trap cleanup EXIT

find /sys/devices/*/block /sys/dev/block /sys/class/block -print0 | sort -z | while IFS= read -d $'\0' -r file; do
  # create the new file name by stripping the leading
  # /sys and mashing it against the temp folder
  temp_file="${tmp}/${file#*/sys/}"

  # create equivalent symlink
  if [ -L "$file" ]; then
    cp -d "$file" "$temp_file"
    continue
  fi

  # create necessary directories
  if [ -d "$file" ]; then
    mkdir -p "$temp_file"
    continue
  fi

  # skip over any files that we lack permissions to read,
  # we encounter I/O errors when trying to read, or
  # have some other weirdness
  if ! wc -l "$file" >/dev/null 2>&1; then
    continue
  fi

  cp "$file" "$temp_file"
done

cd "$tmp"
tar vczf "$dst" .