475 lines
15 KiB
Go
475 lines
15 KiB
Go
|
// Copyright 2017 The Go Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
// Package cache implements a build artifact cache.
|
||
|
//
|
||
|
// This package is a slightly modified fork of Go's
|
||
|
// cmd/go/internal/cache package.
|
||
|
package cache
|
||
|
|
||
|
import (
|
||
|
"bytes"
|
||
|
"crypto/sha256"
|
||
|
"encoding/hex"
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"io"
|
||
|
"io/ioutil"
|
||
|
"os"
|
||
|
"path/filepath"
|
||
|
"strconv"
|
||
|
"strings"
|
||
|
"time"
|
||
|
|
||
|
"honnef.co/go/tools/internal/renameio"
|
||
|
)
|
||
|
|
||
|
// An ActionID is a cache action key, the hash of a complete description of a
|
||
|
// repeatable computation (command line, environment variables,
|
||
|
// input file contents, executable contents).
|
||
|
type ActionID [HashSize]byte
|
||
|
|
||
|
// An OutputID is a cache output key, the hash of an output of a computation.
|
||
|
type OutputID [HashSize]byte
|
||
|
|
||
|
// A Cache is a package cache, backed by a file system directory tree.
|
||
|
type Cache struct {
|
||
|
dir string
|
||
|
now func() time.Time
|
||
|
}
|
||
|
|
||
|
// Open opens and returns the cache in the given directory.
|
||
|
//
|
||
|
// It is safe for multiple processes on a single machine to use the
|
||
|
// same cache directory in a local file system simultaneously.
|
||
|
// They will coordinate using operating system file locks and may
|
||
|
// duplicate effort but will not corrupt the cache.
|
||
|
//
|
||
|
// However, it is NOT safe for multiple processes on different machines
|
||
|
// to share a cache directory (for example, if the directory were stored
|
||
|
// in a network file system). File locking is notoriously unreliable in
|
||
|
// network file systems and may not suffice to protect the cache.
|
||
|
//
|
||
|
func Open(dir string) (*Cache, error) {
|
||
|
info, err := os.Stat(dir)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
if !info.IsDir() {
|
||
|
return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
|
||
|
}
|
||
|
for i := 0; i < 256; i++ {
|
||
|
name := filepath.Join(dir, fmt.Sprintf("%02x", i))
|
||
|
if err := os.MkdirAll(name, 0777); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
}
|
||
|
c := &Cache{
|
||
|
dir: dir,
|
||
|
now: time.Now,
|
||
|
}
|
||
|
return c, nil
|
||
|
}
|
||
|
|
||
|
// fileName returns the name of the file corresponding to the given id.
|
||
|
func (c *Cache) fileName(id [HashSize]byte, key string) string {
|
||
|
return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
|
||
|
}
|
||
|
|
||
|
var errMissing = errors.New("cache entry not found")
|
||
|
|
||
|
const (
|
||
|
// action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
|
||
|
hexSize = HashSize * 2
|
||
|
entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1
|
||
|
)
|
||
|
|
||
|
// verify controls whether to run the cache in verify mode.
|
||
|
// In verify mode, the cache always returns errMissing from Get
|
||
|
// but then double-checks in Put that the data being written
|
||
|
// exactly matches any existing entry. This provides an easy
|
||
|
// way to detect program behavior that would have been different
|
||
|
// had the cache entry been returned from Get.
|
||
|
//
|
||
|
// verify is enabled by setting the environment variable
|
||
|
// GODEBUG=gocacheverify=1.
|
||
|
var verify = false
|
||
|
|
||
|
// DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
|
||
|
var DebugTest = false
|
||
|
|
||
|
func init() { initEnv() }
|
||
|
|
||
|
func initEnv() {
|
||
|
verify = false
|
||
|
debugHash = false
|
||
|
debug := strings.Split(os.Getenv("GODEBUG"), ",")
|
||
|
for _, f := range debug {
|
||
|
if f == "gocacheverify=1" {
|
||
|
verify = true
|
||
|
}
|
||
|
if f == "gocachehash=1" {
|
||
|
debugHash = true
|
||
|
}
|
||
|
if f == "gocachetest=1" {
|
||
|
DebugTest = true
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Get looks up the action ID in the cache,
|
||
|
// returning the corresponding output ID and file size, if any.
|
||
|
// Note that finding an output ID does not guarantee that the
|
||
|
// saved file for that output ID is still available.
|
||
|
func (c *Cache) Get(id ActionID) (Entry, error) {
|
||
|
if verify {
|
||
|
return Entry{}, errMissing
|
||
|
}
|
||
|
return c.get(id)
|
||
|
}
|
||
|
|
||
|
type Entry struct {
|
||
|
OutputID OutputID
|
||
|
Size int64
|
||
|
Time time.Time
|
||
|
}
|
||
|
|
||
|
// get is Get but does not respect verify mode, so that Put can use it.
|
||
|
func (c *Cache) get(id ActionID) (Entry, error) {
|
||
|
missing := func() (Entry, error) {
|
||
|
return Entry{}, errMissing
|
||
|
}
|
||
|
f, err := os.Open(c.fileName(id, "a"))
|
||
|
if err != nil {
|
||
|
return missing()
|
||
|
}
|
||
|
defer f.Close()
|
||
|
entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
|
||
|
if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF {
|
||
|
return missing()
|
||
|
}
|
||
|
if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
|
||
|
return missing()
|
||
|
}
|
||
|
eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
|
||
|
eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
|
||
|
esize, entry := entry[1:1+20], entry[1+20:]
|
||
|
//lint:ignore SA4006 See https://github.com/dominikh/go-tools/issues/465
|
||
|
etime, entry := entry[1:1+20], entry[1+20:]
|
||
|
var buf [HashSize]byte
|
||
|
if _, err := hex.Decode(buf[:], eid); err != nil || buf != id {
|
||
|
return missing()
|
||
|
}
|
||
|
if _, err := hex.Decode(buf[:], eout); err != nil {
|
||
|
return missing()
|
||
|
}
|
||
|
i := 0
|
||
|
for i < len(esize) && esize[i] == ' ' {
|
||
|
i++
|
||
|
}
|
||
|
size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
|
||
|
if err != nil || size < 0 {
|
||
|
return missing()
|
||
|
}
|
||
|
i = 0
|
||
|
for i < len(etime) && etime[i] == ' ' {
|
||
|
i++
|
||
|
}
|
||
|
tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
|
||
|
if err != nil || size < 0 {
|
||
|
return missing()
|
||
|
}
|
||
|
|
||
|
c.used(c.fileName(id, "a"))
|
||
|
|
||
|
return Entry{buf, size, time.Unix(0, tm)}, nil
|
||
|
}
|
||
|
|
||
|
// GetFile looks up the action ID in the cache and returns
|
||
|
// the name of the corresponding data file.
|
||
|
func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
|
||
|
entry, err = c.Get(id)
|
||
|
if err != nil {
|
||
|
return "", Entry{}, err
|
||
|
}
|
||
|
file = c.OutputFile(entry.OutputID)
|
||
|
info, err := os.Stat(file)
|
||
|
if err != nil || info.Size() != entry.Size {
|
||
|
return "", Entry{}, errMissing
|
||
|
}
|
||
|
return file, entry, nil
|
||
|
}
|
||
|
|
||
|
// GetBytes looks up the action ID in the cache and returns
|
||
|
// the corresponding output bytes.
|
||
|
// GetBytes should only be used for data that can be expected to fit in memory.
|
||
|
func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
|
||
|
entry, err := c.Get(id)
|
||
|
if err != nil {
|
||
|
return nil, entry, err
|
||
|
}
|
||
|
data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID))
|
||
|
if sha256.Sum256(data) != entry.OutputID {
|
||
|
return nil, entry, errMissing
|
||
|
}
|
||
|
return data, entry, nil
|
||
|
}
|
||
|
|
||
|
// OutputFile returns the name of the cache file storing output with the given OutputID.
|
||
|
func (c *Cache) OutputFile(out OutputID) string {
|
||
|
file := c.fileName(out, "d")
|
||
|
c.used(file)
|
||
|
return file
|
||
|
}
|
||
|
|
||
|
// Time constants for cache expiration.
|
||
|
//
|
||
|
// We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour),
|
||
|
// to avoid causing many unnecessary inode updates. The mtimes therefore
|
||
|
// roughly reflect "time of last use" but may in fact be older by at most an hour.
|
||
|
//
|
||
|
// We scan the cache for entries to delete at most once per trimInterval (1 day).
|
||
|
//
|
||
|
// When we do scan the cache, we delete entries that have not been used for
|
||
|
// at least trimLimit (5 days). Statistics gathered from a month of usage by
|
||
|
// Go developers found that essentially all reuse of cached entries happened
|
||
|
// within 5 days of the previous reuse. See golang.org/issue/22990.
|
||
|
const (
|
||
|
mtimeInterval = 1 * time.Hour
|
||
|
trimInterval = 24 * time.Hour
|
||
|
trimLimit = 5 * 24 * time.Hour
|
||
|
)
|
||
|
|
||
|
// used makes a best-effort attempt to update mtime on file,
|
||
|
// so that mtime reflects cache access time.
|
||
|
//
|
||
|
// Because the reflection only needs to be approximate,
|
||
|
// and to reduce the amount of disk activity caused by using
|
||
|
// cache entries, used only updates the mtime if the current
|
||
|
// mtime is more than an hour old. This heuristic eliminates
|
||
|
// nearly all of the mtime updates that would otherwise happen,
|
||
|
// while still keeping the mtimes useful for cache trimming.
|
||
|
func (c *Cache) used(file string) {
|
||
|
info, err := os.Stat(file)
|
||
|
if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval {
|
||
|
return
|
||
|
}
|
||
|
os.Chtimes(file, c.now(), c.now())
|
||
|
}
|
||
|
|
||
|
// Trim removes old cache entries that are likely not to be reused.
|
||
|
func (c *Cache) Trim() {
|
||
|
now := c.now()
|
||
|
|
||
|
// We maintain in dir/trim.txt the time of the last completed cache trim.
|
||
|
// If the cache has been trimmed recently enough, do nothing.
|
||
|
// This is the common case.
|
||
|
data, _ := ioutil.ReadFile(filepath.Join(c.dir, "trim.txt"))
|
||
|
t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
|
||
|
if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// Trim each of the 256 subdirectories.
|
||
|
// We subtract an additional mtimeInterval
|
||
|
// to account for the imprecision of our "last used" mtimes.
|
||
|
cutoff := now.Add(-trimLimit - mtimeInterval)
|
||
|
for i := 0; i < 256; i++ {
|
||
|
subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i))
|
||
|
c.trimSubdir(subdir, cutoff)
|
||
|
}
|
||
|
|
||
|
// Ignore errors from here: if we don't write the complete timestamp, the
|
||
|
// cache will appear older than it is, and we'll trim it again next time.
|
||
|
renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())))
|
||
|
}
|
||
|
|
||
|
// trimSubdir trims a single cache subdirectory.
|
||
|
func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
|
||
|
// Read all directory entries from subdir before removing
|
||
|
// any files, in case removing files invalidates the file offset
|
||
|
// in the directory scan. Also, ignore error from f.Readdirnames,
|
||
|
// because we don't care about reporting the error and we still
|
||
|
// want to process any entries found before the error.
|
||
|
f, err := os.Open(subdir)
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
names, _ := f.Readdirnames(-1)
|
||
|
f.Close()
|
||
|
|
||
|
for _, name := range names {
|
||
|
// Remove only cache entries (xxxx-a and xxxx-d).
|
||
|
if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") {
|
||
|
continue
|
||
|
}
|
||
|
entry := filepath.Join(subdir, name)
|
||
|
info, err := os.Stat(entry)
|
||
|
if err == nil && info.ModTime().Before(cutoff) {
|
||
|
os.Remove(entry)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// putIndexEntry adds an entry to the cache recording that executing the action
|
||
|
// with the given id produces an output with the given output id (hash) and size.
|
||
|
func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
|
||
|
// Note: We expect that for one reason or another it may happen
|
||
|
// that repeating an action produces a different output hash
|
||
|
// (for example, if the output contains a time stamp or temp dir name).
|
||
|
// While not ideal, this is also not a correctness problem, so we
|
||
|
// don't make a big deal about it. In particular, we leave the action
|
||
|
// cache entries writable specifically so that they can be overwritten.
|
||
|
//
|
||
|
// Setting GODEBUG=gocacheverify=1 does make a big deal:
|
||
|
// in verify mode we are double-checking that the cache entries
|
||
|
// are entirely reproducible. As just noted, this may be unrealistic
|
||
|
// in some cases but the check is also useful for shaking out real bugs.
|
||
|
entry := []byte(fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()))
|
||
|
if verify && allowVerify {
|
||
|
old, err := c.get(id)
|
||
|
if err == nil && (old.OutputID != out || old.Size != size) {
|
||
|
// panic to show stack trace, so we can see what code is generating this cache entry.
|
||
|
msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size)
|
||
|
panic(msg)
|
||
|
}
|
||
|
}
|
||
|
file := c.fileName(id, "a")
|
||
|
if err := ioutil.WriteFile(file, entry, 0666); err != nil {
|
||
|
// TODO(bcmills): This Remove potentially races with another go command writing to file.
|
||
|
// Can we eliminate it?
|
||
|
os.Remove(file)
|
||
|
return err
|
||
|
}
|
||
|
os.Chtimes(file, c.now(), c.now()) // mainly for tests
|
||
|
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// Put stores the given output in the cache as the output for the action ID.
|
||
|
// It may read file twice. The content of file must not change between the two passes.
|
||
|
func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
|
||
|
return c.put(id, file, true)
|
||
|
}
|
||
|
|
||
|
// PutNoVerify is like Put but disables the verify check
|
||
|
// when GODEBUG=goverifycache=1 is set.
|
||
|
// It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
|
||
|
// like test output containing times and the like.
|
||
|
func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
|
||
|
return c.put(id, file, false)
|
||
|
}
|
||
|
|
||
|
func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
|
||
|
// Compute output ID.
|
||
|
h := sha256.New()
|
||
|
if _, err := file.Seek(0, 0); err != nil {
|
||
|
return OutputID{}, 0, err
|
||
|
}
|
||
|
size, err := io.Copy(h, file)
|
||
|
if err != nil {
|
||
|
return OutputID{}, 0, err
|
||
|
}
|
||
|
var out OutputID
|
||
|
h.Sum(out[:0])
|
||
|
|
||
|
// Copy to cached output file (if not already present).
|
||
|
if err := c.copyFile(file, out, size); err != nil {
|
||
|
return out, size, err
|
||
|
}
|
||
|
|
||
|
// Add to cache index.
|
||
|
return out, size, c.putIndexEntry(id, out, size, allowVerify)
|
||
|
}
|
||
|
|
||
|
// PutBytes stores the given bytes in the cache as the output for the action ID.
|
||
|
func (c *Cache) PutBytes(id ActionID, data []byte) error {
|
||
|
_, _, err := c.Put(id, bytes.NewReader(data))
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
// copyFile copies file into the cache, expecting it to have the given
|
||
|
// output ID and size, if that file is not present already.
|
||
|
func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
|
||
|
name := c.fileName(out, "d")
|
||
|
info, err := os.Stat(name)
|
||
|
if err == nil && info.Size() == size {
|
||
|
// Check hash.
|
||
|
if f, err := os.Open(name); err == nil {
|
||
|
h := sha256.New()
|
||
|
io.Copy(h, f)
|
||
|
f.Close()
|
||
|
var out2 OutputID
|
||
|
h.Sum(out2[:0])
|
||
|
if out == out2 {
|
||
|
return nil
|
||
|
}
|
||
|
}
|
||
|
// Hash did not match. Fall through and rewrite file.
|
||
|
}
|
||
|
|
||
|
// Copy file to cache directory.
|
||
|
mode := os.O_RDWR | os.O_CREATE
|
||
|
if err == nil && info.Size() > size { // shouldn't happen but fix in case
|
||
|
mode |= os.O_TRUNC
|
||
|
}
|
||
|
f, err := os.OpenFile(name, mode, 0666)
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
defer f.Close()
|
||
|
if size == 0 {
|
||
|
// File now exists with correct size.
|
||
|
// Only one possible zero-length file, so contents are OK too.
|
||
|
// Early return here makes sure there's a "last byte" for code below.
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// From here on, if any of the I/O writing the file fails,
|
||
|
// we make a best-effort attempt to truncate the file f
|
||
|
// before returning, to avoid leaving bad bytes in the file.
|
||
|
|
||
|
// Copy file to f, but also into h to double-check hash.
|
||
|
if _, err := file.Seek(0, 0); err != nil {
|
||
|
f.Truncate(0)
|
||
|
return err
|
||
|
}
|
||
|
h := sha256.New()
|
||
|
w := io.MultiWriter(f, h)
|
||
|
if _, err := io.CopyN(w, file, size-1); err != nil {
|
||
|
f.Truncate(0)
|
||
|
return err
|
||
|
}
|
||
|
// Check last byte before writing it; writing it will make the size match
|
||
|
// what other processes expect to find and might cause them to start
|
||
|
// using the file.
|
||
|
buf := make([]byte, 1)
|
||
|
if _, err := file.Read(buf); err != nil {
|
||
|
f.Truncate(0)
|
||
|
return err
|
||
|
}
|
||
|
h.Write(buf)
|
||
|
sum := h.Sum(nil)
|
||
|
if !bytes.Equal(sum, out[:]) {
|
||
|
f.Truncate(0)
|
||
|
return fmt.Errorf("file content changed underfoot")
|
||
|
}
|
||
|
|
||
|
// Commit cache file entry.
|
||
|
if _, err := f.Write(buf); err != nil {
|
||
|
f.Truncate(0)
|
||
|
return err
|
||
|
}
|
||
|
if err := f.Close(); err != nil {
|
||
|
// Data might not have been written,
|
||
|
// but file may look like it is the right size.
|
||
|
// To be extra careful, remove cached file.
|
||
|
os.Remove(name)
|
||
|
return err
|
||
|
}
|
||
|
os.Chtimes(name, c.now(), c.now()) // mainly for tests
|
||
|
|
||
|
return nil
|
||
|
}
|