File: dedup_linux.go

package info (click to toggle)
golang-github-containers-storage 1.59.1%2Bds1-2
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 4,184 kB
  • sloc: sh: 630; ansic: 389; makefile: 143; awk: 12
file content (139 lines) | stat: -rw-r--r-- 3,103 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
package dedup

import (
	"errors"
	"fmt"
	"io"
	"io/fs"
	"os"
	"sync"
	"syscall"

	"golang.org/x/sys/unix"
)

// deviceInodePair holds a device number together with an inode number.
// The struct is comparable, so it can be used directly as a map key.
type deviceInodePair struct {
	dev, ino uint64
}

// dedupFiles remembers which device/inode pairs have been recorded so far.
type dedupFiles struct {
	lock          sync.Mutex                   // held by recordInode while it reads and updates visitedInodes
	visitedInodes map[deviceInodePair]struct{} // set of device/inode pairs recorded so far
}

// newDedupFiles returns a dedupFiles whose set of visited inodes is empty.
// The error result is always nil in this implementation.
func newDedupFiles() (*dedupFiles, error) {
	tracker := new(dedupFiles)
	tracker.visitedInodes = map[deviceInodePair]struct{}{}
	return tracker, nil
}

// recordInode marks the (dev, ino) pair as visited.  It returns true when the
// pair had already been recorded by an earlier call and false when this call
// is the first one to record it.  The error result is always nil.
func (d *dedupFiles) recordInode(dev, ino uint64) (bool, error) {
	key := deviceInodePair{dev: dev, ino: ino}

	d.lock.Lock()
	defer d.lock.Unlock()

	if _, seen := d.visitedInodes[key]; seen {
		// Already present: re-inserting the empty value would change nothing.
		return true, nil
	}
	d.visitedInodes[key] = struct{}{}
	return false, nil
}

// isFirstVisitOf records the device/inode pair of fi as visited.
//
// Despite what the name suggests, the boolean result is true when the file
// had already been recorded and false when this call is the first visit.
// An error is returned when fi.Sys() is not a *syscall.Stat_t.
func (d *dedupFiles) isFirstVisitOf(fi fs.FileInfo) (bool, error) {
	switch stat := fi.Sys().(type) {
	case *syscall.Stat_t:
		return d.recordInode(uint64(stat.Dev), stat.Ino) //nolint:unconvert
	default:
		return false, fmt.Errorf("unable to get raw syscall.Stat_t data")
	}
}

// dedup issues a file dedupe-range ioctl covering the whole of the file at
// src (offset 0, length taken from its stat size) against offset 0 of the
// file at dst.
//
// fiDst must describe dst; it is used only to detect that src and dst are the
// same inode (a hard link), in which case nothing is done and 0 is returned.
//
// On success the number of bytes the ioctl reports as deduplicated is
// returned.  If the ioctl fails with ENOTSUP, errNotSupported is returned;
// any other failure is wrapped and returned.
func (d *dedupFiles) dedup(src, dst string, fiDst fs.FileInfo) (uint64, error) {
	in, err := os.Open(src)
	if err != nil {
		return 0, fmt.Errorf("failed to open source file: %w", err)
	}
	defer in.Close()

	out, err := os.OpenFile(dst, os.O_WRONLY, 0)
	if err != nil {
		return 0, fmt.Errorf("failed to open destination file: %w", err)
	}
	defer out.Close()

	fiSrc, err := in.Stat()
	if err != nil {
		return 0, fmt.Errorf("failed to stat source file: %w", err)
	}

	srcStat, ok := fiSrc.Sys().(*syscall.Stat_t)
	if !ok {
		return 0, fmt.Errorf("unable to get raw syscall.Stat_t data")
	}
	dstStat, ok := fiDst.Sys().(*syscall.Stat_t)
	if !ok {
		return 0, fmt.Errorf("unable to get raw syscall.Stat_t data")
	}

	// Identical device and inode means both paths are hard links to the same
	// file, so there is nothing to deduplicate.
	if srcStat.Ino == dstStat.Ino && srcStat.Dev == dstStat.Dev {
		return 0, nil
	}

	target := unix.FileDedupeRangeInfo{
		Dest_fd:     int64(out.Fd()),
		Dest_offset: 0,
	}
	req := unix.FileDedupeRange{
		Src_offset: 0,
		Src_length: uint64(fiSrc.Size()),
		Info:       []unix.FileDedupeRangeInfo{target},
	}

	// NOTE(review): only Bytes_deduped is read from the result; any
	// per-destination status reported alongside it is not inspected here.
	// Confirm that this is intended.
	switch err := unix.IoctlFileDedupeRange(int(in.Fd()), &req); {
	case err == nil:
		return req.Info[0].Bytes_deduped, nil
	case errors.Is(err, unix.ENOTSUP):
		return 0, errNotSupported
	default:
		return 0, fmt.Errorf("failed to clone file %q: %w", src, err)
	}
}

// readAllFile hands the contents of the file at path to fn and returns
// whatever fn returns.  The length is taken from info, not from a fresh stat.
//
//   - An empty file is never opened; fn is called with nil.
//   - A file smaller than 4096 bytes is read fully into a freshly allocated
//     buffer.
//   - Anything larger is mapped read-only (private mapping, advised for
//     sequential access) and the mapping is passed to fn.
//
// The mapping is released before readAllFile returns, so fn must not keep a
// reference to the slice it receives.
func readAllFile(path string, info fs.FileInfo, fn func([]byte) (string, error)) (string, error) {
	// Files below this size are read into memory instead of being mapped.
	const mmapThreshold = 4096

	length := info.Size()
	if length == 0 {
		return fn(nil)
	}

	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	if length >= mmapThreshold {
		// NOTE(review): int(length) narrows an int64; presumably fine on
		// 64-bit targets, verify for 32-bit builds and very large files.
		mapped, err := unix.Mmap(int(f.Fd()), 0, int(length), unix.PROT_READ, unix.MAP_PRIVATE)
		if err != nil {
			return "", fmt.Errorf("failed to mmap file: %w", err)
		}
		defer func() {
			_ = unix.Munmap(mapped)
		}()

		// Best-effort hint; a failure here is deliberately ignored.
		_ = unix.Madvise(mapped, unix.MADV_SEQUENTIAL)

		return fn(mapped)
	}

	// Small file: read it all.
	buf := make([]byte, length)
	if _, err := io.ReadFull(f, buf); err != nil {
		return "", err
	}
	return fn(buf)
}