File: range_get_file.go

package info (click to toggle)
golang-github-tombuildsstuff-giovanni 0.20.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 15,908 kB
  • sloc: makefile: 3
file content (128 lines) | stat: -rw-r--r-- 3,117 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
package files

import (
	"context"
	"fmt"
	"log"
	"math"
	"runtime"
	"sync"

	"github.com/Azure/go-autorest/autorest"
)

// GetFile is a helper method to download a file by chunking it automatically.
//
// It looks up the file's Content-Length via GetProperties, splits the file
// into chunks of at most 4MB, downloads those chunks concurrently and then
// reassembles them in order.
//
// At most parallelism * runtime.NumCPU() chunks are downloaded at the same
// time; a parallelism below 1 is treated as 1. Once a chunk fails the
// remaining downloads are cancelled and the first error is returned.
//
// result is the response returned by GetProperties and outputBytes holds the
// complete contents of the file (empty, but non-nil, for a zero-length file).
func (client Client) GetFile(ctx context.Context, accountName, shareName, path, fileName string, parallelism int) (result autorest.Response, outputBytes []byte, err error) {

	// first look up the file and check out how many bytes it is
	file, e := client.GetProperties(ctx, accountName, shareName, path, fileName)
	result = file.Response
	if e != nil {
		err = e
		return
	}

	if file.ContentLength == nil {
		err = fmt.Errorf("Content-Length was nil!")
		return
	}

	length := int64(*file.ContentLength)
	if length < 0 {
		err = fmt.Errorf("Content-Length was negative: %d", length)
		return
	}
	if length == 0 {
		// nothing to download - and chunking a zero-length file would divide by zero
		outputBytes = []byte{}
		return
	}

	chunkSize := int64(4 * 1024 * 1024) // 4MB
	if chunkSize > length {
		chunkSize = length
	}

	// then split that up into chunks and retrieve it into the 'results' set
	chunks := int(math.Ceil(float64(length) / float64(chunkSize)))
	workerCount := parallelism * runtime.NumCPU()
	if workerCount < 1 {
		workerCount = 1
	}
	if workerCount > chunks {
		workerCount = chunks
	}

	// queue up every chunk index up-front so that the workers can simply drain the channel
	jobs := make(chan int, chunks)
	for i := 0; i < chunks; i++ {
		jobs <- i
	}
	close(jobs)

	// cancelling this context stops the in-flight downloads once one of them has failed
	downloadCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	results := make([]*downloadFileChunkResult, chunks)
	// each worker sends at most one error before exiting, so sends never block
	errs := make(chan error, workerCount)

	var waitGroup sync.WaitGroup
	waitGroup.Add(workerCount)
	for w := 0; w < workerCount; w++ {
		go func() {
			defer waitGroup.Done()

			for i := range jobs {
				if ctxErr := downloadCtx.Err(); ctxErr != nil {
					errs <- ctxErr
					return
				}

				log.Printf("[DEBUG] Downloading Chunk %d of %d", i+1, chunks)

				dfci := downloadFileChunkInput{
					thisChunk: i,
					chunkSize: chunkSize,
					fileSize:  length,
				}

				chunk, chunkErr := client.downloadFileChunk(downloadCtx, accountName, shareName, path, fileName, dfci)
				if chunkErr != nil {
					// report the error before cancelling, so that the root cause is
					// queued ahead of any cancellation errors from the other workers
					errs <- chunkErr
					cancel()
					return
				}

				// each index is handled by exactly one worker, so this write doesn't race
				results[i] = chunk
			}
		}()
	}
	waitGroup.Wait()

	// TODO: we should switch to hashicorp/multi-error here
	if len(errs) > 0 {
		err = fmt.Errorf("Error downloading file: %s", <-errs)
		return
	}

	// then finally put it all together, in order and return it
	output := make([]byte, length)
	for _, v := range results {
		copy(output[v.startBytes:v.endBytes], v.bytes)
	}

	outputBytes = output
	return
}

// downloadFileChunkInput describes which chunk of a file should be downloaded.
type downloadFileChunkInput struct {
	// thisChunk is the zero-based index of the chunk; multiplying it by
	// chunkSize gives the offset at which the chunk starts.
	thisChunk int

	// chunkSize is the size of a full chunk and fileSize is the total size of
	// the file, both in bytes.
	chunkSize, fileSize int64
}

// downloadFileChunkResult is a downloaded chunk along with its position in the file.
type downloadFileChunkResult struct {
	// startBytes and endBytes delimit the half-open range
	// [startBytes, endBytes) which this chunk occupies in the assembled file.
	startBytes, endBytes int64

	// bytes is the content which was downloaded for this chunk.
	bytes []byte
}

// downloadFileChunk downloads a single chunk of the file using GetByteRange.
//
// The chunk starts at input.thisChunk * input.chunkSize and spans
// input.chunkSize bytes, shortened where necessary so that it doesn't run
// past input.fileSize. It returns the range which was requested together with
// the contents returned by GetByteRange, or an error if that call fails.
func (client Client) downloadFileChunk(ctx context.Context, accountName, shareName, path, fileName string, input downloadFileChunkInput) (*downloadFileChunkResult, error) {
	startBytes := input.chunkSize * int64(input.thisChunk)
	endBytes := startBytes + input.chunkSize

	// the last chunk may exceed the size of the file, so clamp it to the end of the file
	if remaining := input.fileSize - startBytes; input.chunkSize > remaining {
		endBytes = startBytes + remaining
	}

	getInput := GetByteRangeInput{
		StartBytes: startBytes,
		EndBytes:   endBytes,
	}
	result, err := client.GetByteRange(ctx, accountName, shareName, path, fileName, getInput)
	if err != nil {
		// this is a download, so report it as such (and say which range failed)
		return nil, fmt.Errorf("Error getting bytes %d-%d: %s", startBytes, endBytes, err)
	}

	output := downloadFileChunkResult{
		startBytes: startBytes,
		endBytes:   endBytes,
		bytes:      result.Contents,
	}
	return &output, nil
}