File: fileinfo_parser.go

package info (click to toggle)
golang-github-viant-toolbox 0.33.2-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 1,280 kB
  • sloc: makefile: 16
file content (218 lines) | stat: -rw-r--r-- 5,989 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
package scp

import (
	"fmt"
	"github.com/lunixbochs/vtclean"
	"github.com/viant/toolbox"
	"github.com/viant/toolbox/storage"
	"net/url"
	"strings"
	"time"
	"unicode"
)

// Zero-based positions of the whitespace-separated columns in a listing line
// that carries a non-ISO timestamp, for example:
//
//	-rw-r--r--  1 awitas  1742120565   414 Jun  8 14:14:08 2017 id_rsa.pub
const (
	fileInfoPermission = iota // permission string ("-rw-r--r--" in the sample)
	_                         // column 1 is not read by the parser ("1" in the sample; presumably the link count)
	fileInfoOwner             // owner ("awitas")
	fileInfoGroup             // group ("1742120565")
	fileInfoSize              // size ("414")
	fileInfoDateMonth         // month name ("Jun")
	fileInfoDateDay           // day of month ("8")
	fileInfoDateHour          // time of day ("14:14:08")
	fileInfoDateYear          // year ("2017")
	fileInfoName              // file name ("id_rsa.pub")
)

// Zero-based positions of the whitespace-separated columns in a listing line
// that carries an ISO-style timestamp, for example:
//
//	-rw-r--r-- 1 awitas awitas 2002 2017-11-04 22:29:33.363458941 +0000 aerospikeciads_aerospike.conf
const (
	fileIsoInfoPermission = iota // permission string ("-rw-r--r--" in the sample)
	_                            // column 1 is not read by the parser ("1" in the sample; presumably the link count)
	fileIsoInfoOwner             // owner ("awitas")
	fileIsoInfoGroup             // group ("awitas")
	fileIsoInfoSize              // size ("2002")
	fileIsoDate                  // date ("2017-11-04")
	fileIsoTime                  // time of day, possibly with fractional seconds ("22:29:33.363458941")
	fileIsoTimezone              // UTC offset ("+0000")
	fileIsoInfoName              // file name ("aerospikeciads_aerospike.conf")
)

// Parser converts the textual output (stdout) of a file listing command into
// storage objects.
type Parser struct {
	// IsoTimeStyle selects the line format Parse tries first: the ISO-timestamp
	// format when true, the non-ISO format otherwise. The other format is only
	// used as a fallback for lines the first attempt fails to parse.
	IsoTimeStyle bool
}

// Parse converts the stdout of a file listing command into storage objects,
// one per non-blank line.
//
// Each line is first parsed with the format selected by p.IsoTimeStyle; if
// that attempt fails the other format is tried, and if both fail the error of
// the second attempt is returned together with a nil slice. parsedURL and
// isURLDir are passed through to the per-line extractors unchanged.
//
// When stdout contains "No such file or directory" the result is an empty,
// non-nil slice and no error.
func (p *Parser) Parse(parsedURL *url.URL, stdout string, isURLDir bool) ([]storage.Object, error) {
	var result = make([]storage.Object, 0)
	if strings.Contains(stdout, "No such file or directory") {
		return result, nil
	}
	for _, line := range strings.Split(stdout, "\n") {
		// Skip blank lines, including whitespace-only ones such as a lone "\r"
		// from CRLF output: the extractors answer those with (nil, nil), which
		// would otherwise put a nil object into the result.
		if strings.TrimSpace(line) == "" {
			continue
		}
		primary, fallback := p.extractObjectFromNonIsoBaseTimeCommand, p.extractObjectFromIsoBasedTimeCommand
		if p.IsoTimeStyle {
			primary, fallback = fallback, primary
		}
		object, err := primary(parsedURL, line, isURLDir)
		if err != nil {
			object, err = fallback(parsedURL, line, isURLDir)
		}
		if err != nil {
			return nil, err
		}
		result = append(result, object)
	}
	return result, nil
}

// HasNextTokenInout reports whether a token starts at byte offset
// nextTokenPosition of line, i.e. whether the rune found there is not
// whitespace as defined by unicode.IsSpace.
//
// It returns false when nextTokenPosition lies outside of line. An offset that
// does not fall on a rune boundary decodes as U+FFFD, which is not whitespace,
// so true is returned in that case.
func (p *Parser) HasNextTokenInout(nextTokenPosition int, line string) bool {
	if nextTokenPosition < 0 || nextTokenPosition >= len(line) {
		return false
	}
	// Only the first rune matters and a UTF-8 sequence is at most 4 bytes long,
	// so convert just that window instead of the whole remainder of the line.
	end := nextTokenPosition + 4
	if end > len(line) {
		end = len(line)
	}
	first := []rune(line[nextTokenPosition:end])[0]
	return !unicode.IsSpace(first)
}

// newObject assembles the storage object for a single listing entry.
//
// The object URL is the part of parsedURL up to and including its host,
// followed by a path: when isURLDirectory is set, the first occurrence of
// parsedURL.Path is removed from name and the remainder is joined onto
// parsedURL.Path with toolbox.URLPathJoin; otherwise name is used as the path
// as is. permission is converted with storage.NewFileMode, and the resulting
// mode also decides whether the entry is reported as a directory. size is
// converted with toolbox.AsInt. line is only used in the error message
// returned when permission cannot be converted.
func (p *Parser) newObject(parsedURL *url.URL, name, permission, line, size string, modificationTime time.Time, isURLDirectory bool) (storage.Object, error) {
	basePath := parsedURL.Path
	rawURL := parsedURL.String()
	// Everything up to the end of the host is reused verbatim as the URL prefix.
	prefixEnd := strings.Index(rawURL, parsedURL.Host) + len(parsedURL.Host)
	prefix := rawURL[:prefixEnd]

	mode, err := storage.NewFileMode(permission)
	if err != nil {
		return nil, fmt.Errorf("failed to parse line for lineinfo: %v, unable to file attributes: %v", line, err)
	}

	objectPath := name
	if isURLDirectory {
		name = strings.Replace(name, basePath, "", 1)
		objectPath = toolbox.URLPathJoin(basePath, name)
	}

	info := storage.NewFileInfo(name, int64(toolbox.AsInt(size)), mode, modificationTime, mode.IsDir())
	return newStorageObject(prefix+objectPath, info, info), nil
}

// extractObjectFromNonIsoBaseTimeCommand builds a storage object from one
// listing line whose timestamp is not in ISO style, for example:
//
//	-rw-r--r--  1 awitas  1742120565   414 Jun  8 14:14:08 2017 id_rsa.pub
//
// The line is split on whitespace (strings.Fields, so leading whitespace and
// multi-byte whitespace cannot shift the columns) and the columns are read in
// the order given by the fileInfo* constants. A group that itself contains
// whitespace is tolerated: as long as the token in the size position does not
// start with a numeric character it is taken as a continuation of the group,
// and the size is expected in the next token. Only the first token after the
// timestamp is used as the name; later tokens are ignored.
//
// A blank line yields (nil, nil). An error is returned when the line has no
// name column, when the timestamp cannot be parsed, or when newObject fails.
func (p *Parser) extractObjectFromNonIsoBaseTimeCommand(parsedURL *url.URL, line string, isURLDirectory bool) (storage.Object, error) {
	if strings.TrimSpace(line) == "" {
		return nil, nil
	}
	var name, permission, size, year, month, day, hour string
	column := fileInfoPermission
	for _, field := range strings.Fields(line) {
		if column > fileInfoName {
			break
		}
		// strings.Fields never returns empty fields, so runes[0] always exists.
		runes := []rune(field)
		if column == fileInfoSize && !unicode.IsNumber(runes[0]) {
			// Still inside a multi-word group; stay on the size column.
			continue
		}
		// Going through []rune turns invalid UTF-8 bytes into U+FFFD, so the
		// stored values are always valid UTF-8.
		token := string(runes)
		switch column {
		case fileInfoPermission:
			permission = token
		case fileInfoSize:
			size = token
		case fileInfoDateMonth:
			month = token
		case fileInfoDateDay:
			day = token
		case fileInfoDateHour:
			hour = token
		case fileInfoDateYear:
			year = token
		case fileInfoName:
			name = token
		}
		column++
	}

	if name == "" {
		return nil, fmt.Errorf("failed to parse line for fileinfo: %v\n", line)
	}
	dateTime := year + " " + month + " " + day + " " + hour
	layout := toolbox.DateFormatToLayout("yyyy MMM ddd HH:mm:s")
	modificationTime, err := time.Parse(layout, dateTime)
	if err != nil {
		return nil, fmt.Errorf("failed to extract file info from stdout: %v, err: %v", line, err)
	}

	return p.newObject(parsedURL, name, permission, line, size, modificationTime, isURLDirectory)
}

// extractObjectFromIsoBasedTimeCommand builds a storage object from one
// listing line whose timestamp is in ISO style, for example:
//
//	-rw-r--r-- 1 awitas awitas 2002 2017-11-04 22:29:33.363458941 +0000 aerospikeciads_aerospike.conf
//
// The line is first passed through vtclean.Clean, then split on whitespace
// (strings.Fields, so leading whitespace and multi-byte whitespace cannot
// shift the columns) and read in the order given by the fileIso* constants.
// A group that itself contains whitespace is tolerated: as long as the token
// in the size position does not start with a numeric character it is taken as
// a continuation of the group, and the size is expected in the next token.
// Only the first token after the timezone is used as the name; later tokens
// are ignored.
//
// The time column is cut to at most 12 bytes ("HH:mm:ss.SSS") before parsing;
// a layout without fractional seconds is used when date and time together are
// no longer than "yyyy-MM-dd HH:mm:ss".
//
// A blank line yields (nil, nil). An error is returned when the timestamp
// cannot be parsed or when newObject fails.
func (p *Parser) extractObjectFromIsoBasedTimeCommand(parsedURL *url.URL, line string, isURLDirectory bool) (storage.Object, error) {
	if strings.TrimSpace(line) == "" {
		return nil, nil
	}
	var name, permission, timezone, date, modTime, size string
	line = vtclean.Clean(line, false)
	column := fileIsoInfoPermission
	for _, field := range strings.Fields(line) {
		if column > fileIsoInfoName {
			break
		}
		// strings.Fields never returns empty fields, so runes[0] always exists.
		runes := []rune(field)
		if column == fileIsoInfoSize && !unicode.IsNumber(runes[0]) {
			// Still inside a multi-word group; stay on the size column.
			continue
		}
		// Going through []rune turns invalid UTF-8 bytes into U+FFFD, so the
		// stored values are always valid UTF-8.
		token := string(runes)
		switch column {
		case fileIsoInfoPermission:
			permission = token
		case fileIsoInfoSize:
			size = token
		case fileIsoDate:
			date = token
		case fileIsoTime:
			modTime = token
		case fileIsoTimezone:
			timezone = token
		case fileIsoInfoName:
			name = token
		}
		column++
	}
	if len(modTime) > 12 {
		// Keep at most millisecond precision.
		modTime = modTime[:12]
	}
	dateTime := date + " " + modTime + " " + timezone
	layout := toolbox.DateFormatToLayout("yyyy-MM-dd HH:mm:ss.SSS ZZ")
	if len(date)+1+len(modTime) <= len("yyyy-MM-dd HH:mm:ss") {
		layout = toolbox.DateFormatToLayout("yyyy-MM-dd HH:mm:ss ZZ")
	}
	modificationTime, err := time.Parse(layout, dateTime)
	if err != nil {
		return nil, fmt.Errorf("failed to extract file info from stdout: %v, err: %v", line, err)
	}
	return p.newObject(parsedURL, name, permission, line, size, modificationTime, isURLDirectory)
}