File: text_csv.go

package info (click to toggle)
golang-github-gabriel-vasile-mimetype 1.4.1%2Bdfsg1-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bookworm-backports
  • size: 9,720 kB
  • sloc: javascript: 3; makefile: 3; tcl: 1; php: 1; python: 1; perl: 1
file content (51 lines) | stat: -rw-r--r-- 1,167 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
package magic

import (
	"bytes"
	"encoding/csv"
	"io"
)

// Csv reports whether raw looks like a comma-separated values file.
//
// Both raw and limit are handed unchanged to sv, with ',' as the field
// delimiter.
func Csv(raw []byte, limit uint32) bool {
	const delimiter = ','
	return sv(raw, delimiter, limit)
}

// Tsv reports whether raw looks like a tab-separated values file.
//
// Both raw and limit are handed unchanged to sv, with '\t' as the field
// delimiter.
func Tsv(raw []byte, limit uint32) bool {
	const delimiter = '\t'
	return sv(raw, delimiter, limit)
}

// sv reports whether in parses as a separated-values document whose fields
// are delimited by comma.
//
// The input is first passed through dropLastLine together with limit, so a
// trailing line that was cut short does not cause a parse failure. Parsing is
// lenient: leading white space in a field is trimmed, quotes are handled
// lazily and lines starting with '#' are skipped as comments.
//
// It returns true only when every record parses without error, the first
// record has more than one field, and there is more than one record.
func sv(in []byte, comma rune, limit uint32) bool {
	r := csv.NewReader(dropLastLine(in, limit))
	r.Comma = comma
	r.TrimLeadingSpace = true
	r.LazyQuotes = true
	r.Comment = '#'
	// Only the number of records is needed, so let the reader recycle its
	// record slice instead of materializing every row the way ReadAll does.
	r.ReuseRecord = true

	records := 0
	for {
		_, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return false
		}
		records++
	}

	// FieldsPerRecord starts at 0, so the reader sets it to the field count
	// of the first record and rejects later records with a different count.
	return r.FieldsPerRecord > 1 && records > 1
}

// dropLastLine returns a reader over b with a trailing partial line removed.
//
// mimetype limits itself to ReadLimit bytes when performing a detection, so
// for line-oriented formats such as CSV or NDJSON the last line of the input
// may be incomplete.
//
// When cutAt is 0, or b is shorter than cutAt, b is returned whole. Otherwise
// the reader stops right before the last newline located in b[1:cutAt]; when
// there is no such newline the reader covers b[:cutAt].
func dropLastLine(b []byte, cutAt uint32) io.Reader {
	truncated := cutAt != 0 && uint32(len(b)) >= cutAt
	if !truncated {
		return bytes.NewReader(b)
	}

	head := b[:cutAt]
	// A newline at index 0 is not treated as a cut point, so the returned
	// reader is never empty on this path.
	if nl := bytes.LastIndexByte(head, '\n'); nl > 0 {
		return bytes.NewReader(head[:nl])
	}

	// No usable newline inside the first cutAt bytes.
	return bytes.NewReader(head)
}