File: csv-to-json.go

package info (click to toggle)
golang-github-wildducktheories-go-csv 0.0~git20210709.8745000-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, sid, trixie
  • size: 200 kB
  • sloc: makefile: 5
file content (117 lines) | stat: -rw-r--r-- 3,106 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
package csv

import (
	"encoding/json"
	"fmt"
	"os"
	"regexp"
	"strings"
)

// CsvToJsonProcess converts a stream of CSV records into a stream of JSON
// objects, one per record. The headers are treated as dot-separated paths into
// the resulting JSON object, so a CSV file containing the header foo.bar,baz
// and the data 1,2 will be converted into a JSON object like
// {"foo": {"bar": 1}, "baz": 2}.
//
// Empty column values are omitted from the output. Unless StringsOnly is set,
// the value null is also omitted, true/TRUE and false/FALSE become JSON
// booleans, and values that can be successfully unmarshalled as JSON numbers,
// objects or arrays are encoded as the corresponding JSON value. Everything
// else is encoded as a JSON string.
//
// BaseObject, when non-empty, names a column whose value is parsed as a JSON
// object and used as the starting point for each output object; that column is
// not itself written as a field. It must match one of the input headers.
//
// StringsOnly forces all non-empty column values to be encoded as JSON strings
// (the --strings option).
type CsvToJsonProcess struct {
	BaseObject  string
	StringsOnly bool
}

// writeToMap stores v in m at the nested location described by the path p.
//
// Every element of p except the last names an intermediate object. An
// intermediate entry that is missing, or that holds something other than a
// map[string]interface{}, is replaced with a fresh empty object before
// descending into it. The last element of p is the key that receives v.
// An empty path leaves m untouched.
func (proc *CsvToJsonProcess) writeToMap(m map[string]interface{}, p []string, v interface{}) {
	if len(p) == 0 {
		return
	}

	leaf := len(p) - 1
	node := m
	for _, key := range p[:leaf] {
		// A failed assertion covers both "key absent" and "value is not an object".
		child, isObject := node[key].(map[string]interface{})
		if !isObject {
			child = map[string]interface{}{}
		}
		node[key] = child
		node = child
	}
	node[p[leaf]] = v
}

// Run reads every record from reader, converts it to a JSON object and writes
// it with encoder, one object per record. Exactly one value is sent on errCh
// when processing ends: nil on success, otherwise the reason processing
// stopped. The reader is closed before that value is sent.
//
// Processing stops early with an error when BaseObject is set but is not one
// of the input headers, or when encoder fails to write a record. Otherwise the
// result is whatever reader.Error() reports after its record channel is
// drained. A base object cell that cannot be parsed as a JSON object only
// produces a warning on stderr; the record is still emitted.
func (p *CsvToJsonProcess) Run(reader Reader, encoder *json.Encoder, errCh chan<- error) {
	errCh <- func() error {
		defer reader.Close()

		baseObject := p.BaseObject
		stringsOnly := p.StringsOnly

		// Split each header into its path once, rather than once per record.
		paths := map[string][]string{}
		for _, k := range reader.Header() {
			paths[k] = strings.Split(k, ".")
		}

		if baseObject != "" {
			if _, ok := paths[baseObject]; !ok {
				return fmt.Errorf("fatal: '%s' is not a valid key in the input stream", baseObject)
			}
		}

		// see: http://stackoverflow.com/questions/13340717/json-numbers-regular-expression
		numberMatcher := regexp.MustCompile(`^ *-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)? *$`)

		for data := range reader.C() {
			dataMap := data.AsMap()
			objectMap := map[string]interface{}{}

			if baseObject != "" {
				if base, ok := dataMap[baseObject]; ok {
					if err := json.Unmarshal([]byte(base), &objectMap); err != nil {
						fmt.Fprintf(os.Stderr, "warning: failed to parse base object: %s: %s\n", base, err)
					}
					// Unmarshalling the JSON literal null succeeds but sets the
					// map to nil; writing a field into it would then panic.
					if objectMap == nil {
						objectMap = map[string]interface{}{}
					}
				}
			}

			for k, v := range dataMap {
				if baseObject != "" && k == baseObject {
					// The base object column seeds objectMap; it is not a field.
					continue
				}
				ov, ok := csvToJsonCellValue(v, stringsOnly, numberMatcher)
				if !ok {
					continue
				}
				p.writeToMap(objectMap, paths[k], ov)
			}

			if err := encoder.Encode(objectMap); err != nil {
				return fmt.Errorf("encoding record as JSON: %w", err)
			}
		}
		return reader.Error()
	}()
}

// csvToJsonCellValue maps a single CSV cell to the value that should be placed
// in the JSON output. The second result is false when the cell must be omitted
// from the output altogether (empty cells and, unless stringsOnly is set, the
// literal null). A cell that looks like an object, array or number but fails
// to parse as one is returned unchanged as a string.
func csvToJsonCellValue(v string, stringsOnly bool, numberMatcher *regexp.Regexp) (interface{}, bool) {
	switch {
	case v == "":
		return nil, false
	case stringsOnly:
		return v, true
	case v == "null":
		return nil, false
	case v == "true" || v == "TRUE":
		return true, true
	case v == "false" || v == "FALSE":
		return false, true
	case v[0] == '{':
		j := map[string]interface{}{}
		if err := json.Unmarshal([]byte(v), &j); err == nil {
			return j, true
		}
	case v[0] == '[':
		aj := make([]interface{}, 0)
		if err := json.Unmarshal([]byte(v), &aj); err == nil {
			return aj, true
		}
	case numberMatcher.MatchString(v):
		var f float64
		if _, err := fmt.Sscanf(v, "%f", &f); err == nil {
			return f, true
		}
	}
	return v, true
}