File: option_types.go

package info (click to toggle)
miller 6.16.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 87,928 kB
  • sloc: ruby: 162; sh: 119; makefile: 87
file content (262 lines) | stat: -rw-r--r-- 6,862 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
// ================================================================
// Items which might better belong in miller/cli, but which are placed in a
// deeper package to avoid a package-dependency cycle between miller/cli and
// miller/transforming.
// ================================================================

package cli

import (
	"regexp"

	"github.com/johnkerl/miller/v6/pkg/lib"
)

// TCommentHandling enumerates the comment-handling modes a reader can be
// configured with via TReaderOptions.CommentHandling.
type TCommentHandling int

// The numeric values are spelled out explicitly. CommentsAreData is the zero
// value of TCommentHandling, and is also what DefaultReaderOptions selects.
const (
	CommentsAreData TCommentHandling = 0
	SkipComments    TCommentHandling = 1
	PassComments    TCommentHandling = 2
)
// Package-level defaults, gathered into a single declaration group. All of
// them are untyped constants.
const (
	// Default comment-marker string.
	DEFAULT_COMMENT_STRING = "#"

	// Defaults for TGeneratorOptions; DefaultReaderOptions copies these into
	// the generator options. The numeric ones are kept as strings.
	DEFAULT_GEN_FIELD_NAME      = "i"
	DEFAULT_GEN_START_AS_STRING = "1"
	DEFAULT_GEN_STEP_AS_STRING  = "1"
	DEFAULT_GEN_STOP_AS_STRING  = "100"

	// Default for TReaderOptions.RecordsPerBatch.
	DEFAULT_RECORDS_PER_BATCH = 500
)

// TGeneratorOptions holds the settings for the fake internal-data-generator
// 'reader' (see TReaderOptions.GeneratorOptions). Every value, including the
// start/step/stop ones, is stored as a string; DefaultReaderOptions fills
// them from the DEFAULT_GEN_* constants.
type TGeneratorOptions struct {
	FieldName     string
	StartAsString string
	StepAsString  string
	StopAsString  string
}

// TReaderOptions holds the settings that control how input is read. Use
// DefaultReaderOptions to obtain a populated value; it is not usable until
// FinalizeReaderOptions has been called on it.
type TReaderOptions struct {
	// Input format name and separators. DefaultReaderOptions sets the format
	// to "dkvp", IFS to ",", IPS to "=", and IRS to "\n", and turns
	// DedupeFieldNames on. IFSRegex and IPSRegex are left nil there and are
	// computed by FinalizeReaderOptions.
	// NOTE(review): IFS/IPS/IRS are presumably the input field, pair, and
	// record separators -- confirm against the CLI documentation.
	InputFileFormat  string
	IFS              string
	IPS              string
	IRS              string
	AllowRepeatIFS   bool
	IFSRegex         *regexp.Regexp
	IPSRegex         *regexp.Regexp
	DedupeFieldNames bool

	// If unspecified on the command line, these take input-format-dependent
	// defaults.  E.g. default FS is comma for DKVP but space for NIDX;
	// default AllowRepeatIFS is false for CSV but true for PPRINT.
	ifsWasSpecified            bool
	ipsWasSpecified            bool
	irsWasSpecified            bool
	allowRepeatIFSWasSpecified bool

	// Format-specific input switches. DefaultReaderOptions does not set any
	// of these, so they all start out false.
	UseImplicitHeader    bool
	AllowRaggedCSVInput  bool
	CSVLazyQuotes        bool
	CSVTrimLeadingSpace  bool
	BarredPprintInput    bool
	IncrementImplicitKey bool

	// DefaultReaderOptions sets CommentHandling to CommentsAreData and leaves
	// CommentString empty.
	CommentHandling TCommentHandling
	CommentString   string

	// Fake internal-data-generator 'reader'
	GeneratorOptions TGeneratorOptions

	// For out-of-process handling of compressed data, via popen
	Prepipe string
	// For most things like gunzip we do 'gunzip < filename | mlr ...' if
	// filename is present, else 'gunzip | mlr ...' if reading from stdin.
	// However some commands like 'unzip -qc' are weird so this option lets
	// people give the command and we won't insert the '<'.
	PrepipeIsRaw bool
	// For in-process gunzip/bunzip2/zcat (distinct from prepipe)
	FileInputEncoding lib.TFileInputEncoding

	// DefaultReaderOptions sets this to DEFAULT_RECORDS_PER_BATCH.
	// NOTE(review): presumably the number of records the reader hands off per
	// batch -- confirm against the reader implementations.
	RecordsPerBatch int64
}

// ----------------------------------------------------------------
// TWriterOptions holds the settings that control how output is written. Use
// DefaultWriterOptions to obtain a populated value; it is not usable until
// FinalizeWriterOptions has been called on it.
type TWriterOptions struct {
	// Output format name and separators. DefaultWriterOptions sets the format
	// to "dkvp", ORS to "\n", OFS to ",", OPS to "=", and FLATSEP to ".".
	OutputFileFormat string
	ORS              string
	OFS              string
	OPS              string
	FLATSEP          string

	// DefaultWriterOptions sets FlushOnEveryRecord to true.
	FlushOnEveryRecord             bool
	flushOnEveryRecordWasSpecified bool

	// If unspecified on the command line, these take format-dependent
	// defaults.  E.g. default FS is comma for DKVP but space for NIDX.
	ofsWasSpecified bool
	opsWasSpecified bool
	orsWasSpecified bool

	HeaderlessOutput         bool
	BarredPprintOutput       bool
	RightAlignedPPRINTOutput bool
	RightAlignedXTABOutput   bool

	// JSON output: --jlistwrap on, --jvstack on
	// JSON Lines output: --jlistwrap off, --jvstack off
	WrapJSONOutputInOuterList bool // --jlistwrap
	JSONOutputMultiline       bool // --jvstack
	JVQuoteAll                bool // --jvquoteall
	// Not using miller/types enum to avoid package cycle

	CSVQuoteAll bool // --quote-all

	// When we read things like
	//
	//   x.a=1,x.b=2
	//
	// (shown with the default FLATSEP of "."), which is how we write out
	// nested data structures for non-nested formats (all but JSON), the
	// default behavior is to unflatten them back to
	//
	//   {"x": {"a": 1, "b": 2}}
	//
	// unless the user explicitly asks to suppress that.
	AutoUnflatten bool

	// The default behavior is to flatten nested data structures like
	//
	//   {"x": {"a": 1, "b": 2}}
	//
	// down to
	//
	//   x.a=1,x.b=2
	//
	// (shown with the default FLATSEP of "."), which is how we write out
	// nested data structures for non-nested formats (all but JSON) -- unless
	// the user explicitly asks to suppress that.
	AutoFlatten bool

	// Default CSV/TSV:
	//   a=1,b=2,c=3
	//   a=4,b=5
	// leads to
	//   a,b,c
	//   1,2,3
	//   4,5, <-- note trailing empty field
	// and
	//   a=1,b=2,c=3
	//   d=4,e=5
	// leads to
	//   fatal error
	//
	// With this flag:
	//   a=1,b=2,c=3
	//   a=4,b=5
	// leads to
	//   a,b,c
	//   1,2,3
	//
	//   a,b
	//   4,5
	//
	// and
	//   a=1,b=2,c=3
	//   d=4,e=5
	// leads to
	//   a,b,c
	//   1,2,3
	//
	//   d,e
	//   4,5
	NoAutoUnsparsify bool

	// For floating-point numbers: "" means use the Go default.
	FPOFMT string

	// Fatal the process when error data in a given record is about to be output.
	FailOnDataError bool
}

// ----------------------------------------------------------------
// TOptions is the top-level options bundle: the reader options, the writer
// options, and the main-level settings below. Use DefaultOptions to obtain a
// populated value.
type TOptions struct {
	ReaderOptions TReaderOptions
	WriterOptions TWriterOptions

	// Data files to be operated on: e.g. given 'mlr cat foo.dat bar.dat', this
	// is ["foo.dat", "bar.dat"].
	FileNames []string

	// DSL files to be loaded for every put/filter operation -- like 'put -f'
	// or 'filter -f' but specified up front on the command line, suitable for
	// .mlrrc. Use-case is someone has DSL functions they always want to be
	// defined.
	//
	// Risk of CVE if this is in .mlrrc so --load and --mload are explicitly
	// denied in the .mlrrc reader.
	DSLPreloadFileNames []string

	// NOTE(review): NRProgressMod looks like a record-count interval for
	// progress reporting -- confirm against the flag parser.
	NRProgressMod int64
	DoInPlace     bool // mlr -I
	NoInput       bool // mlr -n

	// NOTE(review): presumably HaveRandSeed records whether RandSeed was
	// supplied by the user -- confirm against the flag parser.
	HaveRandSeed bool
	RandSeed     int64

	PrintElapsedTime bool // mlr --time
}

// DefaultOptions allocates a TOptions carrying the default reader and writer
// options, plus empty (non-nil) FileNames and DSLPreloadFileNames slices.
// All remaining fields keep their zero values.
//
// The result is not usable until FinalizeReaderOptions and
// FinalizeWriterOptions are called.
func DefaultOptions() *TOptions {
	options := new(TOptions)

	options.ReaderOptions = DefaultReaderOptions()
	options.WriterOptions = DefaultWriterOptions()

	// Empty but non-nil, matching make([]string, 0).
	options.FileNames = []string{}
	options.DSLPreloadFileNames = []string{}

	options.NoInput = false

	return options
}

// DefaultReaderOptions returns the baseline reader settings: "dkvp" input
// format; "," / "=" / "\n" as IFS / IPS / IRS; CommentHandling set to
// CommentsAreData; the default file-input encoding; generator options taken
// from the DEFAULT_GEN_* constants; DedupeFieldNames on; and RecordsPerBatch
// set to DEFAULT_RECORDS_PER_BATCH. Every other field keeps its zero value.
//
// The result is not usable until FinalizeReaderOptions is called on it; that
// step computes IFSRegex and IPSRegex.
func DefaultReaderOptions() TReaderOptions {
	var readerOptions TReaderOptions

	// TODO: constify at top, or maybe formats.DKVP in package
	readerOptions.InputFileFormat = "dkvp"

	readerOptions.IFS = ","
	readerOptions.IPS = "="
	readerOptions.IRS = "\n"

	readerOptions.CommentHandling = CommentsAreData
	readerOptions.FileInputEncoding = lib.FileInputEncodingDefault
	readerOptions.DedupeFieldNames = true
	readerOptions.RecordsPerBatch = DEFAULT_RECORDS_PER_BATCH

	generator := &readerOptions.GeneratorOptions
	generator.FieldName = DEFAULT_GEN_FIELD_NAME
	generator.StartAsString = DEFAULT_GEN_START_AS_STRING
	generator.StepAsString = DEFAULT_GEN_STEP_AS_STRING
	generator.StopAsString = DEFAULT_GEN_STOP_AS_STRING

	return readerOptions
}

// DefaultWriterOptions returns the baseline writer settings: "dkvp" output
// format; "\n" / "," / "=" as ORS / OFS / OPS; "." as FLATSEP; flushing on
// every record; JSON output list-wrapped and multiline; and auto-flatten and
// auto-unflatten both enabled. Every other field keeps its zero value.
//
// The result is not usable until FinalizeWriterOptions is called on it.
func DefaultWriterOptions() TWriterOptions {
	var writerOptions TWriterOptions

	writerOptions.OutputFileFormat = "dkvp"
	writerOptions.ORS = "\n"
	writerOptions.OFS = ","
	writerOptions.OPS = "="
	writerOptions.FLATSEP = "."

	writerOptions.FlushOnEveryRecord = true
	writerOptions.HeaderlessOutput = false

	writerOptions.WrapJSONOutputInOuterList = true
	writerOptions.JSONOutputMultiline = true

	writerOptions.AutoUnflatten = true
	writerOptions.AutoFlatten = true

	// "" means use the Go default floating-point formatting.
	writerOptions.FPOFMT = ""

	return writerOptions
}