1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
|
// ================================================================
// Items which might better belong in miller/cli, but which are placed in a
// deeper package to avoid a package-dependency cycle between miller/cli and
// miller/transforming.
// ================================================================
package cli
import (
"regexp"
"github.com/johnkerl/miller/v6/pkg/lib"
)
// TCommentHandling enumerates the ways comment lines in input can be treated.
type TCommentHandling int

// The values are assigned in declaration order starting from zero, so
// CommentsAreData is the zero value of TCommentHandling.
const (
	// CommentsAreData is the zero value; DefaultReaderOptions selects it.
	CommentsAreData = TCommentHandling(iota)
	// SkipComments is the second comment-handling mode.
	SkipComments
	// PassComments is the third comment-handling mode.
	PassComments
)
// Package-wide default values. The DEFAULT_GEN_* values and
// DEFAULT_RECORDS_PER_BATCH are applied by DefaultReaderOptions.
const (
	// Default comment marker.
	DEFAULT_COMMENT_STRING = "#"

	// Defaults for the generator pseudo-reader (see TGeneratorOptions). The
	// start/step/stop values are held as strings, not numbers.
	DEFAULT_GEN_FIELD_NAME      = "i"
	DEFAULT_GEN_START_AS_STRING = "1"
	DEFAULT_GEN_STEP_AS_STRING  = "1"
	DEFAULT_GEN_STOP_AS_STRING  = "100"

	// Default for TReaderOptions.RecordsPerBatch.
	DEFAULT_RECORDS_PER_BATCH = 500
)
// TGeneratorOptions holds the settings for the internal data-generator
// pseudo-reader (see TReaderOptions.GeneratorOptions). DefaultReaderOptions
// fills these from the DEFAULT_GEN_* constants. The numeric-looking values are
// kept as strings here; parsing happens outside this file.
type TGeneratorOptions struct {
// Field name for generated values; default is DEFAULT_GEN_FIELD_NAME.
FieldName string
// Start value, unparsed; default is DEFAULT_GEN_START_AS_STRING.
StartAsString string
// Step value, unparsed; default is DEFAULT_GEN_STEP_AS_STRING.
StepAsString string
// Stop value, unparsed; default is DEFAULT_GEN_STOP_AS_STRING.
StopAsString string
}
// TReaderOptions collects the input-side settings. A value obtained from
// DefaultReaderOptions is documented there as not usable until
// FinalizeReaderOptions has been called on it.
type TReaderOptions struct {
// Name of the input format; DefaultReaderOptions uses "dkvp".
InputFileFormat string
// Input separators. NOTE(review): presumably field, pair, and record
// separators respectively -- confirm against the readers.
IFS string
IPS string
IRS string
AllowRepeatIFS bool
// Not set by DefaultReaderOptions; per the note there, FinalizeReaderOptions
// computes these.
IFSRegex *regexp.Regexp
IPSRegex *regexp.Regexp
// DefaultReaderOptions sets this to true.
DedupeFieldNames bool
// If unspecified on the command line, these take input-format-dependent
// defaults. E.g. default FS is comma for DKVP but space for NIDX;
// default AllowRepeatIFS is false for CSV but true for PPRINT.
// The flags below are unexported, so only this package can read or set them.
ifsWasSpecified bool
ipsWasSpecified bool
irsWasSpecified bool
allowRepeatIFSWasSpecified bool
UseImplicitHeader bool
AllowRaggedCSVInput bool
CSVLazyQuotes bool
CSVTrimLeadingSpace bool
BarredPprintInput bool
IncrementImplicitKey bool
// How comment lines are treated; the zero value is CommentsAreData.
CommentHandling TCommentHandling
// NOTE(review): DefaultReaderOptions leaves this empty rather than setting
// DEFAULT_COMMENT_STRING -- presumably it is filled in elsewhere; confirm.
CommentString string
// Fake internal-data-generator 'reader'
GeneratorOptions TGeneratorOptions
// For out-of-process handling of compressed data, via popen
Prepipe string
// For most things like gunzip we do 'gunzip < filename | mlr ...' if
// filename is present, else 'gunzip | mlr ...' if reading from stdin.
// However some commands like 'unzip -qc' are weird so this option lets
// people give the command and we won't insert the '<'.
PrepipeIsRaw bool
// For in-process gunzip/bunzip2/zcat (distinct from prepipe)
FileInputEncoding lib.TFileInputEncoding
// Batch size in records; DefaultReaderOptions sets it to
// DEFAULT_RECORDS_PER_BATCH. NOTE(review): how batches are consumed is not
// visible in this file.
RecordsPerBatch int64
}
// ----------------------------------------------------------------
// TWriterOptions collects the output-side settings. A value obtained from
// DefaultWriterOptions is documented there as not usable until
// FinalizeWriterOptions has been called on it.
type TWriterOptions struct {
// Name of the output format; DefaultWriterOptions uses "dkvp".
OutputFileFormat string
// Output separators; DefaultWriterOptions uses "\n", ",", and "=" for
// ORS, OFS, and OPS respectively.
ORS string
OFS string
OPS string
// DefaultWriterOptions uses ".". NOTE(review): presumably the separator used
// when joining keys of flattened nested data -- confirm.
FLATSEP string
// DefaultWriterOptions sets this to true.
FlushOnEveryRecord bool
flushOnEveryRecordWasSpecified bool
// If unspecified on the command line, these take format-dependent
// defaults. E.g. default FS is comma for DKVP but space for NIDX.
// The flags below are unexported, so only this package can read or set them.
ofsWasSpecified bool
opsWasSpecified bool
orsWasSpecified bool
HeaderlessOutput bool
BarredPprintOutput bool
RightAlignedPPRINTOutput bool
RightAlignedXTABOutput bool
// JSON output: --jlistwrap on, --jvstack on
// JSON Lines output: --jlistwrap off, --jvstack off
// DefaultWriterOptions turns both on.
WrapJSONOutputInOuterList bool // --jlistwrap
JSONOutputMultiline bool // --jvstack
JVQuoteAll bool // --jvquoteall
// Not using miller/types enum to avoid package cycle
CSVQuoteAll bool // --quote-all
// When we read things like
//
//   x:a=1,x:b=2
//
// which is how we write out nested data structures for non-nested formats
// (all but JSON), the default behavior is to unflatten them back to
//
//   {"x": {"a": 1, "b": 2}}
//
// unless the user explicitly asks to suppress that.
AutoUnflatten bool
// The default behavior is to flatten nested data structures like
//
//   {"x": {"a": 1, "b": 2}}
//
// down to
//
//   x:a=1,x:b=2
//
// which is how we write out nested data structures for non-nested formats
// (all but JSON) -- unless the user explicitly asks to suppress that.
AutoFlatten bool
// Default CSV/TSV:
//   a=1,b=2,c=3
//   a=4,b=5
// leads to
//   a,b,c
//   1,2,3
//   4,5, <-- note trailing empty field
// and
//   a=1,b=2,c=3
//   d=4,e=5
// leads to
//   fatal error
//
// With this flag:
//   a=1,b=2,c=3
//   a=4,b=5
// leads to
//   a,b,c
//   1,2,3
//
//   a,b
//   4,5
//
// and
//   a=1,b=2,c=3
//   d=4,e=5
// leads to
//   a,b,c
//   1,2,3
//
//   d,e
//   4,5
NoAutoUnsparsify bool
// For floating-point numbers: "" means use the Go default.
FPOFMT string
// Fatal the process when error data in a given record is about to be output.
FailOnDataError bool
}
// ----------------------------------------------------------------
// TOptions is the top-level options bundle: reader options, writer options,
// and the settings that belong to neither. DefaultOptions builds one with
// default values.
type TOptions struct {
ReaderOptions TReaderOptions
WriterOptions TWriterOptions
// Data files to be operated on: e.g. given 'mlr cat foo.dat bar.dat', this
// is ["foo.dat", "bar.dat"].
FileNames []string
// DSL files to be loaded for every put/filter operation -- like 'put -f'
// or 'filter -f' but specified up front on the command line, suitable for
// .mlrrc. Use-case is someone has DSL functions they always want to be
// defined.
//
// Risk of CVE if this is in .mlrrc so --load and --mload are explicitly
// denied in the .mlrrc reader.
DSLPreloadFileNames []string
// NOTE(review): presumably a record-count modulus for progress reporting --
// its use is not visible in this file; confirm.
NRProgressMod int64
DoInPlace bool // mlr -I
NoInput bool // mlr -n
// NOTE(review): presumably HaveRandSeed records whether RandSeed was
// supplied -- confirm against the flag parser.
HaveRandSeed bool
RandSeed int64
PrintElapsedTime bool // mlr --time
}
// DefaultOptions returns a freshly allocated TOptions carrying the default
// reader and writer options and empty (non-nil) file-name lists.
//
// The result is not usable until FinalizeReaderOptions and
// FinalizeWriterOptions are called.
func DefaultOptions() *TOptions {
	options := new(TOptions)

	options.ReaderOptions = DefaultReaderOptions()
	options.WriterOptions = DefaultWriterOptions()

	// Empty but non-nil, so callers see a zero-length slice rather than nil.
	options.FileNames = []string{}
	options.DSLPreloadFileNames = []string{}

	options.NoInput = false

	return options
}
// DefaultReaderOptions returns reader options populated with the built-in
// defaults: DKVP format with its separators, comments treated as data, the
// default file-input encoding, the DEFAULT_GEN_* generator settings,
// field-name deduplication on, and the default batch size. All other fields
// keep their zero values.
//
// The result is not usable until FinalizeReaderOptions is called on it;
// FinalizeReaderOptions will compute IFSRegex and IPSRegex.
func DefaultReaderOptions() TReaderOptions {
	var ropts TReaderOptions

	// TODO: constify at top, or maybe formats.DKVP in package
	ropts.InputFileFormat = "dkvp"
	ropts.IRS = "\n"
	ropts.IFS = ","
	ropts.IPS = "="

	ropts.CommentHandling = CommentsAreData
	ropts.FileInputEncoding = lib.FileInputEncodingDefault

	ropts.GeneratorOptions.FieldName = DEFAULT_GEN_FIELD_NAME
	ropts.GeneratorOptions.StartAsString = DEFAULT_GEN_START_AS_STRING
	ropts.GeneratorOptions.StepAsString = DEFAULT_GEN_STEP_AS_STRING
	ropts.GeneratorOptions.StopAsString = DEFAULT_GEN_STOP_AS_STRING

	ropts.DedupeFieldNames = true
	ropts.RecordsPerBatch = DEFAULT_RECORDS_PER_BATCH

	return ropts
}
// DefaultWriterOptions returns writer options populated with the built-in
// defaults: DKVP format with its separators, "." as FLATSEP, flushing on every
// record, JSON list-wrap and multi-line output on, and auto-flatten and
// auto-unflatten on. All other fields keep their zero values.
//
// The result is not usable until FinalizeWriterOptions is called on it.
func DefaultWriterOptions() TWriterOptions {
	var wopts TWriterOptions

	wopts.OutputFileFormat = "dkvp"
	wopts.ORS = "\n"
	wopts.OFS = ","
	wopts.OPS = "="
	wopts.FLATSEP = "."

	wopts.FlushOnEveryRecord = true
	wopts.HeaderlessOutput = false

	wopts.WrapJSONOutputInOuterList = true
	wopts.JSONOutputMultiline = true

	wopts.AutoUnflatten = true
	wopts.AutoFlatten = true

	// "" means use the Go default for floating-point output.
	wopts.FPOFMT = ""

	return wopts
}
|