1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
// Copyright 2016 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bigquery
import (
"io"
bq "google.golang.org/api/bigquery/v2"
)
// A ReaderSource is a source for a load operation that gets
// data from an io.Reader.
//
// When a ReaderSource is part of a LoadConfig obtained via Job.Config,
// its internal io.Reader will be nil, so it cannot be used for a
// subsequent load operation.
type ReaderSource struct {
r io.Reader
FileConfig
}
// NewReaderSource creates a ReaderSource from an io.Reader. You may
// optionally configure properties on the ReaderSource that describe the
// data being read, before passing it to Table.LoaderFrom.
func NewReaderSource(r io.Reader) *ReaderSource {
return &ReaderSource{r: r}
}
func (r *ReaderSource) populateLoadConfig(lc *bq.JobConfigurationLoad) io.Reader {
r.FileConfig.populateLoadConfig(lc)
return r.r
}
// FileConfig contains configuration options that pertain to files, typically
// text files that require interpretation to be used as a BigQuery table. A
// file may live in Google Cloud Storage (see GCSReference), or it may be
// loaded into a table via the Table.LoaderFromReader.
type FileConfig struct {
// SourceFormat is the format of the data to be read.
// Allowed values are: Avro, CSV, DatastoreBackup, JSON, ORC, and Parquet. The default is CSV.
SourceFormat DataFormat
// Indicates if we should automatically infer the options and
// schema for CSV and JSON sources.
AutoDetect bool
// MaxBadRecords is the maximum number of bad records that will be ignored
// when reading data.
MaxBadRecords int64
// IgnoreUnknownValues causes values not matching the schema to be
// tolerated. Unknown values are ignored. For CSV this ignores extra values
// at the end of a line. For JSON this ignores named values that do not
// match any column name. If this field is not set, records containing
// unknown values are treated as bad records. The MaxBadRecords field can
// be used to customize how bad records are handled.
IgnoreUnknownValues bool
// Schema describes the data. It is required when reading CSV or JSON data,
// unless the data is being loaded into a table that already exists.
Schema Schema
// Additional options for CSV files.
CSVOptions
}
func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
conf.SkipLeadingRows = fc.SkipLeadingRows
conf.SourceFormat = string(fc.SourceFormat)
conf.Autodetect = fc.AutoDetect
conf.AllowJaggedRows = fc.AllowJaggedRows
conf.AllowQuotedNewlines = fc.AllowQuotedNewlines
conf.Encoding = string(fc.Encoding)
conf.FieldDelimiter = fc.FieldDelimiter
conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
conf.MaxBadRecords = fc.MaxBadRecords
if fc.Schema != nil {
conf.Schema = fc.Schema.toBQ()
}
conf.Quote = fc.quote()
}
func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) {
fc.SourceFormat = DataFormat(conf.SourceFormat)
fc.AutoDetect = conf.Autodetect
fc.MaxBadRecords = conf.MaxBadRecords
fc.IgnoreUnknownValues = conf.IgnoreUnknownValues
fc.Schema = bqToSchema(conf.Schema)
fc.SkipLeadingRows = conf.SkipLeadingRows
fc.AllowJaggedRows = conf.AllowJaggedRows
fc.AllowQuotedNewlines = conf.AllowQuotedNewlines
fc.Encoding = Encoding(conf.Encoding)
fc.FieldDelimiter = conf.FieldDelimiter
fc.CSVOptions.setQuote(conf.Quote)
}
func (fc *FileConfig) populateExternalDataConfig(conf *bq.ExternalDataConfiguration) {
format := fc.SourceFormat
if format == "" {
// Format must be explicitly set for external data sources.
format = CSV
}
conf.Autodetect = fc.AutoDetect
conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
conf.MaxBadRecords = fc.MaxBadRecords
conf.SourceFormat = string(format)
if fc.Schema != nil {
conf.Schema = fc.Schema.toBQ()
}
if format == CSV {
fc.CSVOptions.populateExternalDataConfig(conf)
}
}
// Encoding specifies the character encoding of data to be loaded into BigQuery.
// See https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding
// for more details about how this is used.
type Encoding string
const (
// UTF_8 specifies the UTF-8 encoding type.
UTF_8 Encoding = "UTF-8"
// ISO_8859_1 specifies the ISO-8859-1 encoding type.
ISO_8859_1 Encoding = "ISO-8859-1"
)
|