1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
|
// Copyright 2015 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bigquery
import (
"encoding/json"
"errors"
"fmt"
"reflect"
"sync"
bq "google.golang.org/api/bigquery/v2"
)
// Schema describes the fields in a table or query result.
type Schema []*FieldSchema
// Relax returns a version of the schema where no fields are marked
// as Required.
func (s Schema) Relax() Schema {
var out Schema
for _, v := range s {
relaxed := &FieldSchema{
Name: v.Name,
Description: v.Description,
Repeated: v.Repeated,
Required: false,
Type: v.Type,
Schema: v.Schema.Relax(),
}
out = append(out, relaxed)
}
return out
}
// FieldSchema describes a single field.
type FieldSchema struct {
// The field name.
// Must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_),
// and must start with a letter or underscore.
// The maximum length is 128 characters.
Name string
// A description of the field. The maximum length is 16,384 characters.
Description string
// Whether the field may contain multiple values.
Repeated bool
// Whether the field is required. Ignored if Repeated is true.
Required bool
// The field data type. If Type is Record, then this field contains a nested schema,
// which is described by Schema.
Type FieldType
// Describes the nested schema if Type is set to Record.
Schema Schema
}
func (fs *FieldSchema) toBQ() *bq.TableFieldSchema {
tfs := &bq.TableFieldSchema{
Description: fs.Description,
Name: fs.Name,
Type: string(fs.Type),
}
if fs.Repeated {
tfs.Mode = "REPEATED"
} else if fs.Required {
tfs.Mode = "REQUIRED"
} // else leave as default, which is interpreted as NULLABLE.
for _, f := range fs.Schema {
tfs.Fields = append(tfs.Fields, f.toBQ())
}
return tfs
}
func (s Schema) toBQ() *bq.TableSchema {
var fields []*bq.TableFieldSchema
for _, f := range s {
fields = append(fields, f.toBQ())
}
return &bq.TableSchema{Fields: fields}
}
func bqToFieldSchema(tfs *bq.TableFieldSchema) *FieldSchema {
fs := &FieldSchema{
Description: tfs.Description,
Name: tfs.Name,
Repeated: tfs.Mode == "REPEATED",
Required: tfs.Mode == "REQUIRED",
Type: FieldType(tfs.Type),
}
for _, f := range tfs.Fields {
fs.Schema = append(fs.Schema, bqToFieldSchema(f))
}
return fs
}
func bqToSchema(ts *bq.TableSchema) Schema {
if ts == nil {
return nil
}
var s Schema
for _, f := range ts.Fields {
s = append(s, bqToFieldSchema(f))
}
return s
}
// FieldType is the type of field.
type FieldType string
const (
// StringFieldType is a string field type.
StringFieldType FieldType = "STRING"
// BytesFieldType is a bytes field type.
BytesFieldType FieldType = "BYTES"
// IntegerFieldType is a integer field type.
IntegerFieldType FieldType = "INTEGER"
// FloatFieldType is a float field type.
FloatFieldType FieldType = "FLOAT"
// BooleanFieldType is a boolean field type.
BooleanFieldType FieldType = "BOOLEAN"
// TimestampFieldType is a timestamp field type.
TimestampFieldType FieldType = "TIMESTAMP"
// RecordFieldType is a record field type. It is typically used to create columns with repeated or nested data.
RecordFieldType FieldType = "RECORD"
// DateFieldType is a date field type.
DateFieldType FieldType = "DATE"
// TimeFieldType is a time field type.
TimeFieldType FieldType = "TIME"
// DateTimeFieldType is a datetime field type.
DateTimeFieldType FieldType = "DATETIME"
// NumericFieldType is a numeric field type. Numeric types include integer types, floating point types and the
// NUMERIC data type.
NumericFieldType FieldType = "NUMERIC"
// GeographyFieldType is a string field type. Geography types represent a set of points
// on the Earth's surface, represented in Well Known Text (WKT) format.
GeographyFieldType FieldType = "GEOGRAPHY"
)
var (
errEmptyJSONSchema = errors.New("bigquery: empty JSON schema")
fieldTypes = map[FieldType]bool{
StringFieldType: true,
BytesFieldType: true,
IntegerFieldType: true,
FloatFieldType: true,
BooleanFieldType: true,
TimestampFieldType: true,
RecordFieldType: true,
DateFieldType: true,
TimeFieldType: true,
DateTimeFieldType: true,
NumericFieldType: true,
GeographyFieldType: true,
}
)
var typeOfByteSlice = reflect.TypeOf([]byte{})
// InferSchema tries to derive a BigQuery schema from the supplied struct value.
// Each exported struct field is mapped to a field in the schema.
//
// The following BigQuery types are inferred from the corresponding Go types.
// (This is the same mapping as that used for RowIterator.Next.) Fields inferred
// from these types are marked required (non-nullable).
//
// STRING string
// BOOL bool
// INTEGER int, int8, int16, int32, int64, uint8, uint16, uint32
// FLOAT float32, float64
// BYTES []byte
// TIMESTAMP time.Time
// DATE civil.Date
// TIME civil.Time
// DATETIME civil.DateTime
// NUMERIC *big.Rat
//
// The big.Rat type supports numbers of arbitrary size and precision. Values
// will be rounded to 9 digits after the decimal point before being transmitted
// to BigQuery. See https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type
// for more on NUMERIC.
//
// A Go slice or array type is inferred to be a BigQuery repeated field of the
// element type. The element type must be one of the above listed types.
//
// Due to lack of unique native Go type for GEOGRAPHY, there is no schema
// inference to GEOGRAPHY at this time.
//
// Nullable fields are inferred from the NullXXX types, declared in this package:
//
// STRING NullString
// BOOL NullBool
// INTEGER NullInt64
// FLOAT NullFloat64
// TIMESTAMP NullTimestamp
// DATE NullDate
// TIME NullTime
// DATETIME NullDateTime
// GEOGRAPHY NullGeography
//
// For a nullable BYTES field, use the type []byte and tag the field "nullable" (see below).
// For a nullable NUMERIC field, use the type *big.Rat and tag the field "nullable".
//
// A struct field that is of struct type is inferred to be a required field of type
// RECORD with a schema inferred recursively. For backwards compatibility, a field of
// type pointer to struct is also inferred to be required. To get a nullable RECORD
// field, use the "nullable" tag (see below).
//
// InferSchema returns an error if any of the examined fields is of type uint,
// uint64, uintptr, map, interface, complex64, complex128, func, or chan. Future
// versions may handle these cases without error.
//
// Recursively defined structs are also disallowed.
//
// Struct fields may be tagged in a way similar to the encoding/json package.
// A tag of the form
// bigquery:"name"
// uses "name" instead of the struct field name as the BigQuery field name.
// A tag of the form
// bigquery:"-"
// omits the field from the inferred schema.
// The "nullable" option marks the field as nullable (not required). It is only
// needed for []byte, *big.Rat and pointer-to-struct fields, and cannot appear on other
// fields. In this example, the Go name of the field is retained:
// bigquery:",nullable"
func InferSchema(st interface{}) (Schema, error) {
return inferSchemaReflectCached(reflect.TypeOf(st))
}
var schemaCache sync.Map
type cacheVal struct {
schema Schema
err error
}
func inferSchemaReflectCached(t reflect.Type) (Schema, error) {
var cv cacheVal
v, ok := schemaCache.Load(t)
if ok {
cv = v.(cacheVal)
} else {
s, err := inferSchemaReflect(t)
cv = cacheVal{s, err}
schemaCache.Store(t, cv)
}
return cv.schema, cv.err
}
func inferSchemaReflect(t reflect.Type) (Schema, error) {
rec, err := hasRecursiveType(t, nil)
if err != nil {
return nil, err
}
if rec {
return nil, fmt.Errorf("bigquery: schema inference for recursive type %s", t)
}
return inferStruct(t)
}
func inferStruct(t reflect.Type) (Schema, error) {
switch t.Kind() {
case reflect.Ptr:
if t.Elem().Kind() != reflect.Struct {
return nil, noStructError{t}
}
t = t.Elem()
fallthrough
case reflect.Struct:
return inferFields(t)
default:
return nil, noStructError{t}
}
}
// inferFieldSchema infers the FieldSchema for a Go type
func inferFieldSchema(fieldName string, rt reflect.Type, nullable bool) (*FieldSchema, error) {
// Only []byte and struct pointers can be tagged nullable.
if nullable && !(rt == typeOfByteSlice || rt.Kind() == reflect.Ptr && rt.Elem().Kind() == reflect.Struct) {
return nil, badNullableError{fieldName, rt}
}
switch rt {
case typeOfByteSlice:
return &FieldSchema{Required: !nullable, Type: BytesFieldType}, nil
case typeOfGoTime:
return &FieldSchema{Required: true, Type: TimestampFieldType}, nil
case typeOfDate:
return &FieldSchema{Required: true, Type: DateFieldType}, nil
case typeOfTime:
return &FieldSchema{Required: true, Type: TimeFieldType}, nil
case typeOfDateTime:
return &FieldSchema{Required: true, Type: DateTimeFieldType}, nil
case typeOfRat:
return &FieldSchema{Required: !nullable, Type: NumericFieldType}, nil
}
if ft := nullableFieldType(rt); ft != "" {
return &FieldSchema{Required: false, Type: ft}, nil
}
if isSupportedIntType(rt) || isSupportedUintType(rt) {
return &FieldSchema{Required: true, Type: IntegerFieldType}, nil
}
switch rt.Kind() {
case reflect.Slice, reflect.Array:
et := rt.Elem()
if et != typeOfByteSlice && (et.Kind() == reflect.Slice || et.Kind() == reflect.Array) {
// Multi dimensional slices/arrays are not supported by BigQuery
return nil, unsupportedFieldTypeError{fieldName, rt}
}
if nullableFieldType(et) != "" {
// Repeated nullable types are not supported by BigQuery.
return nil, unsupportedFieldTypeError{fieldName, rt}
}
f, err := inferFieldSchema(fieldName, et, false)
if err != nil {
return nil, err
}
f.Repeated = true
f.Required = false
return f, nil
case reflect.Ptr:
if rt.Elem().Kind() != reflect.Struct {
return nil, unsupportedFieldTypeError{fieldName, rt}
}
fallthrough
case reflect.Struct:
nested, err := inferStruct(rt)
if err != nil {
return nil, err
}
return &FieldSchema{Required: !nullable, Type: RecordFieldType, Schema: nested}, nil
case reflect.String:
return &FieldSchema{Required: !nullable, Type: StringFieldType}, nil
case reflect.Bool:
return &FieldSchema{Required: !nullable, Type: BooleanFieldType}, nil
case reflect.Float32, reflect.Float64:
return &FieldSchema{Required: !nullable, Type: FloatFieldType}, nil
default:
return nil, unsupportedFieldTypeError{fieldName, rt}
}
}
// inferFields extracts all exported field types from struct type.
func inferFields(rt reflect.Type) (Schema, error) {
var s Schema
fields, err := fieldCache.Fields(rt)
if err != nil {
return nil, err
}
for _, field := range fields {
var nullable bool
for _, opt := range field.ParsedTag.([]string) {
if opt == nullableTagOption {
nullable = true
break
}
}
f, err := inferFieldSchema(field.Name, field.Type, nullable)
if err != nil {
return nil, err
}
f.Name = field.Name
s = append(s, f)
}
return s, nil
}
// isSupportedIntType reports whether t is an int type that can be properly
// represented by the BigQuery INTEGER/INT64 type.
func isSupportedIntType(t reflect.Type) bool {
switch t.Kind() {
case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
return true
default:
return false
}
}
// isSupportedIntType reports whether t is a uint type that can be properly
// represented by the BigQuery INTEGER/INT64 type.
func isSupportedUintType(t reflect.Type) bool {
switch t.Kind() {
case reflect.Uint8, reflect.Uint16, reflect.Uint32:
return true
default:
return false
}
}
// typeList is a linked list of reflect.Types.
type typeList struct {
t reflect.Type
next *typeList
}
func (l *typeList) has(t reflect.Type) bool {
for l != nil {
if l.t == t {
return true
}
l = l.next
}
return false
}
// hasRecursiveType reports whether t or any type inside t refers to itself, directly or indirectly,
// via exported fields. (Schema inference ignores unexported fields.)
func hasRecursiveType(t reflect.Type, seen *typeList) (bool, error) {
for t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice || t.Kind() == reflect.Array {
t = t.Elem()
}
if t.Kind() != reflect.Struct {
return false, nil
}
if seen.has(t) {
return true, nil
}
fields, err := fieldCache.Fields(t)
if err != nil {
return false, err
}
seen = &typeList{t, seen}
// Because seen is a linked list, additions to it from one field's
// recursive call will not affect the value for subsequent fields' calls.
for _, field := range fields {
ok, err := hasRecursiveType(field.Type, seen)
if err != nil {
return false, err
}
if ok {
return true, nil
}
}
return false, nil
}
// bigQuerySchemaJSONField is an individual field in a JSON BigQuery table schema definition
// (as generated by https://github.com/GoogleCloudPlatform/protoc-gen-bq-schema).
type bigQueryJSONField struct {
Description string `json:"description"`
Fields []bigQueryJSONField `json:"fields"`
Mode string `json:"mode"`
Name string `json:"name"`
Type string `json:"type"`
}
// convertSchemaFromJSON generates a Schema:
func convertSchemaFromJSON(fs []bigQueryJSONField) (Schema, error) {
convertedSchema := Schema{}
for _, f := range fs {
convertedFieldSchema := &FieldSchema{
Description: f.Description,
Name: f.Name,
Required: f.Mode == "REQUIRED",
Repeated: f.Mode == "REPEATED",
}
if len(f.Fields) > 0 {
convertedNestedFieldSchema, err := convertSchemaFromJSON(f.Fields)
if err != nil {
return nil, err
}
convertedFieldSchema.Schema = convertedNestedFieldSchema
}
// Check that the field-type (string) maps to a known FieldType:
if _, ok := fieldTypes[FieldType(f.Type)]; !ok {
return nil, fmt.Errorf("unknown field type (%v)", f.Type)
}
convertedFieldSchema.Type = FieldType(f.Type)
convertedSchema = append(convertedSchema, convertedFieldSchema)
}
return convertedSchema, nil
}
// SchemaFromJSON takes a JSON BigQuery table schema definition
// (as generated by https://github.com/GoogleCloudPlatform/protoc-gen-bq-schema)
// and returns a fully-populated Schema.
func SchemaFromJSON(schemaJSON []byte) (Schema, error) {
var bigQuerySchema []bigQueryJSONField
// Make sure we actually have some content:
if len(schemaJSON) == 0 {
return nil, errEmptyJSONSchema
}
if err := json.Unmarshal(schemaJSON, &bigQuerySchema); err != nil {
return nil, err
}
return convertSchemaFromJSON(bigQuerySchema)
}
type noStructError struct {
typ reflect.Type
}
func (e noStructError) Error() string {
return fmt.Sprintf("bigquery: can only infer schema from struct or pointer to struct, not %s", e.typ)
}
type badNullableError struct {
name string
typ reflect.Type
}
func (e badNullableError) Error() string {
return fmt.Sprintf(`bigquery: field %q of type %s: use "nullable" only for []byte and struct pointers; for all other types, use a NullXXX type`, e.name, e.typ)
}
type unsupportedFieldTypeError struct {
name string
typ reflect.Type
}
func (e unsupportedFieldTypeError) Error() string {
return fmt.Sprintf("bigquery: field %q: type %s is not supported", e.name, e.typ)
}
|