1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326
|
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bigquery
import (
"errors"
"time"
"cloud.google.com/go/internal"
gax "github.com/googleapis/gax-go"
"golang.org/x/net/context"
bq "google.golang.org/api/bigquery/v2"
)
// A Job represents an operation which has been submitted to BigQuery for processing.
type Job struct {
c *Client
projectID string
jobID string
isQuery bool
destinationTable *bq.TableReference // table to read query results from
}
// JobFromID creates a Job which refers to an existing BigQuery job. The job
// need not have been created by this package. For example, the job may have
// been created in the BigQuery console.
func (c *Client) JobFromID(ctx context.Context, id string) (*Job, error) {
job, err := c.service.getJob(ctx, c.projectID, id)
if err != nil {
return nil, err
}
job.c = c
return job, nil
}
func (j *Job) ID() string {
return j.jobID
}
// State is one of a sequence of states that a Job progresses through as it is processed.
type State int
const (
Pending State = iota
Running
Done
)
// JobStatus contains the current State of a job, and errors encountered while processing that job.
type JobStatus struct {
State State
err error
// All errors encountered during the running of the job.
// Not all Errors are fatal, so errors here do not necessarily mean that the job has completed or was unsuccessful.
Errors []*Error
// Statistics about the job.
Statistics *JobStatistics
}
// setJobRef initializes job's JobReference if given a non-empty jobID.
// projectID must be non-empty.
func setJobRef(job *bq.Job, jobID, projectID string) {
if jobID == "" {
return
}
// We don't check whether projectID is empty; the server will return an
// error when it encounters the resulting JobReference.
job.JobReference = &bq.JobReference{
JobId: jobID,
ProjectId: projectID,
}
}
// Done reports whether the job has completed.
// After Done returns true, the Err method will return an error if the job completed unsuccesfully.
func (s *JobStatus) Done() bool {
return s.State == Done
}
// Err returns the error that caused the job to complete unsuccesfully (if any).
func (s *JobStatus) Err() error {
return s.err
}
// Status returns the current status of the job. It fails if the Status could not be determined.
func (j *Job) Status(ctx context.Context) (*JobStatus, error) {
js, err := j.c.service.jobStatus(ctx, j.projectID, j.jobID)
if err != nil {
return nil, err
}
// Fill in the client field of Tables in the statistics.
if js.Statistics != nil {
if qs, ok := js.Statistics.Details.(*QueryStatistics); ok {
for _, t := range qs.ReferencedTables {
t.c = j.c
}
}
}
return js, nil
}
// Cancel requests that a job be cancelled. This method returns without waiting for
// cancellation to take effect. To check whether the job has terminated, use Job.Status.
// Cancelled jobs may still incur costs.
func (j *Job) Cancel(ctx context.Context) error {
return j.c.service.jobCancel(ctx, j.projectID, j.jobID)
}
// Wait blocks until the job or the context is done. It returns the final status
// of the job.
// If an error occurs while retrieving the status, Wait returns that error. But
// Wait returns nil if the status was retrieved successfully, even if
// status.Err() != nil. So callers must check both errors. See the example.
func (j *Job) Wait(ctx context.Context) (*JobStatus, error) {
if j.isQuery {
// We can avoid polling for query jobs.
if _, err := j.c.service.waitForQuery(ctx, j.projectID, j.jobID); err != nil {
return nil, err
}
// Note: extra RPC even if you just want to wait for the query to finish.
js, err := j.Status(ctx)
if err != nil {
return nil, err
}
return js, nil
}
// Non-query jobs must poll.
var js *JobStatus
err := internal.Retry(ctx, gax.Backoff{}, func() (stop bool, err error) {
js, err = j.Status(ctx)
if err != nil {
return true, err
}
if js.Done() {
return true, nil
}
return false, nil
})
if err != nil {
return nil, err
}
return js, nil
}
// Read fetches the results of a query job.
// If j is not a query job, Read returns an error.
func (j *Job) Read(ctx context.Context) (*RowIterator, error) {
if !j.isQuery {
return nil, errors.New("bigquery: cannot read from a non-query job")
}
var projectID string
if j.destinationTable != nil {
projectID = j.destinationTable.ProjectId
} else {
projectID = j.c.projectID
}
schema, err := j.c.service.waitForQuery(ctx, projectID, j.jobID)
if err != nil {
return nil, err
}
// The destination table should only be nil if there was a query error.
if j.destinationTable == nil {
return nil, errors.New("bigquery: query job missing destination table")
}
return newRowIterator(ctx, j.c.service, &readTableConf{
projectID: j.destinationTable.ProjectId,
datasetID: j.destinationTable.DatasetId,
tableID: j.destinationTable.TableId,
schema: schema,
}), nil
}
// JobStatistics contains statistics about a job.
type JobStatistics struct {
CreationTime time.Time
StartTime time.Time
EndTime time.Time
TotalBytesProcessed int64
Details Statistics
}
// Statistics is one of ExtractStatistics, LoadStatistics or QueryStatistics.
type Statistics interface {
implementsStatistics()
}
// ExtractStatistics contains statistics about an extract job.
type ExtractStatistics struct {
// The number of files per destination URI or URI pattern specified in the
// extract configuration. These values will be in the same order as the
// URIs specified in the 'destinationUris' field.
DestinationURIFileCounts []int64
}
// LoadStatistics contains statistics about a load job.
type LoadStatistics struct {
// The number of bytes of source data in a load job.
InputFileBytes int64
// The number of source files in a load job.
InputFiles int64
// Size of the loaded data in bytes. Note that while a load job is in the
// running state, this value may change.
OutputBytes int64
// The number of rows imported in a load job. Note that while an import job is
// in the running state, this value may change.
OutputRows int64
}
// QueryStatistics contains statistics about a query job.
type QueryStatistics struct {
// Billing tier for the job.
BillingTier int64
// Whether the query result was fetched from the query cache.
CacheHit bool
// The type of query statement, if valid.
StatementType string
// Total bytes billed for the job.
TotalBytesBilled int64
// Total bytes processed for the job.
TotalBytesProcessed int64
// Describes execution plan for the query.
QueryPlan []*ExplainQueryStage
// The number of rows affected by a DML statement. Present only for DML
// statements INSERT, UPDATE or DELETE.
NumDMLAffectedRows int64
// ReferencedTables: [Output-only, Experimental] Referenced tables for
// the job. Queries that reference more than 50 tables will not have a
// complete list.
ReferencedTables []*Table
// The schema of the results. Present only for successful dry run of
// non-legacy SQL queries.
Schema Schema
// Standard SQL: list of undeclared query parameter names detected during a
// dry run validation.
UndeclaredQueryParameterNames []string
}
// ExplainQueryStage describes one stage of a query.
type ExplainQueryStage struct {
// Relative amount of the total time the average shard spent on CPU-bound tasks.
ComputeRatioAvg float64
// Relative amount of the total time the slowest shard spent on CPU-bound tasks.
ComputeRatioMax float64
// Unique ID for stage within plan.
ID int64
// Human-readable name for stage.
Name string
// Relative amount of the total time the average shard spent reading input.
ReadRatioAvg float64
// Relative amount of the total time the slowest shard spent reading input.
ReadRatioMax float64
// Number of records read into the stage.
RecordsRead int64
// Number of records written by the stage.
RecordsWritten int64
// Current status for the stage.
Status string
// List of operations within the stage in dependency order (approximately
// chronological).
Steps []*ExplainQueryStep
// Relative amount of the total time the average shard spent waiting to be scheduled.
WaitRatioAvg float64
// Relative amount of the total time the slowest shard spent waiting to be scheduled.
WaitRatioMax float64
// Relative amount of the total time the average shard spent on writing output.
WriteRatioAvg float64
// Relative amount of the total time the slowest shard spent on writing output.
WriteRatioMax float64
}
// ExplainQueryStep describes one step of a query stage.
type ExplainQueryStep struct {
// Machine-readable operation type.
Kind string
// Human-readable stage descriptions.
Substeps []string
}
func (*ExtractStatistics) implementsStatistics() {}
func (*LoadStatistics) implementsStatistics() {}
func (*QueryStatistics) implementsStatistics() {}
|