1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599
|
// Copyright 2012-present Oliver Eilhard. All rights reserved.
// Use of this source code is governed by a MIT-license.
// See http://olivere.mit-license.org/license.txt for details.
package elastic
import (
"context"
"net"
"sync"
"sync/atomic"
"time"
)
// BulkProcessorService allows to easily process bulk requests. It allows setting
// policies when to flush new bulk requests, e.g. based on a number of actions,
// on the size of the actions, and/or to flush periodically. It also allows
// to control the number of concurrent bulk requests allowed to be executed
// in parallel.
//
// BulkProcessorService, by default, commits either every 1000 requests or when the
// (estimated) size of the bulk requests exceeds 5 MB. However, it does not
// commit periodically. BulkProcessorService also does retry by default, using
// an exponential backoff algorithm.
//
// The caller is responsible for setting the index and type on every
// bulk request added to BulkProcessorService.
//
// BulkProcessorService takes ideas from the BulkProcessor of the
// Elasticsearch Java API as documented in
// https://www.elastic.co/guide/en/elasticsearch/client/java-api/current/java-docs-bulk-processor.html.
type BulkProcessorService struct {
c *Client
beforeFn BulkBeforeFunc
afterFn BulkAfterFunc
name string // name of processor
numWorkers int // # of workers (>= 1)
bulkActions int // # of requests after which to commit
bulkSize int // # of bytes after which to commit
flushInterval time.Duration // periodic flush interval
wantStats bool // indicates whether to gather statistics
backoff Backoff // a custom Backoff to use for errors
}
// NewBulkProcessorService creates a new BulkProcessorService.
func NewBulkProcessorService(client *Client) *BulkProcessorService {
return &BulkProcessorService{
c: client,
numWorkers: 1,
bulkActions: 1000,
bulkSize: 5 << 20, // 5 MB
backoff: NewExponentialBackoff(
time.Duration(200)*time.Millisecond,
time.Duration(10000)*time.Millisecond,
),
}
}
// BulkBeforeFunc defines the signature of callbacks that are executed
// before a commit to Elasticsearch.
type BulkBeforeFunc func(executionId int64, requests []BulkableRequest)
// BulkAfterFunc defines the signature of callbacks that are executed
// after a commit to Elasticsearch. The err parameter signals an error.
type BulkAfterFunc func(executionId int64, requests []BulkableRequest, response *BulkResponse, err error)
// Before specifies a function to be executed before bulk requests get comitted
// to Elasticsearch.
func (s *BulkProcessorService) Before(fn BulkBeforeFunc) *BulkProcessorService {
s.beforeFn = fn
return s
}
// After specifies a function to be executed when bulk requests have been
// comitted to Elasticsearch. The After callback executes both when the
// commit was successful as well as on failures.
func (s *BulkProcessorService) After(fn BulkAfterFunc) *BulkProcessorService {
s.afterFn = fn
return s
}
// Name is an optional name to identify this bulk processor.
func (s *BulkProcessorService) Name(name string) *BulkProcessorService {
s.name = name
return s
}
// Workers is the number of concurrent workers allowed to be
// executed. Defaults to 1 and must be greater or equal to 1.
func (s *BulkProcessorService) Workers(num int) *BulkProcessorService {
s.numWorkers = num
return s
}
// BulkActions specifies when to flush based on the number of actions
// currently added. Defaults to 1000 and can be set to -1 to be disabled.
func (s *BulkProcessorService) BulkActions(bulkActions int) *BulkProcessorService {
s.bulkActions = bulkActions
return s
}
// BulkSize specifies when to flush based on the size (in bytes) of the actions
// currently added. Defaults to 5 MB and can be set to -1 to be disabled.
func (s *BulkProcessorService) BulkSize(bulkSize int) *BulkProcessorService {
s.bulkSize = bulkSize
return s
}
// FlushInterval specifies when to flush at the end of the given interval.
// This is disabled by default. If you want the bulk processor to
// operate completely asynchronously, set both BulkActions and BulkSize to
// -1 and set the FlushInterval to a meaningful interval.
func (s *BulkProcessorService) FlushInterval(interval time.Duration) *BulkProcessorService {
s.flushInterval = interval
return s
}
// Stats tells bulk processor to gather stats while running.
// Use Stats to return the stats. This is disabled by default.
func (s *BulkProcessorService) Stats(wantStats bool) *BulkProcessorService {
s.wantStats = wantStats
return s
}
// Backoff sets the backoff strategy to use for errors
func (s *BulkProcessorService) Backoff(backoff Backoff) *BulkProcessorService {
s.backoff = backoff
return s
}
// Do creates a new BulkProcessor and starts it.
// Consider the BulkProcessor as a running instance that accepts bulk requests
// and commits them to Elasticsearch, spreading the work across one or more
// workers.
//
// You can interoperate with the BulkProcessor returned by Do, e.g. Start and
// Stop (or Close) it.
//
// Context is an optional context that is passed into the bulk request
// service calls. In contrast to other operations, this context is used in
// a long running process. You could use it to pass e.g. loggers, but you
// shouldn't use it for cancellation.
//
// Calling Do several times returns new BulkProcessors. You probably don't
// want to do this. BulkProcessorService implements just a builder pattern.
func (s *BulkProcessorService) Do(ctx context.Context) (*BulkProcessor, error) {
p := newBulkProcessor(
s.c,
s.beforeFn,
s.afterFn,
s.name,
s.numWorkers,
s.bulkActions,
s.bulkSize,
s.flushInterval,
s.wantStats,
s.backoff)
err := p.Start(ctx)
if err != nil {
return nil, err
}
return p, nil
}
// -- Bulk Processor Statistics --
// BulkProcessorStats contains various statistics of a bulk processor
// while it is running. Use the Stats func to return it while running.
type BulkProcessorStats struct {
Flushed int64 // number of times the flush interval has been invoked
Committed int64 // # of times workers committed bulk requests
Indexed int64 // # of requests indexed
Created int64 // # of requests that ES reported as creates (201)
Updated int64 // # of requests that ES reported as updates
Deleted int64 // # of requests that ES reported as deletes
Succeeded int64 // # of requests that ES reported as successful
Failed int64 // # of requests that ES reported as failed
Workers []*BulkProcessorWorkerStats // stats for each worker
}
// BulkProcessorWorkerStats represents per-worker statistics.
type BulkProcessorWorkerStats struct {
Queued int64 // # of requests queued in this worker
LastDuration time.Duration // duration of last commit
}
// newBulkProcessorStats initializes and returns a BulkProcessorStats struct.
func newBulkProcessorStats(workers int) *BulkProcessorStats {
stats := &BulkProcessorStats{
Workers: make([]*BulkProcessorWorkerStats, workers),
}
for i := 0; i < workers; i++ {
stats.Workers[i] = &BulkProcessorWorkerStats{}
}
return stats
}
func (st *BulkProcessorStats) dup() *BulkProcessorStats {
dst := new(BulkProcessorStats)
dst.Flushed = st.Flushed
dst.Committed = st.Committed
dst.Indexed = st.Indexed
dst.Created = st.Created
dst.Updated = st.Updated
dst.Deleted = st.Deleted
dst.Succeeded = st.Succeeded
dst.Failed = st.Failed
for _, src := range st.Workers {
dst.Workers = append(dst.Workers, src.dup())
}
return dst
}
func (st *BulkProcessorWorkerStats) dup() *BulkProcessorWorkerStats {
dst := new(BulkProcessorWorkerStats)
dst.Queued = st.Queued
dst.LastDuration = st.LastDuration
return dst
}
// -- Bulk Processor --
// BulkProcessor encapsulates a task that accepts bulk requests and
// orchestrates committing them to Elasticsearch via one or more workers.
//
// BulkProcessor is returned by setting up a BulkProcessorService and
// calling the Do method.
type BulkProcessor struct {
c *Client
beforeFn BulkBeforeFunc
afterFn BulkAfterFunc
name string
bulkActions int
bulkSize int
numWorkers int
executionId int64
requestsC chan BulkableRequest
workerWg sync.WaitGroup
workers []*bulkWorker
flushInterval time.Duration
flusherStopC chan struct{}
wantStats bool
backoff Backoff
startedMu sync.Mutex // guards the following block
started bool
statsMu sync.Mutex // guards the following block
stats *BulkProcessorStats
stopReconnC chan struct{} // channel to signal stop reconnection attempts
}
func newBulkProcessor(
client *Client,
beforeFn BulkBeforeFunc,
afterFn BulkAfterFunc,
name string,
numWorkers int,
bulkActions int,
bulkSize int,
flushInterval time.Duration,
wantStats bool,
backoff Backoff) *BulkProcessor {
return &BulkProcessor{
c: client,
beforeFn: beforeFn,
afterFn: afterFn,
name: name,
numWorkers: numWorkers,
bulkActions: bulkActions,
bulkSize: bulkSize,
flushInterval: flushInterval,
wantStats: wantStats,
backoff: backoff,
}
}
// Start starts the bulk processor. If the processor is already started,
// nil is returned.
func (p *BulkProcessor) Start(ctx context.Context) error {
p.startedMu.Lock()
defer p.startedMu.Unlock()
if p.started {
return nil
}
// We must have at least one worker.
if p.numWorkers < 1 {
p.numWorkers = 1
}
p.requestsC = make(chan BulkableRequest)
p.executionId = 0
p.stats = newBulkProcessorStats(p.numWorkers)
p.stopReconnC = make(chan struct{})
// Create and start up workers.
p.workers = make([]*bulkWorker, p.numWorkers)
for i := 0; i < p.numWorkers; i++ {
p.workerWg.Add(1)
p.workers[i] = newBulkWorker(p, i)
go p.workers[i].work(ctx)
}
// Start the ticker for flush (if enabled)
if int64(p.flushInterval) > 0 {
p.flusherStopC = make(chan struct{})
go p.flusher(p.flushInterval)
}
p.started = true
return nil
}
// Stop is an alias for Close.
func (p *BulkProcessor) Stop() error {
return p.Close()
}
// Close stops the bulk processor previously started with Do.
// If it is already stopped, this is a no-op and nil is returned.
//
// By implementing Close, BulkProcessor implements the io.Closer interface.
func (p *BulkProcessor) Close() error {
p.startedMu.Lock()
defer p.startedMu.Unlock()
// Already stopped? Do nothing.
if !p.started {
return nil
}
// Tell connection checkers to stop
if p.stopReconnC != nil {
close(p.stopReconnC)
p.stopReconnC = nil
}
// Stop flusher (if enabled)
if p.flusherStopC != nil {
p.flusherStopC <- struct{}{}
<-p.flusherStopC
close(p.flusherStopC)
p.flusherStopC = nil
}
// Stop all workers.
close(p.requestsC)
p.workerWg.Wait()
p.started = false
return nil
}
// Stats returns the latest bulk processor statistics.
// Collecting stats must be enabled first by calling Stats(true) on
// the service that created this processor.
func (p *BulkProcessor) Stats() BulkProcessorStats {
p.statsMu.Lock()
defer p.statsMu.Unlock()
return *p.stats.dup()
}
// Add adds a single request to commit by the BulkProcessorService.
//
// The caller is responsible for setting the index and type on the request.
func (p *BulkProcessor) Add(request BulkableRequest) {
p.requestsC <- request
}
// Flush manually asks all workers to commit their outstanding requests.
// It returns only when all workers acknowledge completion.
func (p *BulkProcessor) Flush() error {
p.statsMu.Lock()
p.stats.Flushed++
p.statsMu.Unlock()
for _, w := range p.workers {
w.flushC <- struct{}{}
<-w.flushAckC // wait for completion
}
return nil
}
// flusher is a single goroutine that periodically asks all workers to
// commit their outstanding bulk requests. It is only started if
// FlushInterval is greater than 0.
func (p *BulkProcessor) flusher(interval time.Duration) {
ticker := time.NewTicker(interval)
defer ticker.Stop()
for {
select {
case <-ticker.C: // Periodic flush
p.Flush() // TODO swallow errors here?
case <-p.flusherStopC:
p.flusherStopC <- struct{}{}
return
}
}
}
// -- Bulk Worker --
// bulkWorker encapsulates a single worker, running in a goroutine,
// receiving bulk requests and eventually committing them to Elasticsearch.
// It is strongly bound to a BulkProcessor.
type bulkWorker struct {
p *BulkProcessor
i int
bulkActions int
bulkSize int
service *BulkService
flushC chan struct{}
flushAckC chan struct{}
}
// newBulkWorker creates a new bulkWorker instance.
func newBulkWorker(p *BulkProcessor, i int) *bulkWorker {
return &bulkWorker{
p: p,
i: i,
bulkActions: p.bulkActions,
bulkSize: p.bulkSize,
service: NewBulkService(p.c),
flushC: make(chan struct{}),
flushAckC: make(chan struct{}),
}
}
// work waits for bulk requests and manual flush calls on the respective
// channels and is invoked as a goroutine when the bulk processor is started.
func (w *bulkWorker) work(ctx context.Context) {
defer func() {
w.p.workerWg.Done()
close(w.flushAckC)
close(w.flushC)
}()
var stop bool
for !stop {
var err error
select {
case req, open := <-w.p.requestsC:
if open {
// Received a new request
w.service.Add(req)
if w.commitRequired() {
err = w.commit(ctx)
}
} else {
// Channel closed: Stop.
stop = true
if w.service.NumberOfActions() > 0 {
err = w.commit(ctx)
}
}
case <-w.flushC:
// Commit outstanding requests
if w.service.NumberOfActions() > 0 {
err = w.commit(ctx)
}
w.flushAckC <- struct{}{}
}
if !stop && err != nil {
waitForActive := func() {
// Add back pressure to prevent Add calls from filling up the request queue
ready := make(chan struct{})
go w.waitForActiveConnection(ready)
<-ready
}
if _, ok := err.(net.Error); ok {
waitForActive()
} else if IsConnErr(err) {
waitForActive()
}
}
}
}
// commit commits the bulk requests in the given service,
// invoking callbacks as specified.
func (w *bulkWorker) commit(ctx context.Context) error {
var res *BulkResponse
// commitFunc will commit bulk requests and, on failure, be retried
// via exponential backoff
commitFunc := func() error {
var err error
res, err = w.service.Do(ctx)
return err
}
// notifyFunc will be called if retry fails
notifyFunc := func(err error) {
w.p.c.errorf("elastic: bulk processor %q failed but may retry: %v", w.p.name, err)
}
id := atomic.AddInt64(&w.p.executionId, 1)
// Update # documents in queue before eventual retries
w.p.statsMu.Lock()
if w.p.wantStats {
w.p.stats.Workers[w.i].Queued = int64(len(w.service.requests))
}
w.p.statsMu.Unlock()
// Save requests because they will be reset in commitFunc
reqs := w.service.requests
// Invoke before callback
if w.p.beforeFn != nil {
w.p.beforeFn(id, reqs)
}
// Commit bulk requests
err := RetryNotify(commitFunc, w.p.backoff, notifyFunc)
w.updateStats(res)
if err != nil {
w.p.c.errorf("elastic: bulk processor %q failed: %v", w.p.name, err)
}
// Invoke after callback
if w.p.afterFn != nil {
w.p.afterFn(id, reqs, res, err)
}
return err
}
func (w *bulkWorker) waitForActiveConnection(ready chan<- struct{}) {
defer close(ready)
t := time.NewTicker(5 * time.Second)
defer t.Stop()
client := w.p.c
stopReconnC := w.p.stopReconnC
w.p.c.errorf("elastic: bulk processor %q is waiting for an active connection", w.p.name)
// loop until a health check finds at least 1 active connection or the reconnection channel is closed
for {
select {
case _, ok := <-stopReconnC:
if !ok {
w.p.c.errorf("elastic: bulk processor %q active connection check interrupted", w.p.name)
return
}
case <-t.C:
client.healthcheck(time.Duration(3)*time.Second, true)
if client.mustActiveConn() == nil {
// found an active connection
// exit and signal done to the WaitGroup
return
}
}
}
}
func (w *bulkWorker) updateStats(res *BulkResponse) {
// Update stats
if res != nil {
w.p.statsMu.Lock()
if w.p.wantStats {
w.p.stats.Committed++
if res != nil {
w.p.stats.Indexed += int64(len(res.Indexed()))
w.p.stats.Created += int64(len(res.Created()))
w.p.stats.Updated += int64(len(res.Updated()))
w.p.stats.Deleted += int64(len(res.Deleted()))
w.p.stats.Succeeded += int64(len(res.Succeeded()))
w.p.stats.Failed += int64(len(res.Failed()))
}
w.p.stats.Workers[w.i].Queued = int64(len(w.service.requests))
w.p.stats.Workers[w.i].LastDuration = time.Duration(int64(res.Took)) * time.Millisecond
}
w.p.statsMu.Unlock()
}
}
// commitRequired returns true if the service has to commit its
// bulk requests. This can be either because the number of actions
// or the estimated size in bytes is larger than specified in the
// BulkProcessorService.
func (w *bulkWorker) commitRequired() bool {
if w.bulkActions >= 0 && w.service.NumberOfActions() >= w.bulkActions {
return true
}
if w.bulkSize >= 0 && w.service.EstimatedSizeInBytes() >= int64(w.bulkSize) {
return true
}
return false
}
|