1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
|
# frozen_string_literal: true
module Backup
# Backup options provided by the command line interface
class Options
# SkippableTasks store which tasks can be skipped
# Setting any one to true, will create or restore a backup without that data
# @example Skipping database content and CI job artifacts
# SkippableTasks.new(db: true, artifacts: true)
SkippableTasks = Struct.new(
:db, # Database content (PostgreSQL)
:uploads, # Attachments
:builds, # CI job output logs
:artifacts, # CI job artifacts
:lfs, # LFS objects
:terraform_state, # Terraform states
:registry, # Container registry images
:pages, # GitLab Pages content
:repositories, # Repositories
:packages, # Packages
:ci_secure_files, # Project-level Secure Files
:external_diffs, # External Merge Request diffs
keyword_init: true
)
# What operations can be skipped
SkippableOperations = Struct.new(
:archive, # whether to skip .tar step
:remote_storage, # whether to skip uploading to remote storage
keyword_init: true
)
CompressionOptions = Struct.new(
:compression_cmd, # custom compression command
:decompression_cmd, # custom decompression command
keyword_init: true
)
# A backup will by default use STREAM strategy where content is streamed to the archive
# With COPY strategy, files are copied first to a temporary location before they are added to the archive
module Strategy
STREAM = :stream
COPY = :copy
end
# Backup ID is the backup filename portion without extensions
# When this option is not provided, the backup name will be based on date, timestamp and gitlab version
#
# @return [String|Nil] backup id that is used as part of filename
attr_accessor :backup_id
# Reference to previous backup full path
#
# @return [String|Nil] previous backup full path
attr_accessor :previous_backup
# Run incremental backup?
#
# @return [Boolean] whether to run an incremental backup
attr_accessor :incremental
alias_method :incremental?, :incremental
# Whether to bypass warnings when performing dangerous operations
# This is currently being used for the database restore task only
#
# @return [Boolean] whether to bypass warnings and perform dangerous operations
attr_accessor :force
alias_method :force?, :force
# What strategy the backup process should use
#
# @return [Strategy::STREAM|Strategy::COPY]
attr_accessor :strategy
# A list of all tasks and whether they can be skipped or not
#
# @return [SkippableTasks]
attr_accessor :skippable_tasks
# A list of all operations and whether they can be skipped or not
#
# @return [SkippableOperations]
attr_accessor :skippable_operations
# When using multiple repository storages, repositories can be backed up and restored in parallel
# This option allows to customize the overall limit.
#
# This is only used by repository backup and restore steps (GitalyBackup)
#
# @return [Integer|Nil] limit of backup or restore operations to happen in parallel overall
attr_accessor :max_parallelism
# When using multiple repository storages, repositories can be backed up and restored in parallel
# This option allows to customize the limit per storage.
#
# This is only used by repository backup and restore steps (GitalyBackup)
#
# @return [Integer|Nil] limit of backup or restore operations to happen in parallel per storage
attr_accessor :max_storage_parallelism
# When using multiple repository storages, repositories from specific storages can be backed up
# separately by running the backup operation while setting this option
#
# @return [Array<String>] a list of repository storages to be backed up
attr_accessor :repositories_storages
# In order to backup specific repositories, multiple paths containing the
# selected namespaces will be used to find which repositories to backup
#
# Ex: ['group-a', 'group-b/project-c'] will select all projects in group-a and project-c in group-b
# This can be combined with #skip_repositories_paths
#
# @return [Array<String>] a list of paths to select which repositories to backup
attr_accessor :repositories_paths
# In order to backup specific repositories, multiple paths containing the
# selected namespaces can be specified using #repositories_paths. To further
# refine the list, a new list of paths can be provided to be skipped among
# the previous pre-selected ones.
#
# Ex: for a repository_paths containing ['group-a'] and skip_repository_paths
# containing ['group-a/project-d'], all projects in `group-a` except `project-d`
# will be backed up
#
# @return [Array<String>] a list of paths to skip backup
attr_accessor :skip_repositories_paths
# Specify GitalyBackup to handle and perform backups server-side and stream it to object storage
#
# When this is defined, repositories will not be part of the backup archive
# @return [Boolean] whether to perform server-side backups for repositories
attr_accessor :repositories_server_side_backup
# A custom directory to send your remote backups to
# It can be used to group different types of backups (ex: daily, weekly)
#
# @return [String|Nil]
attr_accessor :remote_directory
# Custom compression and decompression options
#
# When compression is customized, it will ignore other related options like `:gzip_rsyncable`
# @return [CompressionOptions] custom compression and decompression commands
attr_accessor :compression_options
# Whether to run gzip with `--rsyncable` flag
#
# This is ignored if custom :compression_options are provided
# @return [Boolean] whether to use `--rsyncable` flag with gzip
attr_accessor :gzip_rsyncable
# If the container registry is using object storage, this is the bucket that is used
# @return [String|Nil]
attr_accessor :container_registry_bucket
# If we are backing up object storage in GCP, this is a file containing the service account credentials to use
attr_accessor :service_account_file
# rubocop:disable Metrics/ParameterLists -- This is a data object with all possible CMD options
def initialize(
backup_id: nil, previous_backup: nil, incremental: false, force: false, strategy: Strategy::STREAM,
skippable_tasks: SkippableTasks.new, skippable_operations: SkippableOperations.new,
max_parallelism: nil, max_storage_parallelism: nil,
repository_storages: [], repository_paths: [], skip_repository_paths: [],
repositories_server_side_backup: false, remote_directory: nil,
compression_options: CompressionOptions.new, gzip_rsyncable: false, container_registry_bucket: nil,
service_account_file: nil)
@backup_id = backup_id
@previous_backup = previous_backup
@incremental = incremental
@force = force
@strategy = strategy
@skippable_tasks = skippable_tasks
@skippable_operations = skippable_operations
@max_parallelism = max_parallelism
@max_storage_parallelism = max_storage_parallelism
@remote_directory = remote_directory
@repositories_server_side_backup = repositories_server_side_backup
@repositories_storages = repository_storages
@repositories_paths = repository_paths
@skip_repositories_paths = skip_repository_paths
@compression_options = compression_options
@gzip_rsyncable = gzip_rsyncable
@container_registry_bucket = container_registry_bucket
@service_account_file = service_account_file
end
# rubocop:enable Metrics/ParameterLists
# rubocop:disable Metrics/AbcSize -- TODO: Complexity will be solved in the Unified Backup implementation (https://gitlab.com/groups/gitlab-org/-/epics/11635)
# Extract supported options from defined ENV variables
def extract_from_env!
# We've used lowercase `force` as the key while ENV normally is defined using UPPERCASE letters
# This provides a fallback when the user defines using expected standards, while not breaking compatibility
force_value = ENV.key?('FORCE') ? ENV['FORCE'] : ENV['force']
self.backup_id = ENV['BACKUP']
self.previous_backup = ENV['PREVIOUS_BACKUP']
self.incremental = Gitlab::Utils.to_boolean(ENV['INCREMENTAL'], default: incremental)
self.force = Gitlab::Utils.to_boolean(force_value, default: force)
self.strategy = Strategy::COPY if ENV['STRATEGY'] == 'copy'
self.max_parallelism = ENV['GITLAB_BACKUP_MAX_CONCURRENCY']&.to_i
self.max_storage_parallelism = ENV['GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY']&.to_i
self.remote_directory = ENV['DIRECTORY']
self.repositories_server_side_backup = Gitlab::Utils.to_boolean(ENV['REPOSITORIES_SERVER_SIDE'],
default: repositories_server_side_backup)
self.repositories_storages = ENV.fetch('REPOSITORIES_STORAGES', '').split(',').uniq
self.repositories_paths = ENV.fetch('REPOSITORIES_PATHS', '').split(',').uniq
self.skip_repositories_paths = ENV.fetch('SKIP_REPOSITORIES_PATHS', '').split(',').uniq
compression_options.compression_cmd = ENV['COMPRESS_CMD']
compression_options.decompression_cmd = ENV['DECOMPRESS_CMD']
self.gzip_rsyncable = Gitlab::Utils.to_boolean(ENV['GZIP_RSYNCABLE'], default: gzip_rsyncable)
extract_skippables!(ENV['SKIP']) if ENV['SKIP'].present?
end
# rubocop:enable Metrics/AbcSize
def update_from_backup_information!(backup_information)
self.repositories_storages += backup_information[:repositories_storages]&.split(',') || []
self.repositories_storages.uniq!
self.repositories_storages.compact!
self.repositories_paths += backup_information[:repositories_paths]&.split(',') || []
self.repositories_paths.uniq!
self.repositories_paths.compact!
self.skip_repositories_paths += backup_information[:skip_repositories_paths]&.split(',') || []
self.skip_repositories_paths.uniq!
self.skip_repositories_paths.compact!
extract_skippables!(backup_information[:skipped]) if backup_information[:skipped]
end
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity -- TODO: Complexity will be solved in the Unified Backup implementation (https://gitlab.com/groups/gitlab-org/-/epics/11635)
# Return a String with a list of skippables, separated by commas
#
# @return [String] a list of skippables
def serialize_skippables
list = []
list << 'tar' if skippable_operations.archive
list << 'remote' if skippable_operations.remote_storage
list << 'db' if skippable_tasks.db
list << 'uploads' if skippable_tasks.uploads
list << 'builds' if skippable_tasks.builds
list << 'artifacts' if skippable_tasks.artifacts
list << 'lfs' if skippable_tasks.lfs
list << 'terraform_state' if skippable_tasks.terraform_state
list << 'registry' if skippable_tasks.registry
list << 'pages' if skippable_tasks.pages
list << 'repositories' if skippable_tasks.repositories
list << 'packages' if skippable_tasks.packages
list << 'ci_secure_files' if skippable_tasks.ci_secure_files
list << 'external_diffs' if skippable_tasks.external_diffs
list.join(',')
end
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
# Extract skippables from provided data field
# Current callers will provide either ENV['SKIP'] or backup_information[:skipped] content
#
# The first time the method is executed it will setup `true` or `false` to each field
# subsequent executions will preserve `true` values and evaluate again only when previously set to `false`
#
# @param [String] field contains a list separated by comma without surrounding spaces
def extract_skippables!(field)
list = field.split(',').uniq
extract_skippable_operations!(list)
extract_skippable_tasks(list)
end
def skip_task?(task_name)
!!skippable_tasks[task_name]
end
private
def extract_skippable_operations!(list)
skippable_operations.archive ||= list.include?('tar') # SKIP=tar
skippable_operations.remote_storage ||= list.include?('remote') # SKIP=remote
end
def extract_skippable_tasks(list)
skippable_tasks.db ||= list.include?('db') # SKIP=db
skippable_tasks.uploads ||= list.include?('uploads') # SKIP=uploads
skippable_tasks.builds ||= list.include?('builds') # SKIP=builds
skippable_tasks.artifacts ||= list.include?('artifacts') # SKIP=artifacts
skippable_tasks.lfs ||= list.include?('lfs') # SKIP=lfs
skippable_tasks.terraform_state ||= list.include?('terraform_state') # SKIP=terraform_state
skippable_tasks.registry ||= list.include?('registry') # SKIP=registry
skippable_tasks.pages ||= list.include?('pages') # SKIP=pages
skippable_tasks.repositories ||= list.include?('repositories') # SKIP=repositories
skippable_tasks.packages ||= list.include?('packages') # SKIP=packages
skippable_tasks.ci_secure_files ||= list.include?('ci_secure_files') # SKIP=ci_secure_files
skippable_tasks.external_diffs ||= list.include?('external_diffs') # SKIP=external_diffs
end
end
end
|