File: files.rb

package info (click to toggle)
gitlab 17.6.5-19
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 629,368 kB
  • sloc: ruby: 1,915,304; javascript: 557,307; sql: 60,639; xml: 6,509; sh: 4,567; makefile: 1,239; python: 406
file content (222 lines) | stat: -rw-r--r-- 7,462 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# frozen_string_literal: true

require 'open3'

module Backup
  module Targets
    class Files < Target
      extend ::Gitlab::Utils::Override
      include Backup::Helper

      DEFAULT_EXCLUDE = ['lost+found'].freeze

      # Use the content from stdin instead of an actual filepath (used by tar as input or output)
      USE_STDIN = '-'

      attr_reader :excludes

      # Builds a backup target for a single storage directory.
      #
      # @param [IO] progress forwarded to the parent constructor
      # @param [String] storage_path directory this target archives and restores
      # @param [::Backup::Options] options forwarded to the parent constructor
      # @param [Array] excludes entries to leave out of the archive (none by default)
      def initialize(progress, storage_path, options:, excludes: [])
        super(progress, options: options)

        @excludes = excludes
        @storage_path = storage_path
      end

      # Archives the storage directory into +backup_tarball+.
      #
      # With the COPY strategy the storage directory is first rsync'ed below the
      # backup base path, the archive is built from that copy, and the copy is
      # removed afterwards. With any other strategy the archive is built straight
      # from the storage directory.
      #
      # @param backup_tarball path of the archive to write; an existing file is removed first
      # @param _ unused
      # @raise [FileBackupError] (through #raise_custom_error) when rsync fails or
      #   the tar/compress pipeline is not considered successful
      override :dump

      def dump(backup_tarball, _)
        FileUtils.mkdir_p(backup_basepath)
        FileUtils.rm_f(backup_tarball)

        tar_utils = ::Gitlab::Backup::Cli::Utils::Tar.new
        shell_pipeline = ::Gitlab::Backup::Cli::Shell::Pipeline
        compress_command = ::Gitlab::Backup::Cli::Shell::Command.new(compress_cmd)

        copy_first = options.strategy == ::Backup::Options::Strategy::COPY

        if copy_first
          rsync_cmd = %w[rsync -a --delete] + exclude_dirs_rsync + [storage_realpath.to_s, backup_basepath.to_s]
          output, status = Gitlab::Popen.popen(rsync_cmd)

          # rsync exits with 24 when source files vanished mid-transfer; give it one more try
          if status == 24
            $stdout.puts "Warning: files vanished during rsync, retrying..."
            output, status = Gitlab::Popen.popen(rsync_cmd)
          end

          if status != 0
            puts output
            raise_custom_error(backup_tarball)
          end
        end

        # Only the directory handed to tar differs between the two strategies
        source_directory = copy_first ? backup_files_realpath : storage_realpath

        # tar writes to stdout; the pipeline redirects the compressed stream into
        # the tarball, opened for writing with mode 0600
        archive_file = [backup_tarball, 'w', 0o600]
        pack_command = tar_utils.pack_cmd(
          archive_file: USE_STDIN,
          target_directory: source_directory,
          target: '.',
          excludes: excludes)
        result = shell_pipeline.new(pack_command, compress_command).run!(output: archive_file)

        # The rsync'ed copy is only an intermediate artifact
        FileUtils.rm_rf(backup_files_realpath) if copy_first

        archived = pipeline_succeeded?(
          tar_status: result.status_list[0],
          compress_status: result.status_list[1],
          output: result.stderr)

        raise_custom_error(backup_tarball) unless archived
      end

      # Restores the storage directory from +backup_tarball+.
      #
      # Whatever currently lives in the storage directory is moved aside first
      # (see #backup_existing_files_dir); the tarball is then decompressed and
      # piped into tar, which extracts into the storage directory.
      #
      # @param backup_tarball path of the archive to read
      # @param _ unused
      # @raise [Backup::Error] when the decompress/tar pipeline is not considered successful
      override :restore

      def restore(backup_tarball, _)
        backup_existing_files_dir(backup_tarball)

        tar_utils = ::Gitlab::Backup::Cli::Utils::Tar.new
        shell_pipeline = ::Gitlab::Backup::Cli::Shell::Pipeline
        decompress_command = ::Gitlab::Backup::Cli::Shell::Command.new(decompress_cmd)

        tarball_path = backup_tarball.to_s
        # tar reads the decompressed stream from stdin
        extract_command = tar_utils.extract_cmd(archive_file: USE_STDIN, target_directory: storage_realpath)

        result = shell_pipeline.new(decompress_command, extract_command).run!(input: tarball_path)

        # On restore the decompressor runs first, so the status order is reversed
        # compared to #dump
        restored = pipeline_succeeded?(
          compress_status: result.status_list[0],
          tar_status: result.status_list[1],
          output: result.stderr)
        return if restored

        raise Backup::Error, "Restore operation failed: #{result.stderr}"
      end

      # Moves the current content of the storage directory out of the way before
      # a restore, into `<backup base path>/tmp/<tarball name>.<unix timestamp>`.
      #
      # Entries are listed with Dir.children instead of a glob so that a storage
      # path containing glob metacharacters (`[`, `{`, `*`, `?`) is handled
      # literally; with a glob pattern such a path would match nothing and the
      # existing content would silently be left in place. Dir.children also
      # includes dotfiles and never yields `.` or `..`.
      #
      # Unlike a glob, Dir.children raises Errno::EACCES for an unreadable
      # storage directory; that is reported through #access_denied_error like
      # any other permission problem met while moving.
      #
      # @param backup_tarball path of the tarball being restored; its basename
      #   (without `.tar.gz`) names the directory the old content is moved to
      # @raise [RuntimeError] with an explanatory notice when moving fails with
      #   Errno::EACCES or Errno::EBUSY
      def backup_existing_files_dir(backup_tarball)
        name = File.basename(backup_tarball, '.tar.gz')
        timestamped_files_path = backup_basepath.join('tmp', "#{name}.#{Time.now.to_i}")

        return unless File.exist?(storage_realpath)

        # Move everything inside the existing storage directory to the
        # <name>.<timestamp> directory, which only its owner may access
        FileUtils.mkdir_p(timestamped_files_path, mode: 0o700)

        files = Dir.children(storage_realpath).map { |entry| File.join(storage_realpath, entry) }

        FileUtils.mv(files, timestamped_files_path)
      rescue Errno::EACCES
        access_denied_error(storage_realpath)
      rescue Errno::EBUSY
        resource_busy_error(storage_realpath)
      end

      # Tells whether tar's output contains a warning that is known to be harmless.
      #
      # The patterns are anchored with ^/$ on purpose: the output may span several
      # lines and a single matching line is enough.
      #
      # @param [String, nil] warning output to inspect
      # @return [Boolean] true when any known harmless warning is present
      def noncritical_warning?(warning)
        harmless_patterns = [
          /^g?tar: \.: Cannot mkdir: No such file or directory$/
        ]

        harmless_patterns.any? { |pattern| pattern.match?(warning) }
      end

      # Decides whether a tar + (de)compress pipeline run counts as successful.
      #
      # The (de)compression step must have succeeded. The tar step must either
      # have succeeded or have failed in a way #tar_ignore_non_success? accepts.
      # A missing status (nil) for either step counts as failure; previously a
      # nil tar status raised NoMethodError.
      #
      # @param tar_status status of the tar step (responds to #success? and #exitstatus), or nil
      # @param compress_status status of the (de)compression step (responds to #success?), or nil
      # @param [String] output stderr of the pipeline, checked for harmless warnings
      # @return [Boolean]
      def pipeline_succeeded?(tar_status:, compress_status:, output:)
        return false unless compress_status&.success?
        return false if tar_status.nil?

        tar_status.success? || tar_ignore_non_success?(tar_status.exitstatus, output)
      end

      # Decides whether a non-success tar exit can be tolerated, printing the
      # reason to stdout when it is.
      #
      # tar can exit with a nonzero code:
      #  1 - if some files changed (e.g. a CI job is currently writing to a log)
      #  2 - if it cannot create the `.` directory (see issue https://gitlab.com/gitlab-org/gitlab/-/issues/22442)
      #  http://www.gnu.org/software/tar/manual/html_section/tar_19.html#Synopsis
      # so status 1 is always tolerated, and any other status is tolerated only
      # when stderr contains a known non-critical warning.
      #
      # @param [Integer] exitstatus exit status reported by tar
      # @param [String] output stderr of the pipeline
      # @return [Boolean] true when the failure may be ignored
      def tar_ignore_non_success?(exitstatus, output)
        if exitstatus == 1
          $stdout.puts "Ignoring tar exit status 1 'Some files differ': #{output}"
          true
        elsif noncritical_warning?(output)
          $stdout.puts(
            "Ignoring non-success exit status #{exitstatus} due to output of non-critical warning(s): #{output}")
          true
        else
          false
        end
      end

      # Builds the `--exclude=` arguments passed to rsync.
      #
      # Default exclusions are emitted as-is. Configured excludes are prefixed
      # with `/<storage directory name>/`; the storage directory name is the
      # basename of the resolved storage path.
      #
      # @return [Array<String>] default exclusions followed by the configured ones
      def exclude_dirs_rsync
        default_flags = DEFAULT_EXCLUDE.map { |entry| "--exclude=#{entry}" }

        storage_dirname = Pathname(File.basename(storage_realpath))
        configured_flags = excludes.map { |entry| "--exclude=/#{storage_dirname.join(entry)}" }

        default_flags + configured_flags
      end

      # Raises the backup failure error for this target.
      #
      # @param backup_tarball path of the tarball that could not be produced
      # @raise [FileBackupError] always, built from the resolved storage path and the tarball path
      def raise_custom_error(backup_tarball)
        failure = FileBackupError.new(storage_realpath, backup_tarball)

        raise failure
      end

      # Always false for this target.
      # NOTE(review): presumably consulted by the code driving the backup targets
      # to decide how to run them; the caller is not visible here, so confirm there.
      #
      # @return [Boolean] false
      def asynchronous?
        false
      end

      private

      # Resolved (File.realpath) form of the configured storage path. The first
      # successful result is memoized, so later changes on disk are not picked up.
      #
      # @return [String]
      # @raise [SystemCallError] from File.realpath when the path cannot be resolved
      def storage_realpath
        return @storage_realpath if @storage_realpath

        @storage_realpath = File.realpath(@storage_path)
      end

      # Location of the intermediate copy of the storage directory below the
      # backup base path: `<backup base path>/<basename of storage path>`.
      # Despite the name, nothing is resolved on disk here; the path is only
      # joined, and the first result is memoized.
      #
      # @return [Pathname]
      def backup_files_realpath
        return @backup_files_realpath if @backup_files_realpath

        @backup_files_realpath = backup_basepath.join(File.basename(@storage_path))
      end

      # Base directory for backups, read from `Gitlab.config.backup.path` on
      # every call (not memoized).
      #
      # @return [Pathname]
      def backup_basepath
        configured_path = Gitlab.config.backup.path

        Pathname(configured_path)
      end

      # Aborts with a notice explaining that existing content could not be moved
      # out of +path+ because of a permission problem.
      #
      # @param [String] path directory whose content could not be moved
      # @raise [RuntimeError] always
      def access_denied_error(path)
        raise <<~NOTICE

          ### NOTICE ###
          As part of restore, the task tried to move existing content from #{path}.
          However, it seems that directory contains files/folders that are not owned
          by the user #{Gitlab.config.gitlab.user}. To proceed, please move the files
          or folders inside #{path} to a secure location so that #{path} is empty and
          run restore task again.

        NOTICE
      end

      # Aborts with a notice explaining that +path+ could not be renamed before
      # the restore, suggesting it may be a mountpoint.
      #
      # @param [String] path directory that could not be moved
      # @raise [RuntimeError] always
      def resource_busy_error(path)
        raise <<~NOTICE

          ### NOTICE ###
          As part of restore, the task tried to rename `#{path}` before restoring.
          This could not be completed, perhaps `#{path}` is a mountpoint?

          To complete the restore, please move the contents of `#{path}` to a
          different location and run the restore task again.

        NOTICE
      end
    end
  end
end