File: automated_cleanup.rb

package info (click to toggle)
gitlab 17.6.5-19
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 629,368 kB
  • sloc: ruby: 1,915,304; javascript: 557,307; sql: 60,639; xml: 6,509; sh: 4,567; makefile: 1,239; python: 406
file content (234 lines) | stat: -rwxr-xr-x 8,132 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#!/usr/bin/env ruby

# frozen_string_literal: true

# We need to take some precautions when using the `gitlab` gem in this project.
#
# See https://docs.gitlab.com/ee/development/pipelines/internals.html#using-the-gitlab-ruby-gem-in-the-canonical-project.
require 'gitlab'
require 'optparse'
require 'time'

require_relative File.expand_path('../../tooling/lib/tooling/helm3_client.rb', __dir__)
require_relative File.expand_path('../../tooling/lib/tooling/kubernetes_client.rb', __dir__)

module ReviewApps
  class AutomatedCleanup
    # Page size passed as `per_page` when listing environments through the GitLab API.
    ENVIRONMENTS_PER_PAGE = 100
    # Message fragments consulted when a Helm command fails while deleting
    # releases: a matching failure is logged instead of being re-raised.
    IGNORED_HELM_ERRORS = [
      'transport is closing',
      'error upgrading connection',
      'not found'
    ].freeze
    # Message fragments consulted when a Kubernetes command fails while deleting
    # namespaces: a matching failure is logged instead of being re-raised.
    IGNORED_KUBERNETES_ERRORS = [
      'NotFound'
    ].freeze
    # NOTE(review): not referenced anywhere in this file — presumably a leftover
    # or read from outside; confirm before relying on or removing it.
    ENVIRONMENTS_NOT_FOUND_THRESHOLD = 3

    # Parses the command-line switches understood by this script.
    #
    # Recognized switches are consumed from +argv+, which is parsed in place.
    # Dry-run is only switched on when the value given to -d/--dry-run is
    # exactly the string 'true'; -h/--help prints the usage text and exits.
    #
    # @param argv [Array<String>] raw command-line arguments
    # @return [Hash] options hash holding a boolean :dry_run entry
    def self.parse_args(argv)
      parsed = { dry_run: false }

      parser = OptionParser.new

      parser.on("-d BOOLEAN", "--dry-run BOOLEAN", String, "Whether to perform a dry-run or not.") do |flag|
        parsed[:dry_run] = true if flag == 'true'
      end

      parser.on("-h", "--help", "Prints this help") do
        puts parser
        exit
      end

      parser.parse!(argv)

      parsed
    end

    # $GITLAB_PROJECT_REVIEW_APP_CLEANUP_API_TOKEN => `Automated Review App Cleanup` project token
    # Captures the connection settings and the run mode used by the cleanup tasks.
    #
    # @param project_path [String] project path used for the environment API calls
    #   (defaults to $CI_PROJECT_PATH)
    # @param gitlab_token [String] API token
    #   (defaults to $GITLAB_PROJECT_REVIEW_APP_CLEANUP_API_TOKEN)
    # @param api_endpoint [String] GitLab API endpoint (defaults to $CI_API_V4_URL)
    # @param options [Hash] parsed options; only the :dry_run entry is read
    def initialize(
      project_path: ENV['CI_PROJECT_PATH'],
      gitlab_token: ENV['GITLAB_PROJECT_REVIEW_APP_CLEANUP_API_TOKEN'],
      api_endpoint: ENV['CI_API_V4_URL'],
      options: {}
    )
      @dry_run = options[:dry_run]
      @api_endpoint = api_endpoint
      @gitlab_token = gitlab_token
      @project_path = project_path
    end

    # Returns the `Gitlab` client module, configuring its endpoint and private
    # token on first use. Later calls return the memoized module without
    # configuring it again.
    def gitlab
      return @gitlab if @gitlab

      Gitlab.configure do |client_config|
        client_config.endpoint = api_endpoint
        # gitlab-bot's token "GitLab review apps cleanup"
        client_config.private_token = gitlab_token
      end

      @gitlab = Gitlab
    end

    # Memoized Tooling::Helm3Client instance, built on first use.
    def helm
      @helm = Tooling::Helm3Client.new unless @helm
      @helm
    end

    # Memoized Tooling::KubernetesClient instance, built on first use.
    def kubernetes
      @kubernetes = Tooling::KubernetesClient.new unless @kubernetes
      @kubernetes
    end

    # Stops, then deletes, the GitLab environments matching +env_prefix+ that
    # were created at or before the cut-off computed from +days_for_delete+.
    #
    # Environments are skipped when their name does not start with the prefix,
    # when they are in the 'stopping' state, or when they were created after
    # the cut-off.
    #
    # @param env_prefix [String] environment name prefix, also used as API search term
    # @param days_for_delete [Integer, #to_i] age in days, validated by #threshold_time
    def perform_gitlab_environment_cleanup!(env_prefix:, days_for_delete:)
      puts "Dry-run mode." if dry_run
      puts "Checking for GitLab #{env_prefix} environments deployed more than #{days_for_delete} days ago..."

      cutoff = threshold_time(days: days_for_delete)
      listing = gitlab.environments(project_path, per_page: ENVIRONMENTS_PER_PAGE, sort: 'desc', search: env_prefix)

      listing.auto_paginate do |environment|
        # TODO: Find a way to reset those, so that we can properly delete them.
        # We cannot delete environments in stopping state, hence the state check.
        eligible = environment.name.start_with?(env_prefix) &&
          environment.state != 'stopping' &&
          Time.parse(environment.created_at) <= cutoff
        next unless eligible

        stop_environment(environment)
        delete_environment(environment)
      end
    end

    # Deletes the review-app Helm releases that are failed or were last updated
    # before the cut-off computed from +days+.
    #
    # Only releases living in an allowed namespace and whose name starts with
    # 'review-' are considered. Considered releases that are kept are reported
    # as being left alone; the others are handed to #delete_helm_releases.
    #
    # @param days [Integer, #to_i] age in days, validated by #threshold_time
    def perform_helm_releases_cleanup!(days:)
      puts "Dry-run mode." if dry_run
      puts "Checking for Helm releases that are failed or not updated in the last #{days} days..."

      cutoff = threshold_time(days: days)
      stale_releases = []

      helm_releases.each do |release|
        # Prevents deleting `dns-gitlab-review-app` releases or other unrelated releases
        in_scope = Tooling::KubernetesClient::K8S_ALLOWED_NAMESPACES_REGEX.match?(release.namespace) &&
          release.name.start_with?('review-')
        next unless in_scope

        expired = release.status == 'failed' || release.last_update < cutoff
        if expired
          stale_releases << release
          next
        end

        print_release_state(
          subject: 'Release',
          release_name: release.name,
          release_date: release.last_update,
          action: 'leaving'
        )
      end

      delete_helm_releases(stale_releases)
    end

    # Asks the Kubernetes client to clean up the namespaces created before the
    # cut-off computed from +days+.
    #
    # In dry-run mode only the dry-run notice is printed: the cut-off is not
    # computed and the Kubernetes client is not called.
    #
    # @param days [Integer, #to_i] age in days, validated by #threshold_time
    def perform_stale_namespace_cleanup!(days:)
      if dry_run
        puts "Dry-run mode."
        return
      end

      kubernetes.cleanup_namespaces_by_created_at(created_before: threshold_time(days: days))
    end

    private

    # Internal readers for the values captured by #initialize.
    attr_reader :api_endpoint, :dry_run, :gitlab_token, :project_path

    # Stops the given environment through the GitLab API, unless it is already
    # stopped or stopping. In dry-run mode the action is only reported.
    #
    # A Gitlab::Error::Forbidden error is reported on stdout and swallowed.
    #
    # @param environment [#id, #name, #slug, #state, #created_at] environment record
    def stop_environment(environment)
      return if %w[stopped stopping].include?(environment.state)

      print_release_state(
        subject: 'GitLab Environment',
        release_name: environment.slug,
        release_date: environment.created_at,
        action: 'stopping'
      )
      return if dry_run

      gitlab.stop_environment(project_path, environment.id)
    rescue Gitlab::Error::Forbidden
      puts "GitLab environment '#{environment.name}' / '#{environment.slug}' (##{environment.id}) is forbidden: skipping it"
    end

    # Deletes the given environment through the GitLab API, unless it is in the
    # 'stopping' state. In dry-run mode the action is only reported.
    #
    # NotFound, Forbidden and InternalServerError responses are reported on
    # stdout and swallowed.
    #
    # @param environment [#id, #name, #slug, #state, #created_at] environment record
    def delete_environment(environment)
      return if environment.state == 'stopping'

      print_release_state(
        subject: 'GitLab environment',
        release_name: environment.slug,
        release_date: environment.created_at,
        action: 'deleting'
      )
      return if dry_run

      gitlab.delete_environment(project_path, environment.id)
    rescue Gitlab::Error::NotFound, Gitlab::Error::Forbidden, Gitlab::Error::InternalServerError => error
      case error
      when Gitlab::Error::NotFound
        puts "GitLab environment '#{environment.name}' / '#{environment.slug}' (##{environment.id}) was not found: ignoring it"
      when Gitlab::Error::Forbidden
        puts "GitLab environment '#{environment.name}' / '#{environment.slug}' (##{environment.id}) is forbidden: skipping it"
      else
        puts "GitLab environment '#{environment.name}' / '#{environment.slug}' (##{environment.id}) 500 error: ignoring it"
      end
    end

    # Lists Helm releases by calling the Helm client with the `--all`,
    # `--all-namespaces` and `--date` arguments.
    def helm_releases
      helm.releases(args: %w[--all --all-namespaces --date])
    end

    # Reports, then deletes, the given Helm releases together with the
    # Kubernetes namespaces named after them.
    #
    # Nothing happens for an empty list. In dry-run mode the releases are only
    # reported. Helm and Kubernetes command failures are swallowed (and logged)
    # when #ignore_exception? accepts their message, and re-raised otherwise.
    #
    # @param releases [Array<#name, #status, #last_update>] releases to remove
    def delete_helm_releases(releases)
      return if releases.empty?

      releases.each do |release|
        print_release_state(
          subject: 'Release',
          release_name: release.name,
          release_status: release.status,
          release_date: release.last_update,
          action: 'cleaning'
        )
      end

      return if dry_run

      names = releases.map(&:name)
      helm.delete(release_name: names)
      kubernetes.delete_namespaces(names)
    rescue Tooling::Helm3Client::CommandFailedError => helm_error
      raise unless ignore_exception?(helm_error.message, IGNORED_HELM_ERRORS)

      puts "Ignoring the following Helm error:\n#{helm_error}\n"
    rescue Tooling::KubernetesClient::CommandFailedError => kubernetes_error
      raise unless ignore_exception?(kubernetes_error.message, IGNORED_KUBERNETES_ERRORS)

      puts "Ignoring the following Kubernetes error:\n#{kubernetes_error}\n"
    end

    # Computes the point in time +days+ days before now.
    #
    # @param days [Integer, #to_i] number of days; coerced with #to_i
    # @return [Time] the current time minus the given number of days
    # @raise [RuntimeError] when the coerced value is outside 1..365
    def threshold_time(days:)
      day_count = days.to_i

      unless (1..365).cover?(day_count)
        raise "days should be an integer between 1 and 365 inclusive! Got #{day_count}"
      end

      seconds_per_day = 24 * 3600
      Time.now - (day_count * seconds_per_day)
    end

    # Tells whether an error message contains one of the ignorable fragments.
    #
    # Fragments are matched literally: Regexp.union escapes String members, so
    # regexp metacharacters in a fragment have no special meaning. Interpolating
    # the array straight into a regexp literal must be avoided, because its
    # string form (`["a", "b"]`) is read by the regexp engine as a character
    # class, which makes nearly every message look ignorable.
    #
    # @param exception_message [String] message of the rescued error
    # @param exceptions_ignored [Array<String, Regexp>, String, Regexp] ignorable
    #   fragments; an empty list never matches
    # @return [Boolean] true when at least one fragment occurs in the message
    def ignore_exception?(exception_message, exceptions_ignored)
      Regexp.union(*Array(exceptions_ignored)).match?(exception_message)
    end

    # Prints a one-line report (surrounded by blank lines) about what is being
    # done to a release or environment.
    #
    # @param subject [String] kind of thing reported on
    # @param release_name [String] name shown between quotes
    # @param release_date [#to_s] date shown as the last deployment date
    # @param action [String] verb describing what happens to it
    # @param release_status [String, nil] optional status, shown in parentheses
    def print_release_state(subject:, release_name:, release_date:, action:, release_status: nil)
      status_note = release_status ? "(#{release_status}) " : ''

      puts "\n#{subject} '#{release_name}' #{status_note}was last deployed on #{release_date}: #{action} it.\n"
    end
  end
end

# Runs the given block and prints how long it took.
#
# The duration is measured with the monotonic clock rather than the wall
# clock, so a system clock adjustment happening while the block runs cannot
# produce a skewed or negative duration.
#
# @param task [String] label used at the start of the printed summary
# @yieldparam context [Object] the object `timed` was called on
# @return [nil]
def timed(task)
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  yield(self)
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

  puts "#{task} finished in #{elapsed} seconds.\n"
end

# Script entry point: runs the Helm release cleanup, then the environment
# cleanups (one per environment prefix), then the stale namespace cleanup,
# timing each step.
if $PROGRAM_NAME == __FILE__
  automated_cleanup = ReviewApps::AutomatedCleanup.new(
    options: ReviewApps::AutomatedCleanup.parse_args(ARGV)
  )

  puts

  timed('Helm releases cleanup') { automated_cleanup.perform_helm_releases_cleanup!(days: 2) }

  puts

  # [timing label, environment name prefix, age in days], processed in this order.
  [
    ['Review Apps Environments cleanup', 'review/', 14],
    ['Docs Review Apps environments cleanup', 'review-docs/', 30],
    ['as-if-foss Environments cleanup', 'as-if-foss/', 30]
  ].each do |label, prefix, max_age_days|
    timed(label) do
      automated_cleanup.perform_gitlab_environment_cleanup!(env_prefix: prefix, days_for_delete: max_age_days)
    end
  end

  puts

  timed('Stale Namespace cleanup') { automated_cleanup.perform_stale_namespace_cleanup!(days: 3) }
end