File: db_cleaner.rb

package info (click to toggle)
gitlab 17.6.5-19
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 629,368 kB
  • sloc: ruby: 1,915,304; javascript: 557,307; sql: 60,639; xml: 6,509; sh: 4,567; makefile: 1,239; python: 406
file content (137 lines) | stat: -rw-r--r-- 5,087 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# frozen_string_literal: true

# Helpers for wiping, inspecting and (when necessary) fully recreating the
# test databases. The methods rely on names provided by the including context
# (for example `stub_feature_flags` and `with_reestablished_active_record_base`),
# which are defined outside this file.
module DbCleaner
  # Returns the unique connection classes of every connection reported by
  # TestProf's `before_all` ActiveRecord adapter.
  def all_connection_classes
    ::TestProf::BeforeAll::Adapters::ActiveRecord.all_connections.map(&:connection_class).uniq
  end

  # Deletes the rows of every table through DatabaseCleaner's :deletion strategy.
  #
  # @param except [Array<String>, nil] table names to leave untouched;
  #   'ar_internal_metadata' is always added to this list.
  #
  # The caller's array is never modified: a new list is built instead of
  # appending in place, so frozen or shared arrays can be passed safely.
  def delete_from_all_tables!(except: [])
    preserved_tables = [*except, 'ar_internal_metadata']

    DatabaseCleaner.clean_with(:deletion, cache_tables: false, except: preserved_tables)
  end

  # Work-item definition tables. Judging by the name these are meant to be
  # passed as `except:` when deleting; the callers live outside this file.
  #
  # @return [Array<String>] a fresh array on every call
  def deletion_except_tables
    %w[
      work_item_types work_item_hierarchy_restrictions
      work_item_widget_definitions work_item_related_link_restrictions
    ]
  end

  # Looks up the DatabaseCleaner ActiveRecord cleaner for each connection class.
  # NOTE(review): the lookup is presumably what registers the cleaner on first
  # access - confirm against the DatabaseCleaner version in use.
  def setup_database_cleaner
    all_connection_classes.each do |connection_class|
      DatabaseCleaner[:active_record, db: connection_class]
    end
  end

  # @return [Boolean] true as soon as one connection class has a relation
  #   exceeding the column threshold (see #more_than_allowed_columns?).
  def any_connection_class_with_more_than_allowed_columns?
    all_connection_classes.any? do |connection_class|
      more_than_allowed_columns?(connection_class)
    end
  end

  # Checks whether any relation reachable through `connection_class` has more
  # pg_attribute rows than the allowed threshold, printing one line per
  # offending relation.
  #
  # @param connection_class [Class] class exposing `connection` and
  #   `connection_db_config`
  # @return [Boolean] true when at least one relation is over the threshold
  def more_than_allowed_columns?(connection_class)
    # Postgres maximum number of columns in a table is 1600 (https://github.com/postgres/postgres/blob/de41869b64d57160f58852eab20a27f248188135/src/include/access/htup_details.h#L23-L47).
    # And since:
    # "The DROP COLUMN form does not physically remove the column, but simply makes
    # it invisible to SQL operations. Subsequent insert and update operations in the
    # table will store a null value for the column. Thus, dropping a column is quick
    # but it will not immediately reduce the on-disk size of your table, as the space
    # occupied by the dropped column is not reclaimed.
    # The space will be reclaimed over time as existing rows are updated."
    # according to https://www.postgresql.org/docs/current/sql-altertable.html.
    # We drop and recreate the database if any table has more than 1200 columns, just to be safe.
    max_allowed_columns = 1200
    tables_with_more_than_allowed_columns = connection_class.connection.execute(<<-SQL)
      SELECT attrelid::regclass::text AS table, COUNT(*) AS column_count
        FROM pg_attribute
        GROUP BY attrelid
        HAVING COUNT(*) > #{max_allowed_columns}
    SQL

    tables_with_more_than_allowed_columns.each do |result|
      puts "The #{result['table']} (#{connection_class.connection_db_config.name}) table has #{result['column_count']} columns."
    end

    tables_with_more_than_allowed_columns.any?
  end

  # Drops, recreates, loads the schema of and migrates the current databases,
  # then syncs partitions, printing the elapsed monotonic time at the end.
  def recreate_all_databases!
    start = ::Gitlab::Metrics::System.monotonic_time

    puts "Recreating the database"

    # Other sessions must be gone before the databases can be dropped.
    force_disconnect_all_connections!

    ActiveRecord::Tasks::DatabaseTasks.drop_current
    ActiveRecord::Tasks::DatabaseTasks.create_current
    ActiveRecord::Tasks::DatabaseTasks.load_schema_current

    # Migrate each database individually
    with_reestablished_active_record_base do
      all_connection_classes.each do |connection_class|
        ActiveRecord::Base.establish_connection(connection_class.connection_db_config) # rubocop: disable Database/EstablishConnection

        ActiveRecord::Tasks::DatabaseTasks.migrate
      end
    end

    # The flag is forced off only for the duration of the partition sync.
    disable_ddl_was = Feature.enabled?(:disallow_database_ddl_feature_flags, type: :ops)
    stub_feature_flags(disallow_database_ddl_feature_flags: false)

    begin
      Gitlab::Database::Partitioning.sync_partitions_ignore_db_error
    ensure
      # Restore the previous value even when the sync raises, so the
      # temporary override cannot leak into whatever runs next.
      stub_feature_flags(disallow_database_ddl_feature_flags: disable_ddl_was)
    end

    puts "Databases re-creation done in #{::Gitlab::Metrics::System.monotonic_time - start}"
  end

  # Recreates and re-seeds the databases, but only when some table is over the
  # column threshold.
  #
  # @return [Boolean] true when the databases were recreated, false otherwise
  def recreate_databases_and_seed_if_needed
    # Postgres maximum number of columns in a table is 1600 (https://github.com/postgres/postgres/blob/de41869b64d57160f58852eab20a27f248188135/src/include/access/htup_details.h#L23-L47).
    # We drop and recreate the database if any table has more than 1200 columns, just to be safe.
    return false unless any_connection_class_with_more_than_allowed_columns?

    recreate_all_databases!

    # Seed required data as recreating DBs will delete it
    TestEnv.seed_db

    true
  end

  # Terminates every other backend attached to each (non-shared) database and
  # then clears ActiveRecord's own connections.
  def force_disconnect_all_connections!
    # `pid <> pg_backend_pid()` keeps the session issuing the statement alive.
    cmd = <<~SQL
      SELECT pg_terminate_backend(pg_stat_activity.pid)
      FROM pg_stat_activity
      WHERE datname = current_database()
        AND pid <> pg_backend_pid();
    SQL

    Gitlab::Database::EachDatabase.each_connection(include_shared: false) do |connection|
      connection.execute(cmd)
    end

    ActiveRecord::Base.clear_all_connections! # rubocop:disable Database/MultipleDatabases
  end
end

# NOTE(review): `prepend_mod_with` is defined outside this file; presumably it
# prepends an edition-specific DbCleaner extension when one exists - confirm.
DbCleaner.prepend_mod_with('DbCleaner')

# We patch establish_master_connection so that it establishes a connection
# using an ActiveRecord::DatabaseConfigurations::HashConfig instead of a hash.
#
# Using a HashConfig avoids resetting the name of the connection.
module PostgreSQLDatabaseTasksPatch
  # Connects to the "postgres" database with the "public" search path, reusing
  # every other setting from `db_config`. The settings are wrapped in a
  # HashConfig carrying the original env name and config name rather than being
  # passed as a bare hash.
  def establish_master_connection
    env_name = db_config.env_name
    config_name = db_config.name
    master_settings = db_config.configuration_hash.merge(
      database: "postgres",
      schema_search_path: "public"
    )

    master_config = ActiveRecord::DatabaseConfigurations::HashConfig.new(
      env_name, config_name, master_settings
    )

    establish_connection(master_config)
  end
end

# Prepended (not included) so that the patched establish_master_connection is
# found before the method defined on PostgreSQLDatabaseTasks itself.
ActiveRecord::Tasks::PostgreSQLDatabaseTasks.prepend(PostgreSQLDatabaseTasksPatch)