File: generate-selects-for-organization

package info (click to toggle)
gitlab 17.6.5-19
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 629,368 kB
  • sloc: ruby: 1,915,304; javascript: 557,307; sql: 60,639; xml: 6,509; sh: 4,567; makefile: 1,239; python: 406
file content (325 lines) | stat: -rwxr-xr-x 9,853 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'optparse'
require 'open3'

module Cells
  class GenerateSelectsForOrganization
    attr_reader :organization_ids, :root_namespace_ids, :psql_command, :force

    def self.for_cli
      options = parse_options

      new(**options)
    end

    def self.parse_options
      options = {}

      OptionParser.new do |opts|
        opts.banner = "Usage: generate-selects-for-organization [options]"

        opts.on('-o', '--organization-ids IDS', 'Organization ID(s). Integer, or a comma-separated list of integers.
                                      Required if --root-namespace-ids is not specified.') do |value|
          options[:organization_ids] = value.split(',').map(&:to_i)
        end
        opts.on('-n', '--root-namespace-ids IDS', 'Root namespace ID(s). Integer, or a comma-separated list of integers.
                                      Required if --organization-ids is not specified.') do |value|
          options[:root_namespace_ids] = value.split(',').map(&:to_i)
        end
        opts.on('-p', '--psql COMMAND', 'Default: gdk psql -c') do |value|
          options[:psql_command] = value
        end
        opts.on('--debug', 'Enable debugging output') { options[:debug] = true }
        opts.on('--force', 'Ignore missing tables error (not recommended)') { options[:force] = true }
      end.parse!

      if options[:organization_ids].nil? && options[:root_namespace_ids].nil?
        raise '--organization-ids or --root-namespace-ids is required'
      end

      options
    end

    def initialize(
      organization_ids: nil, root_namespace_ids: nil, psql_command: 'gdk psql -c', debug: false,
      force: false)
      @organization_ids = organization_ids.sort if organization_ids
      @root_namespace_ids = root_namespace_ids.sort if root_namespace_ids
      @psql_command = psql_command
      @debug = debug
      @force = force
    end

    def execute
      if organization_ids
        print_selects_for_organizations
      else
        print_selects_for_root_namespaces
      end
    end

    def print_selects_for_organizations
      print_selects_by_organization_id(organization_ids)

      namespace_ids = get_namespace_ids_for_organizations(organization_ids)
      return unless namespace_ids.any?

      print_selects_by_namespace_id(namespace_ids)

      project_ids = get_project_ids_for_namespaces(namespace_ids)
      return unless project_ids.any?

      print_selects_by_project_id(project_ids)
    end

    def print_selects_for_root_namespaces
      namespace_ids = get_namespace_ids_for_root_namespaces(root_namespace_ids)
      print_selects_by_namespace_id(namespace_ids)

      project_ids = get_project_ids_for_namespaces(namespace_ids)
      return unless project_ids.any?

      print_selects_by_project_id(project_ids)
    end

    def print_selects_by_organization_id(organization_ids)
      print_selects_of_tables(
        with_column: 'organization_id', with_ids: organization_ids)
    end

    def print_selects_by_namespace_id(namespace_ids)
      print_selects_of_tables(
        with_column: 'namespace_id', with_ids: namespace_ids, without_columns: %w[organization_id])
    end

    def print_selects_by_project_id(project_ids)
      print_selects_of_tables(
        with_column: 'project_id', with_ids: project_ids, without_columns: %w[organization_id namespace_id])
    end

    def print_selects_of_tables(with_column: nil, with_ids: nil, without_columns: nil)
      table_names = get_table_names(with_column: with_column, without_columns: without_columns)
      puts "Initial table names: #{table_names}" if debug?

      table_dependencies = get_table_dependencies(table_names)
      puts "Table dependencies: #{table_dependencies}" if debug?

      ordered_tables = order_tables_by_dependencies(table_names, table_dependencies)
      puts "Ordered table names: #{ordered_tables}" if debug?

      return if debug?

      ordered_tables.each do |table_name|
        with_column_and_ids_clause = <<~SQL
          WHERE #{table_name}.#{with_column}
          IN (#{with_ids.join(',')})
        SQL

        copy_select_sql = <<~SQL
          COPY (
            SELECT #{table_name}.*
            FROM #{table_name}
            #{with_column_and_ids_clause if with_ids}
          )
          TO STDOUT
          WITH (FORMAT CSV, HEADER)
        SQL

        puts copy_select_sql.split.join(' ')
      end
    end

    def get_table_dependencies(table_names)
      table_names_str = table_names.map { |name| "'#{name}'" }.join(", ")

      command = <<-SQL
        COPY (
          SELECT conname AS constraint_name,
            conrelid::regclass AS table_name,
            confrelid::regclass AS foreign_table_name
          FROM pg_constraint
          WHERE contype = 'f'
            AND (conrelid::regclass::text IN (#{table_names_str})
              OR confrelid::regclass::text IN (#{table_names_str}))
        ) TO STDOUT
      SQL

      dependencies_str, status = psql(command)
      raise "Failed to get table dependencies" unless status.success?

      dependencies = dependencies_str.split("\n").map { |line| line.split("\t") }
      dependencies.group_by { |d| d[1] }.transform_values { |v| v.map { |d| d[2] } } # rubocop:disable Rails/Pluck -- not using Rails
    end

    def order_tables_by_dependencies(tables, dependencies)
      ordered = []
      visited = {}
      all_required_tables = dependencies.values.flatten.uniq

      cyclic_dependencies = []

      visit = ->(table, stack) do
        return if visited[table]

        if stack.include?(table)
          cycle = stack[stack.index(table)..] + [table]
          cyclic_dependencies << cycle
          return
        end

        stack.push(table)

        (dependencies[table] || []).each do |dependency|
          visit.call(dependency, stack)
        end

        stack.pop
        visited[table] = true
        ordered << table
      end

      # Sort the tables by the number of dependencies in descending order
      tables_sorted_by_dependencies = tables.sort_by { |table| -(dependencies[table] || []).size }

      tables_sorted_by_dependencies.each do |table|
        visit.call(table, [])
      end

      # Filter only those tables that are in get_table_names
      ordered_tables = ordered.select { |table| tables.include?(table) }

      # Output cyclic dependencies
      puts "[WARNING] Cyclic dependencies detected: #{cyclic_dependencies}" if cyclic_dependencies.any? && debug?

      # Check for missing tables
      missing_tables = all_required_tables - tables
      abort "[ERROR] Missing tables to cover all dependencies: #{missing_tables}" if missing_tables.any? && !force

      ordered_tables
    end

    def get_namespace_ids_for_root_namespaces(root_namespace_ids)
      command = <<-SQL
        COPY (
          SELECT DISTINCT namespaces.id
          FROM namespaces
          WHERE namespaces.traversal_ids[1] IN (#{root_namespace_ids.join(',')})
          ORDER BY namespaces.id ASC
        )
        TO STDOUT
      SQL

      namespace_ids_str, status = psql(command)
      raise "Failed to get namespace ids for root namespaces" unless status.success?

      namespace_ids_str.split("\n")
    end

    def get_table_names(with_column: nil, without_columns: nil)
      command = <<-SQL
        COPY (
          SELECT DISTINCT table_name
          FROM information_schema.columns
          WHERE
            table_schema = 'public'
            #{with_column_clause(with_column)}
            #{without_columns_clause(without_columns)}
        ) TO STDOUT
      SQL

      table_names_str, status = psql(command)
      raise "Failed to get tables with #{with_column} column" unless status.success?

      table_names_str.split("\n")
    end

    def with_column_clause(with_column)
      return unless with_column

      "AND column_name = '#{with_column}'"
    end

    def without_columns_clause(without_columns)
      return unless without_columns

      <<-SQL
        AND table_name NOT IN (
          SELECT table_name
          FROM information_schema.columns
          WHERE
            table_schema = 'public'
            AND column_name IN ('#{without_columns.join("','")}')
        )
      SQL
    end

    # @return [Array<String>]
    def get_namespace_ids_for_organizations(organization_ids)
      namespace_ids_str, status = psql(
        <<~SQL
          COPY (
            SELECT namespaces.id
            FROM namespaces
            WHERE namespaces.organization_id
            IN (#{organization_ids.join(',')})
            ORDER BY namespaces.id ASC
          )
          TO STDOUT
        SQL
      )
      raise "Failed to get namespace ids for organizations" unless status.success?

      namespace_ids_str.split("\n")
    end

    def get_project_ids_for_namespaces(namespace_ids)
      project_ids_str, status = psql(
        <<~SQL
          COPY (
            SELECT projects.id
            FROM projects
            WHERE projects.namespace_id
            IN (#{namespace_ids.join(',')})
            ORDER BY projects.id ASC
          )
          TO STDOUT
        SQL
      )
      raise "Failed to get project ids for organizations" unless status.success?

      project_ids_str.split("\n")
    end

    def psql(query)
      command = psql_command.split << query.split.join(' ')

      capture2(command)
    end

    def capture2(command)
      debug %(Run command: #{command.inspect})

      stdout_str, status = Open3.capture2(*command)

      debug %(Run command: stdout: "#{stdout_str}", exitstatus: "#{status.exitstatus}")

      [stdout_str, status]
    end

    def debug(output)
      puts "[DEBUG] #{output}" if debug?
    end

    def debug?
      !!@debug
    end

    def force?
      !!@force
    end
  end
end

Cells::GenerateSelectsForOrganization.for_cli.execute