1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325
|
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'optparse'
require 'open3'
module Cells
class GenerateSelectsForOrganization
attr_reader :organization_ids, :root_namespace_ids, :psql_command, :force
def self.for_cli
options = parse_options
new(**options)
end
def self.parse_options
options = {}
OptionParser.new do |opts|
opts.banner = "Usage: generate-selects-for-organization [options]"
opts.on('-o', '--organization-ids IDS', 'Organization ID(s). Integer, or a comma-separated list of integers.
Required if --root-namespace-ids is not specified.') do |value|
options[:organization_ids] = value.split(',').map(&:to_i)
end
opts.on('-n', '--root-namespace-ids IDS', 'Root namespace ID(s). Integer, or a comma-separated list of integers.
Required if --organization-ids is not specified.') do |value|
options[:root_namespace_ids] = value.split(',').map(&:to_i)
end
opts.on('-p', '--psql COMMAND', 'Default: gdk psql -c') do |value|
options[:psql_command] = value
end
opts.on('--debug', 'Enable debugging output') { options[:debug] = true }
opts.on('--force', 'Ignore missing tables error (not recommended)') { options[:force] = true }
end.parse!
if options[:organization_ids].nil? && options[:root_namespace_ids].nil?
raise '--organization-ids or --root-namespace-ids is required'
end
options
end
def initialize(
organization_ids: nil, root_namespace_ids: nil, psql_command: 'gdk psql -c', debug: false,
force: false)
@organization_ids = organization_ids.sort if organization_ids
@root_namespace_ids = root_namespace_ids.sort if root_namespace_ids
@psql_command = psql_command
@debug = debug
@force = force
end
def execute
if organization_ids
print_selects_for_organizations
else
print_selects_for_root_namespaces
end
end
def print_selects_for_organizations
print_selects_by_organization_id(organization_ids)
namespace_ids = get_namespace_ids_for_organizations(organization_ids)
return unless namespace_ids.any?
print_selects_by_namespace_id(namespace_ids)
project_ids = get_project_ids_for_namespaces(namespace_ids)
return unless project_ids.any?
print_selects_by_project_id(project_ids)
end
def print_selects_for_root_namespaces
namespace_ids = get_namespace_ids_for_root_namespaces(root_namespace_ids)
print_selects_by_namespace_id(namespace_ids)
project_ids = get_project_ids_for_namespaces(namespace_ids)
return unless project_ids.any?
print_selects_by_project_id(project_ids)
end
def print_selects_by_organization_id(organization_ids)
print_selects_of_tables(
with_column: 'organization_id', with_ids: organization_ids)
end
def print_selects_by_namespace_id(namespace_ids)
print_selects_of_tables(
with_column: 'namespace_id', with_ids: namespace_ids, without_columns: %w[organization_id])
end
def print_selects_by_project_id(project_ids)
print_selects_of_tables(
with_column: 'project_id', with_ids: project_ids, without_columns: %w[organization_id namespace_id])
end
def print_selects_of_tables(with_column: nil, with_ids: nil, without_columns: nil)
table_names = get_table_names(with_column: with_column, without_columns: without_columns)
puts "Initial table names: #{table_names}" if debug?
table_dependencies = get_table_dependencies(table_names)
puts "Table dependencies: #{table_dependencies}" if debug?
ordered_tables = order_tables_by_dependencies(table_names, table_dependencies)
puts "Ordered table names: #{ordered_tables}" if debug?
return if debug?
ordered_tables.each do |table_name|
with_column_and_ids_clause = <<~SQL
WHERE #{table_name}.#{with_column}
IN (#{with_ids.join(',')})
SQL
copy_select_sql = <<~SQL
COPY (
SELECT #{table_name}.*
FROM #{table_name}
#{with_column_and_ids_clause if with_ids}
)
TO STDOUT
WITH (FORMAT CSV, HEADER)
SQL
puts copy_select_sql.split.join(' ')
end
end
def get_table_dependencies(table_names)
table_names_str = table_names.map { |name| "'#{name}'" }.join(", ")
command = <<-SQL
COPY (
SELECT conname AS constraint_name,
conrelid::regclass AS table_name,
confrelid::regclass AS foreign_table_name
FROM pg_constraint
WHERE contype = 'f'
AND (conrelid::regclass::text IN (#{table_names_str})
OR confrelid::regclass::text IN (#{table_names_str}))
) TO STDOUT
SQL
dependencies_str, status = psql(command)
raise "Failed to get table dependencies" unless status.success?
dependencies = dependencies_str.split("\n").map { |line| line.split("\t") }
dependencies.group_by { |d| d[1] }.transform_values { |v| v.map { |d| d[2] } } # rubocop:disable Rails/Pluck -- not using Rails
end
def order_tables_by_dependencies(tables, dependencies)
ordered = []
visited = {}
all_required_tables = dependencies.values.flatten.uniq
cyclic_dependencies = []
visit = ->(table, stack) do
return if visited[table]
if stack.include?(table)
cycle = stack[stack.index(table)..] + [table]
cyclic_dependencies << cycle
return
end
stack.push(table)
(dependencies[table] || []).each do |dependency|
visit.call(dependency, stack)
end
stack.pop
visited[table] = true
ordered << table
end
# Sort the tables by the number of dependencies in descending order
tables_sorted_by_dependencies = tables.sort_by { |table| -(dependencies[table] || []).size }
tables_sorted_by_dependencies.each do |table|
visit.call(table, [])
end
# Filter only those tables that are in get_table_names
ordered_tables = ordered.select { |table| tables.include?(table) }
# Output cyclic dependencies
puts "[WARNING] Cyclic dependencies detected: #{cyclic_dependencies}" if cyclic_dependencies.any? && debug?
# Check for missing tables
missing_tables = all_required_tables - tables
abort "[ERROR] Missing tables to cover all dependencies: #{missing_tables}" if missing_tables.any? && !force
ordered_tables
end
def get_namespace_ids_for_root_namespaces(root_namespace_ids)
command = <<-SQL
COPY (
SELECT DISTINCT namespaces.id
FROM namespaces
WHERE namespaces.traversal_ids[1] IN (#{root_namespace_ids.join(',')})
ORDER BY namespaces.id ASC
)
TO STDOUT
SQL
namespace_ids_str, status = psql(command)
raise "Failed to get namespace ids for root namespaces" unless status.success?
namespace_ids_str.split("\n")
end
def get_table_names(with_column: nil, without_columns: nil)
command = <<-SQL
COPY (
SELECT DISTINCT table_name
FROM information_schema.columns
WHERE
table_schema = 'public'
#{with_column_clause(with_column)}
#{without_columns_clause(without_columns)}
) TO STDOUT
SQL
table_names_str, status = psql(command)
raise "Failed to get tables with #{with_column} column" unless status.success?
table_names_str.split("\n")
end
def with_column_clause(with_column)
return unless with_column
"AND column_name = '#{with_column}'"
end
def without_columns_clause(without_columns)
return unless without_columns
<<-SQL
AND table_name NOT IN (
SELECT table_name
FROM information_schema.columns
WHERE
table_schema = 'public'
AND column_name IN ('#{without_columns.join("','")}')
)
SQL
end
# @return [Array<String>]
def get_namespace_ids_for_organizations(organization_ids)
namespace_ids_str, status = psql(
<<~SQL
COPY (
SELECT namespaces.id
FROM namespaces
WHERE namespaces.organization_id
IN (#{organization_ids.join(',')})
ORDER BY namespaces.id ASC
)
TO STDOUT
SQL
)
raise "Failed to get namespace ids for organizations" unless status.success?
namespace_ids_str.split("\n")
end
def get_project_ids_for_namespaces(namespace_ids)
project_ids_str, status = psql(
<<~SQL
COPY (
SELECT projects.id
FROM projects
WHERE projects.namespace_id
IN (#{namespace_ids.join(',')})
ORDER BY projects.id ASC
)
TO STDOUT
SQL
)
raise "Failed to get project ids for organizations" unless status.success?
project_ids_str.split("\n")
end
def psql(query)
command = psql_command.split << query.split.join(' ')
capture2(command)
end
def capture2(command)
debug %(Run command: #{command.inspect})
stdout_str, status = Open3.capture2(*command)
debug %(Run command: stdout: "#{stdout_str}", exitstatus: "#{status.exitstatus}")
[stdout_str, status]
end
def debug(output)
puts "[DEBUG] #{output}" if debug?
end
def debug?
!!@debug
end
def force?
!!@force
end
end
end
Cells::GenerateSelectsForOrganization.for_cli.execute
|