1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311
|
# -*- ruby -*-
# vim: set nosta noet ts=4 sw=4:
#
# Script to automatically move partitioned tables and their indexes
# to a separate area on disk.
#
# Mahlon E. Smith <mahlon@martini.nu>
#
# Example use case:
#
# - You've got a heavy insert table, such as syslog data.
# - This table has a partitioning trigger (or is manually partitioned)
# by date, to separate incoming stuff from archival/report stuff.
# - You have a tablespace on cheap or slower disk (maybe even
# ZFS compressed, or some such!)
#
# The only assumption this script makes is that your tables are dated, and
# the tablespace they're moving into already exists.
#
# A full example, using the syslog idea from above, where each child
# table is date partitioned by a convention of "syslog_YEAR-WEEKOFYEAR":
#
# syslog # <--- parent
# syslog_2012_06 # <--- inherited
# syslog_2012_07 # <--- inherited
# syslog_2012_08 # <--- inherited
# ...
#
# You'd run this script like so:
#
# ./warehouse_partitions.rb -F syslog_%Y_%U
#
# Assuming this was week 12 of the year, tables syslog_2012_06 through
# syslog_2012_11 would start sequentially migrating into the tablespace
# called 'warehouse'.
#
require 'date'
require 'ostruct'
require 'optparse'
require 'pathname'
require 'etc'
require 'pg'
### A tablespace migration class.
###
class PGWarehouse
def initialize( opts )
@opts = opts
@db = PG.connect(
:dbname => opts.database,
:host => opts.host,
:port => opts.port,
:user => opts.user,
:password => opts.pass,
:sslmode => 'prefer'
)
@db.exec "SET search_path TO %s" % [ opts.schema ] if opts.schema
@relations = self.relations
end
attr_reader :db
######
public
######
### Perform the tablespace moves.
###
def migrate
if @relations.empty?
$stderr.puts 'No tables were found for warehousing.'
return
end
$stderr.puts "Found %d relation%s to move." % [ relations.length, relations.length == 1 ? '' : 's' ]
@relations.sort_by{|_,v| v[:name] }.each do |_, val|
$stderr.print " - Moving table '%s' to '%s'... " % [
val[:name], @opts.tablespace
]
if @opts.dryrun
$stderr.puts '(not really)'
else
age = self.timer do
db.exec "ALTER TABLE %s SET TABLESPACE %s;" % [
val[:name], @opts.tablespace
]
end
puts age
end
val[ :indexes ].each do |idx|
$stderr.print " - Moving index '%s' to '%s'... " % [
idx, @opts.tablespace
]
if @opts.dryrun
$stderr.puts '(not really)'
else
age = self.timer do
db.exec "ALTER INDEX %s SET TABLESPACE %s;" % [
idx, @opts.tablespace
]
end
puts age
end
end
end
end
#########
protected
#########
### Get OIDs and current tablespaces for everything under the
### specified schema.
###
def relations
return @relations if @relations
relations = {}
query = %q{
SELECT c.oid AS oid,
c.relname AS name,
c.relkind AS kind,
t.spcname AS tspace
FROM pg_class AS c
LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
LEFT JOIN pg_tablespace t ON t.oid = c.reltablespace
WHERE c.relkind = 'r' }
query << "AND n.nspname='#{@opts.schema}'" if @opts.schema
# Get the relations list, along with each element's current tablespace.
#
self.db.exec( query ) do |res|
res.each do |row|
relations[ row['oid'] ] = {
:name => row['name'],
:tablespace => row['tspace'],
:indexes => [],
:parent => nil
}
end
end
# Add table inheritance information.
#
db.exec 'SELECT inhrelid AS oid, inhparent AS parent FROM pg_inherits' do |res|
res.each do |row|
relations[ row['oid'] ][ :parent ] = row['parent']
end
end
# Remove tables that don't qualify for warehousing.
#
# - Tables that are not children of a parent
# - Tables that are already in the warehouse tablespace
# - The currently active child (it's likely being written to!)
# - Any table that can't be parsed into the specified format
#
relations.reject! do |oid, val|
begin
val[:parent].nil? ||
val[:tablespace] == @opts.tablespace ||
val[:name] == Time.now.strftime( @opts.format ) ||
! DateTime.strptime( val[:name], @opts.format )
rescue ArgumentError
true
end
end
query = %q{
SELECT c.oid AS oid,
i.indexname AS name
FROM pg_class AS c
INNER JOIN pg_indexes AS i
ON i.tablename = c.relname }
query << "AND i.schemaname='#{@opts.schema}'" if @opts.schema
# Attach index names to tables.
#
db.exec( query ) do |res|
res.each do |row|
relations[ row['oid'] ][ :indexes ] << row['name'] if relations[ row['oid'] ]
end
end
return relations
end
### Wrap arbitrary commands in a human readable timer.
###
def timer
start = Time.now
yield
age = Time.now - start
diff = age
secs = diff % 60
diff = ( diff - secs ) / 60
mins = diff % 60
diff = ( diff - mins ) / 60
hour = diff % 24
return "%02d:%02d:%02d" % [ hour, mins, secs ]
end
end
### Parse command line arguments. Return a struct of global options.
###
def parse_args( args )
options = OpenStruct.new
options.database = Etc.getpwuid( Process.uid ).name
options.host = '127.0.0.1'
options.port = 5432
options.user = Etc.getpwuid( Process.uid ).name
options.sslmode = 'prefer'
options.tablespace = 'warehouse'
opts = OptionParser.new do |opts|
opts.banner = "Usage: #{$0} [options]"
opts.separator ''
opts.separator 'Connection options:'
opts.on( '-d', '--database DBNAME',
"specify the database to connect to (default: \"#{options.database}\")" ) do |db|
options.database = db
end
opts.on( '-h', '--host HOSTNAME', 'database server host' ) do |host|
options.host = host
end
opts.on( '-p', '--port PORT', Integer,
"database server port (default: \"#{options.port}\")" ) do |port|
options.port = port
end
opts.on( '-n', '--schema SCHEMA', String,
"operate on the named schema only (default: none)" ) do |schema|
options.schema = schema
end
opts.on( '-T', '--tablespace SPACE', String,
"move old tables to this tablespace (default: \"#{options.tablespace}\")" ) do |tb|
options.tablespace = tb
end
opts.on( '-F', '--tableformat FORMAT', String,
"The naming format (strftime) for the inherited tables (default: none)" ) do |format|
options.format = format
end
opts.on( '-U', '--user NAME',
"database user name (default: \"#{options.user}\")" ) do |user|
options.user = user
end
opts.on( '-W', 'force password prompt' ) do |pw|
print 'Password: '
begin
system 'stty -echo'
options.pass = gets.chomp
ensure
system 'stty echo'
puts
end
end
opts.separator ''
opts.separator 'Other options:'
opts.on_tail( '--dry-run', "don't actually do anything" ) do
options.dryrun = true
end
opts.on_tail( '--help', 'show this help, then exit' ) do
$stderr.puts opts
exit
end
opts.on_tail( '--version', 'output version information, then exit' ) do
puts Stats::VERSION
exit
end
end
opts.parse!( args )
return options
end
if __FILE__ == $0
opts = parse_args( ARGV )
raise ArgumentError, "A naming format (-F) is required." unless opts.format
$stdout.sync = true
PGWarehouse.new( opts ).migrate
end
|