File: warehouse_partitions.rb

package info (click to toggle)
ruby-pg 1.6.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,316 kB
  • sloc: ansic: 9,403; ruby: 3,160; makefile: 10
file content (311 lines) | stat: -rw-r--r-- 7,356 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
# -*- ruby -*-
# vim: set nosta noet ts=4 sw=4:
#
# Script to automatically move partitioned tables and their indexes
# to a separate area on disk.
#
# Mahlon E. Smith <mahlon@martini.nu>
#
# Example use case:
#
#   - You've got a heavy insert table, such as syslog data.
#   - This table has a partitioning trigger (or is manually partitioned)
#     by date, to separate incoming stuff from archival/report stuff.
#   - You have a tablespace on cheap or slower disk (maybe even
#     ZFS compressed, or some such!)
#
# The only assumption this script makes is that your tables are dated, and
# the tablespace they're moving into already exists.
#
# A full example, using the syslog idea from above, where each child
# table is date partitioned by a convention of "syslog_YEAR-WEEKOFYEAR":
#
#    syslog             # <--- parent
#    syslog_2012_06     # <--- inherited
#    syslog_2012_07     # <--- inherited
#    syslog_2012_08     # <--- inherited
#    ...
#
# You'd run this script like so:
#
#    ./warehouse_partitions.rb -F syslog_%Y_%U
#
# Assuming this was week 12 of the year, tables syslog_2012_06 through
# syslog_2012_11 would start sequentially migrating into the tablespace
# called 'warehouse'.
#


require 'date'
require 'ostruct'
require 'optparse'
require 'pathname'
require 'etc'
require 'pg'


### A tablespace migration class.
###
class PGWarehouse

	def initialize( opts )
		@opts = opts
		@db = PG.connect(
			:dbname   => opts.database,
			:host     => opts.host,
			:port     => opts.port,
			:user     => opts.user,
			:password => opts.pass,
			:sslmode  => 'prefer'
		)
		@db.exec "SET search_path TO %s" % [ opts.schema ] if opts.schema

		@relations = self.relations
	end

	attr_reader :db

	######
	public
	######

	### Perform the tablespace moves.
	###
	def migrate
		if @relations.empty?
			$stderr.puts 'No tables were found for warehousing.'
			return
		end

		$stderr.puts "Found %d relation%s to move." % [ relations.length, relations.length == 1 ? '' : 's' ]
		@relations.sort_by{|_,v| v[:name] }.each do |_, val|
			$stderr.print "  - Moving table '%s' to '%s'... "  % [
				val[:name], @opts.tablespace
			]

			if @opts.dryrun
				$stderr.puts '(not really)'

			else
				age = self.timer do
					db.exec "ALTER TABLE %s SET TABLESPACE %s;" % [
						val[:name], @opts.tablespace
					]
				end
				puts age
			end

			val[ :indexes ].each do |idx|
				$stderr.print "      - Moving index '%s' to '%s'... "  % [
					idx, @opts.tablespace
				]
				if @opts.dryrun
					$stderr.puts '(not really)'

				else
					age = self.timer do
						db.exec "ALTER INDEX %s SET TABLESPACE %s;" % [
							idx, @opts.tablespace
						]
					end
					puts age
				end
			end
		end
	end


	#########
	protected
	#########

	### Get OIDs and current tablespaces for everything under the
	### specified schema.
	###
	def relations
		return @relations if @relations
		relations = {}

		query =  %q{
			SELECT c.oid AS oid,
				c.relname AS name,
				c.relkind AS kind,
				t.spcname AS tspace
			FROM pg_class AS c
			LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
			LEFT JOIN pg_tablespace t ON t.oid = c.reltablespace
			WHERE c.relkind = 'r' }
		query << "AND n.nspname='#{@opts.schema}'" if @opts.schema

		# Get the relations list, along with each element's current tablespace.
		#
		self.db.exec( query ) do |res|
			res.each do |row|
				relations[ row['oid'] ] = {
					:name       => row['name'],
					:tablespace => row['tspace'],
					:indexes    => [],
					:parent     => nil
				}
			end
		end

		# Add table inheritance information.
		#
		db.exec 'SELECT inhrelid AS oid, inhparent AS parent FROM pg_inherits' do |res|
			res.each do |row|
				relations[ row['oid'] ][ :parent ] = row['parent']
			end
		end

		# Remove tables that don't qualify for warehousing.
		#
		#   - Tables that are not children of a parent
		#   - Tables that are already in the warehouse tablespace
		#   - The currently active child (it's likely being written to!)
		#   - Any table that can't be parsed into the specified format
		#
		relations.reject! do |oid, val|
			begin
				val[:parent].nil? ||
				val[:tablespace] == @opts.tablespace ||
				val[:name] == Time.now.strftime( @opts.format ) ||
				! DateTime.strptime( val[:name], @opts.format )
			rescue ArgumentError
				true
			end
		end

		query = %q{
			SELECT c.oid AS oid,
				i.indexname AS name
			FROM pg_class AS c
			INNER JOIN pg_indexes AS i
				ON i.tablename = c.relname }
		query << "AND i.schemaname='#{@opts.schema}'" if @opts.schema

		# Attach index names to tables.
		#
		db.exec( query ) do |res|
			res.each do |row|
				relations[ row['oid'] ][ :indexes ] << row['name'] if relations[ row['oid'] ]
			end
		end

		return relations
	end


	### Wrap arbitrary commands in a human readable timer.
	###
	def timer
		start = Time.now
		yield
		age = Time.now - start

		diff = age
		secs = diff % 60
		diff = ( diff - secs ) / 60
		mins = diff % 60
		diff = ( diff - mins ) / 60
		hour = diff % 24

		return "%02d:%02d:%02d" % [ hour, mins, secs ]
	end
end


### Parse command line arguments.  Return a struct of global options.
###
def parse_args( args )
	options          = OpenStruct.new
	options.database = Etc.getpwuid( Process.uid ).name
	options.host     = '127.0.0.1'
	options.port     = 5432
	options.user     = Etc.getpwuid( Process.uid ).name
	options.sslmode  = 'prefer'
	options.tablespace = 'warehouse'

	opts = OptionParser.new do |opts|
		opts.banner = "Usage: #{$0} [options]"

		opts.separator ''
		opts.separator 'Connection options:'

		opts.on( '-d', '--database DBNAME',
				"specify the database to connect to (default: \"#{options.database}\")" ) do |db|
			options.database = db
		end

		opts.on( '-h', '--host HOSTNAME', 'database server host' ) do |host|
			options.host = host
		end

		opts.on( '-p', '--port PORT', Integer,
				"database server port (default: \"#{options.port}\")" ) do |port|
			options.port = port
		end

		opts.on( '-n', '--schema SCHEMA', String,
				"operate on the named schema only (default: none)" ) do |schema|
			options.schema = schema
		end

		opts.on( '-T', '--tablespace SPACE', String,
				"move old tables to this tablespace (default: \"#{options.tablespace}\")" ) do |tb|
			options.tablespace = tb
		end

		opts.on( '-F', '--tableformat FORMAT', String,
				"The naming format (strftime) for the inherited tables (default: none)" ) do |format|
			options.format = format
		end

		opts.on( '-U', '--user NAME',
				"database user name (default: \"#{options.user}\")" ) do |user|
			options.user = user
		end

		opts.on( '-W', 'force password prompt' ) do |pw|
			print 'Password: '
			begin
				system 'stty -echo'
				options.pass = gets.chomp
			ensure
				system 'stty echo'
				puts
			end
		end

		opts.separator ''
		opts.separator 'Other options:'

		opts.on_tail( '--dry-run', "don't actually do anything" ) do
			options.dryrun = true
		end

		opts.on_tail( '--help', 'show this help, then exit' ) do
			$stderr.puts opts
			exit
		end

		opts.on_tail( '--version', 'output version information, then exit' ) do
			puts Stats::VERSION
			exit
		end
	end

	opts.parse!( args )
	return options
end


if __FILE__ == $0
	opts = parse_args( ARGV )
	raise ArgumentError, "A naming format (-F) is required." unless opts.format

	$stdout.sync = true
	PGWarehouse.new( opts ).migrate
end