File: object_graph.rb

package info (click to toggle)
libsequel-core-ruby 1.5.1-1
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 648 kB
  • ctags: 840
  • sloc: ruby: 10,949; makefile: 36
file content (211 lines) | stat: -rw-r--r-- 9,615 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
module Sequel
  class Dataset
    # Allows you to join multiple datasets/tables and have the result set
    # split into component tables.
    #
    # This differs from the usual usage of join, which returns the result set
    # as a single hash.  For example:
    #
    #   # CREATE TABLE artists (id INTEGER, name TEXT);
    #   # CREATE TABLE albums (id INTEGER, name TEXT, artist_id INTEGER);
    #   DB[:artists].left_outer_join(:albums, :artist_id=>:id).first
    #   => {:id=>(albums.id||artists.id), :name=>(albums.name||artists.name), :artist_id=>albums.artist_id}
    #   DB[:artists].graph(:albums, :artist_id=>:id).first
    #   => {:artists=>{:id=>artists.id, :name=>artists.name}, :albums=>{:id=>albums.id, :name=>albums.name, :artist_id=>albums.artist_id}}
    #
    # Using a join such as left_outer_join, the attribute names that are shared between
    # the tables are combined in the single return hash.  You can get around that by
    # using .select with correct aliases for all of the columns, but it is simpler to
    # use graph and have the result set split for you.  In addition, graph respects
    # any row_proc or transform attributes of the current dataset and the datasets
    # you use with graph.
    #
    # If you are graphing a table and all columns for that table are nil, this
    # indicates that no matching rows existed in the table, so graph will return nil
    # instead of a hash with all nil values:
    #
    #   # If the artist doesn't have any albums
    #   DB[:artists].graph(:albums, :artist_id=>:id).first
    #   => {:artists=>{:id=>artists.id, :name=>artists.name}, :albums=>nil}
    #
    # Arguments:
    # * dataset -  Can be a symbol (specifying a table), another dataset,
    #   or an object that responds to .dataset and yields a symbol or a dataset
    # * join_conditions - A conditions hash that is passed to the join_table method
    # * options -  A hash of graph options.  The following options are currently used:
    #   * :table_alias - The alias to use for the table.  If not specified, doesn't
    #     alias the table.  You will get an error if the alias (or table) name is
    #     used more than once.
    #   * :join_type - The type of join to use (passed to join_table).  Defaults to
    #     :left_outer.
    #   * :select - Whether to select the columns from the table you are joining, and
    #     include them as a separate hash in the output.  With this set to false,
    #     it is like simply joining the tables.  This is designed to be used for
    #     many_to_many join tables, where the columns are just foreign keys to primary
    #     keys in other tables.
    #
    # Returns the dataset produced by join_table, with the graph bookkeeping
    # stored under opts[:graph] and the qualified columns appended to
    # opts[:select].  The bookkeeping hashes and the select array are copied
    # before they are modified, so the receiver's own opts are not changed
    # by this call.
    #
    # Raises Error if the dataset argument is not a symbol, a dataset, or an
    # object responding to .dataset, or if the table alias (or table name)
    # has already been used in this graph.
    def graph(dataset, join_conditions, options = {})
      # Allow the use of a model, dataset, or symbol as the first argument
      # Find the table name/dataset based on the argument
      dataset = dataset.dataset if dataset.respond_to?(:dataset)
      case dataset
      when Symbol
        table = dataset
        dataset = @db[dataset]
      when ::Sequel::Dataset
        table = dataset.first_source
      else
        raise Error, "The dataset argument should be a symbol, dataset, or model"
      end

      # Raise Sequel::Error with explanation that the table alias has been used
      raise_alias_error = lambda do
        raise(Error, "this #{options[:table_alias] ? 'alias' : 'table'} has already been used, please specify " \
          "#{options[:table_alias] ? 'a different alias' : 'an alias via the :table_alias option'}")
      end

      # Only allow table aliases that haven't been used
      table_alias = options[:table_alias] || table
      raise_alias_error.call if @opts[:graph] && @opts[:graph][:table_aliases] && @opts[:graph][:table_aliases].include?(table_alias)

      # Join the table early in order to avoid cloning the dataset twice
      ds = join_table(options[:join_type] || :left_outer, table == table_alias ? table : "#{table} #{table_alias}", join_conditions)
      opts = ds.opts

      # Whether to include the table in the result set
      add_table = options[:select] != false
      # Whether to add the columns to the list of column aliases
      add_columns = !opts.include?(:graph_aliases)

      if (graph = opts[:graph])
        # The joined dataset may still reference the very same graph hashes
        # and select array as the receiver.  Copy them before writing so that
        # graphing from a dataset never alters that dataset.
        graph = opts[:graph] = graph.dup
        [:column_aliases, :table_aliases, :column_alias_num].each do |key|
          graph[key] = graph[key].dup if graph[key]
        end
        opts[:select] = opts[:select].dup if opts[:select]
      else
        # Setup the initial graph data structure since it doesn't exist yet
        master = ds.first_source
        raise_alias_error.call if master == table_alias
        # Master hash storing all .graph related information
        graph = opts[:graph] = {}
        # Associates column aliases back to tables and columns
        column_aliases = graph[:column_aliases] = {}
        # Associates table alias (the master is never aliased)
        graph[:table_aliases] = {master=>self}
        # Keep track of the alias numbers used
        graph[:column_alias_num] = {}
        # All columns in the master table are never
        # aliased, but are not included if set_graph_aliases
        # has been used.
        if add_columns
          # Work on a copy of any existing select list so the receiver's
          # array is left alone.
          select = opts[:select] = (opts[:select] || []).dup
          columns.each do |column|
            column_aliases[column] = [master, column]
            select.push(:"#{master}__#{column}")
          end
        end
      end

      # Add the table alias to the list of aliases
      # Even if it isn't being used in the result set,
      # we add a key for it with a nil value so we can check if it
      # is used more than once
      table_aliases = graph[:table_aliases]
      table_aliases[table_alias] = add_table ? dataset : nil

      # Add the columns to the selection unless we are ignoring them
      if add_table && add_columns
        select = opts[:select]
        column_aliases = graph[:column_aliases]
        ca_num = graph[:column_alias_num]
        # If the column hasn't been used yet, don't alias it.
        # If it has been used, try table_column.
        # If that has been used, try table_column_N
        # using the next value of N recorded for that name
        dataset.columns.each do |column|
          col_alias, c = if column_aliases[column]
            tc = :"#{table_alias}_#{column}"
            if column_aliases[tc]
              if (can = ca_num[tc])
                ca_num[tc] += 1
                tc = :"#{tc}_#{can}"
              else
                ca_num[tc] = 1
                tc = :"#{tc}_0"
              end
            end
            [tc, :"#{table_alias}__#{column}___#{tc}"]
          else
            [column, :"#{table_alias}__#{column}"]
          end
          column_aliases[col_alias] = [table_alias, column]
          select.push(c)
        end
      end
      ds
    end

    # This allows you to manually specify the graph aliases to use
    # when using graph.  You can use it to only select certain
    # columns, and have those columns mapped to specific aliases
    # in the result set.  This is the equivalent of .select for a
    # graphed dataset, and must be used instead of .select whenever
    # graphing is used. Example:
    #
    #   DB[:artists].graph(:albums, :artist_id=>:id).set_graph_aliases(:artist_name=>[:artists, :name], :album_name=>[:albums, :name]).first
    #   => {:artists=>{:name=>artists.name}, :albums=>{:name=>albums.name}}
    #
    # Arguments:
    # * graph_aliases - Should be a hash with keys being symbols of
    #   column aliases, and values being arrays with two symbol elements.
    #   The first element of the array should be the table alias,
    #   and the second should be the actual column name.
    def set_graph_aliases(graph_aliases)
      # Build one qualified column symbol per alias: table__column, with a
      # ___alias suffix only when the alias differs from the column name.
      qualified = graph_aliases.collect do |col_alias, tc|
        source_table = tc[0]
        source_column = tc[1]
        alias_suffix = source_column == col_alias ? nil : "___#{col_alias}"
        :"#{source_table}__#{source_column}#{alias_suffix}"
      end

      # Select exactly those columns and remember the mapping on the
      # resulting dataset's opts under :graph_aliases.
      aliased = select(*qualified)
      aliased.opts[:graph_aliases] = graph_aliases
      aliased
    end

    private
      # Fetch the rows, split them into component table parts,
      # transform and run the row_proc on each part (if applicable),
      # and yield a hash of the parts.
      #
      # A table's part is yielded as nil only when every one of its columns
      # in the row is nil; false values count as real data.  Returns self.
      def graph_each(opts, &block)
        # Reject tables with nil datasets, as they are excluded from
        # the result set
        datasets = @opts[:graph][:table_aliases].to_a.reject{|ta,ds| ds.nil?}
        # Get just the list of table aliases into a local variable, for speed
        table_aliases = datasets.collect{|ta,ds| ta}
        # Get an array of arrays, one for each dataset, with
        # the necessary information about each dataset, for speed
        datasets = datasets.collect do |ta, ds|
          [ta, ds, ds.instance_variable_get(:@transform), ds.row_proc]
        end
        # Use the manually set graph aliases, if any, otherwise
        # use the ones automatically created by .graph
        column_aliases = @opts[:graph_aliases] || @opts[:graph][:column_aliases]
        fetch_rows(select_sql(opts)) do |r|
          graph = {}
          # Create the sub hashes, one per table
          table_aliases.each{|ta| graph[ta]={}}
          # Split the result set based on the column aliases
          # If there are columns in the result set that are
          # not in column_aliases, they are ignored
          column_aliases.each do |col_alias, tc|
            ta, column = tc
            graph[ta][column] = r[col_alias]
          end
          # For each dataset, transform and run the
          # row_proc if applicable
          datasets.each do |ta,ds,tr,rp|
            g = graph[ta]
            # Only an all-nil part means "no matching row".  A plain any?
            # would also discard rows whose values are all false/nil, such
            # as a lone boolean column that is false.
            graph[ta] = if g.values.any?{|v| !v.nil?}
              g = ds.transform_load(g) if tr
              g = rp[g] if rp
              g
            else
              nil
            end
          end

          yield graph
        end
        self
      end
  end
end