File: fingerprint.rb

package info (click to toggle)
ruby-gitlab-pg-query 2.0.4-3
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 15,584 kB
  • sloc: ansic: 143,939; ruby: 2,096; makefile: 4
file content (150 lines) | stat: -rw-r--r-- 5,005 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
require 'digest'

module PgQuery
  class ParserResult
    # Computes the fingerprint of the parsed statements.
    #
    # Collects the fingerprint parts of the tree via fingerprint_tree, joins
    # them into one string, hashes that string with PgQuery.hash_xxh3_64
    # (passing FINGERPRINT_VERSION as the second argument) and returns the
    # result as a zero-padded, 16-character lowercase hex string.
    def fingerprint
      collected = FingerprintSubHash.new
      fingerprint_tree(collected)

      joined_parts = collected.parts.join
      format('%016x', PgQuery.hash_xxh3_64(joined_parts, FINGERPRINT_VERSION))
    end

    # Methods defined below this point are private helpers of the fingerprint
    # computation. (Ruby's +private+ affects methods only, not the constant or
    # the nested class that follow.)
    private

    # Passed as the second argument to PgQuery.hash_xxh3_64 everywhere this
    # file hashes fingerprint parts.
    # NOTE(review): presumably the fingerprint format version used as the hash
    # seed; changing it would likely change every fingerprint - confirm.
    FINGERPRINT_VERSION = 3

    # Ordered accumulator of fingerprint parts.
    #
    # Parts are buffered here so a caller can inspect them (for example to see
    # whether anything was produced at all) before copying them into another
    # object that responds to +update+.
    class FingerprintSubHash
      # The parts appended so far, in insertion order.
      attr_reader :parts

      def initialize
        @parts = []
      end

      # Appends +part+ to the buffer and returns the buffer.
      def update(part)
        @parts.push(part)
      end

      # Replays every buffered part, in order, into +hash+ by calling
      # +hash.update+ once per part. The buffer itself is left unchanged.
      def flush_to(hash)
        parts.each { |buffered| hash.update(buffered) }
      end
    end

    # True when +val+ equals one of the "empty" values that never contribute
    # to a fingerprint: nil, 0, false, an empty array or an empty string.
    # Comparison is by ==, so e.g. 0.0 counts as 0.
    def ignored_fingerprint_value?(val)
      empty_values = [nil, 0, false, [], '']
      empty_values.any? { |empty| empty == val }
    end

    # Fingerprints a single field value into +hash+.
    #
    # The value is first rendered into a scratch FingerprintSubHash:
    # repeated fields and List items go through fingerprint_list, other
    # protobuf messages through fingerprint_node, and any remaining value is
    # written as +val.to_s+ unless ignored_fingerprint_value? rejects it.
    #
    # Nothing is written to +hash+ if the scratch buffer stays empty.
    # Otherwise +parent_field_name+ is written first (only when
    # +need_to_write_name+ is truthy), followed by the buffered parts.
    def fingerprint_value(val, hash, parent_node_name, parent_field_name, need_to_write_name) # rubocop:disable Metrics/CyclomaticComplexity
      scratch = FingerprintSubHash.new

      # Branch order matters: List is tested before the generic message case.
      case val
      when Google::Protobuf::RepeatedField
        fingerprint_list(val, scratch, parent_node_name, parent_field_name)
      when List
        fingerprint_list(val.items, scratch, parent_node_name, parent_field_name)
      when Google::Protobuf::MessageExts
        fingerprint_node(val, scratch, parent_node_name, parent_field_name)
      else
        scratch.update(val.to_s) unless ignored_fingerprint_value?(val)
      end

      return if scratch.parts.empty?

      hash.update(parent_field_name) if need_to_write_name
      scratch.flush_to(hash)
    end

    # True when +node+ must be left out of the fingerprint entirely.
    #
    # That is the case when its class is exactly one of A_Const, Alias,
    # ParamRef, SetToDefault, IntList, OidList or Null, or when it is a
    # TypeCast whose argument wraps an a_const or a param_ref.
    def ignored_node_type?(node)
      always_ignored = [A_Const, Alias, ParamRef, SetToDefault, IntList, OidList, Null]
      return true if always_ignored.include?(node.class)
      return false unless node.is_a?(TypeCast)

      %i[a_const param_ref].include?(node.arg.node)
    end

    # Maps a protobuf field of +node_class+ to its JSON name by looking up the
    # [node_class, field] pair in INTERNAL_PROTO_FIELD_NAME_TO_JSON_NAME.
    # Uses +fetch+, so an unknown pair raises instead of returning nil.
    def node_protobuf_field_name_to_json_name(node_class, field)
      # Use protobuf descriptor once json_name support is fixed: https://github.com/protocolbuffers/protobuf/pull/8356
      # node_class.descriptor.find { |d| d.name == field.to_s }.json_name
      lookup_key = [node_class, field]
      INTERNAL_PROTO_FIELD_NAME_TO_JSON_NAME.fetch(lookup_key)
    end

    # Feeds the fingerprint-relevant parts of +node+ into +hash+ (any object
    # responding to +update+).
    #
    # +parent_node_name+ and +parent_field_name+ describe the field through
    # which +node+ was reached. They are forwarded unchanged when +node+ is a
    # Node wrapper and are consulted by the ResTarget rule below.
    #
    # Writes nothing when ignored_node_type?(node) is true.
    def fingerprint_node(node, hash, parent_node_name = nil, parent_field_name = nil) # rubocop:disable Metrics/CyclomaticComplexity
      return if ignored_node_type?(node)

      # Node is treated as a wrapper: node.node names the member that is set,
      # and the wrapped value is read through that name. Nothing is written
      # when no member is set.
      if node.is_a?(Node)
        return if node.node.nil?
        node_val = node[node.node.to_s]
        unless ignored_node_type?(node_val)
          # The wrapped type's JSON name is written before its contents (and
          # stays written even if the contents turn out to add no parts).
          # List values are the exception: no name is written for them, and
          # fingerprint_value fingerprints their items.
          unless node_val.is_a?(List)
            postgres_node_name = node_protobuf_field_name_to_json_name(node.class, node.node)
            hash.update(postgres_node_name)
          end
          # The parent's names are passed through, and no field name is written.
          fingerprint_value(node_val, hash, parent_node_name, parent_field_name, false)
        end
        return
      end

      # Last segment of the class name, i.e. the name without its namespace.
      postgres_node_name = node.class.name.split('::').last

      # Fields are visited in the sorted order of the keys returned by to_h
      # (not in the order of their JSON names).
      node.to_h.keys.sort.each do |field_name|
        val = node[field_name.to_s]

        postgres_field_name = node_protobuf_field_name_to_json_name(node.class, field_name)

        # Per-field exceptions. A branch that does not hit +next+ falls
        # through to the generic fingerprint_value call after the case.
        case postgres_field_name
        when 'location'
          # Always skipped, for every node type.
          next
        when 'name'
          next if [PrepareStmt, ExecuteStmt, DeallocateStmt].include?(node.class)
          # Skipped for a ResTarget reached through SelectStmt's targetList.
          next if node.is_a?(ResTarget) && parent_node_name == 'SelectStmt' && parent_field_name == 'targetList'
        when 'gid', 'options', 'savepoint_name'
          next if node.is_a?(TransactionStmt)
        when 'portalname'
          next if [DeclareCursorStmt, FetchStmt, ClosePortalStmt].include?(node.class)
        when 'relname'
          # NOTE(review): relpersistence 't' presumably marks temporary
          # relations - confirm. Their names are skipped entirely.
          next if node.is_a?(RangeVar) && node.relpersistence == 't'
          if node.is_a?(RangeVar)
            # Every run of two or more digits is removed from the relation
            # name before it is fingerprinted.
            fingerprint_value(val.gsub(/\d{2,}/, ''), hash, postgres_node_name, postgres_field_name, true)
            next
          end
        when 'stmt_len'
          next if node.is_a?(RawStmt)
        when 'stmt_location'
          next if node.is_a?(RawStmt)
        when 'kind'
          # A_Expr kinds AEXPR_OP_ANY and AEXPR_IN are fingerprinted as if
          # they were AEXPR_OP.
          if node.is_a?(A_Expr) && (val == :AEXPR_OP_ANY || val == :AEXPR_IN)
            fingerprint_value(:AEXPR_OP, hash, postgres_node_name, postgres_field_name, true)
            next
          end
        end

        # Default: write the field name followed by the value's parts;
        # fingerprint_value writes nothing if the value produces no parts.
        fingerprint_value(val, hash, postgres_node_name, postgres_field_name, true)
      end
    end

    # Fingerprints every element of +values+ into +hash+.
    #
    # For most fields the elements are fingerprinted in place, in order. For
    # fromClause, targetList, cols, rexpr, valuesLists and args each element
    # is first rendered into its own FingerprintSubHash; elements with
    # identical parts are collapsed to one, and the remainder are written in
    # ascending order of the hash of their joined parts. Reordering or
    # repeating elements of those lists therefore does not change the output.
    def fingerprint_list(values, hash, parent_node_name, parent_field_name)
      order_insensitive = %w[fromClause targetList cols rexpr valuesLists args].include?(parent_field_name)

      unless order_insensitive
        return values.each { |item| fingerprint_value(item, hash, parent_node_name, parent_field_name, false) }
      end

      per_item = values.map do |item|
        FingerprintSubHash.new.tap do |item_hash|
          fingerprint_value(item, item_hash, parent_node_name, parent_field_name, false)
        end
      end

      per_item
        .uniq(&:parts)
        .sort_by { |item_hash| PgQuery.hash_xxh3_64(item_hash.parts.join, FINGERPRINT_VERSION) }
        .each { |item_hash| item_hash.flush_to(hash) }
    end

    # Fingerprints every statement in @tree.stmts into +hash+, in order.
    # Each statement is preceded by the literal marker 'RawStmt'.
    def fingerprint_tree(hash)
      @tree.stmts.each do |raw_stmt|
        hash.update('RawStmt')
        fingerprint_node(raw_stmt, hash)
      end
    end
  end
end