File: positional_generator.rb

package info (click to toggle)
ruby-faker 3.6.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 9,360 kB
  • sloc: ruby: 20,654; makefile: 6; sh: 6
file content (480 lines) | stat: -rw-r--r-- 13,345 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
# frozen_string_literal: true

##
# A high level way to generate a list of generated values that fit a specific
# format, such as an ID, postal code, or phone number.
#
# It provides generators for random digits and letters, hardcoded literal
# strings, computed values based on previously-generated values, union (one-of)
# selectors, and grouped generators.
#
# The generation allows for dependencies on previously generated values -- most
# useful for computations -- and this object knows how to build that dependency
# graph.
#
# See {PositionalGenerator::Builder} for more.
class PositionalGenerator
  ##
  # Remember the format description so that {#generate} can evaluate it.
  #
  # @param as_type [Symbol] the kind of value to produce; +:string+ yields a String
  # @param block [Proc] receives a {Builder} and declares the components on it
  def initialize(as_type, &block)
    @generator_builder = Builder.new(as_type)
    @block = block
  end

  ##
  # Evaluate the format description and produce one value.
  #
  # Every call runs the block against a brand-new {Builder}. Re-running the
  # block against one shared builder would append its components a second
  # time, so repeated calls would return ever-longer values.
  #
  # @return [String] if +as_type+ is +:string+
  def generate
    builder = Builder.new(@generator_builder.as_type)
    @block.call(builder)
    builder.build
  end

  # One entry in a Builder's component list: its index in that list
  # (+position+), an optional +name+ that other components can refer to, the
  # names it depends on (+deps+), and the +generator+ object that produces
  # its value.
  Component = Struct.new(:position, :name, :deps, :generator)

  class Builder
    attr_reader :as_type

    ##
    # Start with an empty component list.
    #
    # @param as_type [Symbol] the kind of value {#build} returns, e.g. +:string+
    def initialize(as_type)
      @as_type = as_type
      @components = []
    end

    ##
    # Append an integer component.
    #
    # When +ranges+ is omitted the value has exactly +length+ digits; for
    # example +length: 5+ draws from 10000..99999.
    #
    # @param name [Symbol] the name for this node in the group
    # @param length [Integer] how many digits to generate
    # @param ranges [Array<Range>] an array of limitations on the generation;
    #   one element is picked at random and the value is drawn from it.
    # @return [void]
    #
    # @example a digit
    #   int
    #
    # @example five digits named :a
    #   int(name: :a, length: 5)
    #
    # @example digits of any length between 4 to 10
    #   int(ranges: [1_000..9_999_999_999])
    def int(name: nil, length: 1, ranges: nil)
      generator = Int.new(length, ranges)
      @components.push(Component.new(@components.size, name, [], generator))
    end

    ##
    # Append a component made of letters; by default each one is drawn from
    # 'a'..'z' and 'A'..'Z'.
    #
    # @param name [Symbol] the name for this node in the group
    # @param length [Integer] how many letters to generate
    # @param ranges [Array<Range, Array, Set>] an array of limitations on the
    #   generation. Elements can be a Range to select from within that range,
    #   or an Array or Set to select an element from within the list.
    # @return [void]
    #
    # @example Generate a letter
    #   letter
    #
    # @example Generate five uppercase letters named :b
    #   letter(name: :b, length: 5, ranges: ['A'..'Z'])
    #
    # @example Generate three-letter strings from within specific values
    #   letter(ranges: ['700'..'799', '7A0'..'7F9'])
    def letter(name: nil, length: 1, ranges: ['a'..'z', 'A'..'Z'])
      generator = Letter.new(length, ranges)
      @components.push(Component.new(@components.size, name, [], generator))
    end

    ##
    # Append a component that always yields the given literal value.
    #
    # @param value [String]
    # @param name [Symbol] the name for this node in the group
    # @return [void]
    # @example
    #   lit("-")
    def lit(value, name: nil)
      generator = Literal.new(value)
      @components.push(Component.new(@components.size, name, [], generator))
    end

    ##
    # Append a component whose value is whatever the given block returns.
    #
    # @param name [Symbol] the name for this node in the group
    # @param deps [Array<Symbol>] the name of other fields that this one depends on
    # @param block [Proc] the block that yields the arbitrary value. Its
    #   arguments are the deps.
    # @return [void]
    #
    # @example Today's date
    #   computed do
    #     Date.today
    #   end
    #
    # @example A check digit
    #   int(name: :a, length: 5)
    #   computed(deps: [:a]) do |a|
    #     a.to_s.bytes.sum % 10
    #   end
    def computed(name: nil, deps: [], &block)
      generator = Computed.new(block)
      @components.push(Component.new(@components.size, name, deps, generator))
    end

    ##
    # Append a component that is filled by one of the alternatives declared
    # in the block.
    #
    # @param name [Symbol] the name for this node in the group
    # @param block [Proc] subgenerator block
    # @return [void]
    #
    # @example Either five digits, or two letters
    #   oneof do |or_else|
    #     or_else.int(length: 5)
    #     or_else.letter(length: 2)
    #   end
    #
    # @example Either one letter; or a slash, five digits, then a slash.
    #   oneof do |or_else|
    #     or_else.letter
    #     or_else.group do |g_|
    #       g_.lit("/")
    #       g_.int(length: 5)
    #       g_.lit("/")
    #     end
    #   end
    def oneof(name: nil, &block)
      generator = Oneof.new(self, block)
      @components.push(Component.new(@components.size, name, [], generator))
    end

    ##
    # Append a component made of several sub-generators declared in the
    # block. Useful for {#oneof}.
    #
    # @param name [Symbol] the name for this node in the group
    # @param block [Proc] a subgenerator block
    # @return [void]
    def group(name: nil, &block)
      generator = Group.new(@as_type, block)
      @components.push(Component.new(@components.size, name, [], generator))
    end

    ##
    # Generate the value: derive the dependency graph, order the components
    # so dependencies come first, evaluate them, then convert the results.
    #
    # @return [String] if +as_type+ is +:string+
    def build
      evaluation_order = build_stack(build_graph)
      convert(generate_values(evaluation_order))
    end

    private

    ##
    # Turn the components into a graph following dependencies.
    #
    # @return [Array<(Integer, Integer)>] +[from, to]+ edges
    # @raise [RuntimeError] if a component depends on a name that no
    #   component has
    #
    # Components can have dependencies. Here's one where a computation (b)
    # depends on a value generated after it (c):
    #
    #     int(name: :a)
    #     computed(name: :b, deps: [:c]) { |c| c + 1 }
    #     int(name: :c)
    #
    # We can think of a graph like so:
    #
    #      (a)  (c)
    #       |    |
    #       |   (b)
    #       \   /
    #        end
    #
    # Or in Mermaid:
    #
    # ```mermaid
    # stateDiagram-v2
    #     a --> [*]
    #     c --> b
    #     b --> [*]
    # ```
    #
    # This method builds that graph, using their positional locations as the
    # ID. The end state is represented as +nil+. Dependency edges are listed
    # first, followed by an edge to the end state for every component that
    # nothing else depends on. So continuing the example above, it will give
    # this output:
    #
    #     [
    #       [2, 1],
    #       [0, nil],
    #       [1, nil],
    #     ]
    #
    # Later we can look up the appropriate component by indexing into the
    # +@components+ array.
    def build_graph
      dependency_edges = @components.flat_map do |component|
        component.deps.map do |dep|
          # A name resolves to the first component carrying it.
          dep_component = @components.find { |candidate| candidate.name == dep }
          if dep_component.nil?
            raise "unknown dependency #{dep.inspect} for the component at position #{component.position}"
          end

          [dep_component.position, component.position]
        end
      end

      # Components that never appear as a source lead straight to the end.
      sources = dependency_edges.map(&:first)
      terminal_edges = @components
                       .reject { |component| sources.include?(component.position) }
                       .map { |component| [component.position, nil] }

      dependency_edges + terminal_edges
    end

    ##
    # Produce a stack of components to evaluate in sequence.
    #
    # @param graph [Array<(Integer, Integer)>]
    # @return [Array<Array<Int>>] levels of component positions; the last
    #   level must be evaluated first
    # @raise [ArgumentError] if the dependencies form a cycle
    #
    # Now that we have a graph, we know enough to determine how to traverse the
    # generators such that all dependencies are met.
    #
    # The initial level is an array of all the free traversals to the goal
    # (where the +to+ is +nil+).
    #
    # Then repeatedly:
    # - Collect every node that leads into any node of the current level.
    # - If there are such nodes, they become the next level.
    # - If there are none, we are done.
    #
    # A node that is reachable at several depths is kept only in the deepest
    # level, so it is evaluated before everything that depends on it.
    #
    # For example, given the graph:
    #
    #     [
    #       [0, nil],
    #       [2, 1],
    #       [1, nil],
    #     ]
    #
    # The initial level is +[0, 1]+. Nothing leads to +0+, and +2+ leads to
    # +1+, so the next level is +[2]+. Nothing leads to +2+, so we are done.
    #
    # The final stack is:
    #
    #     [
    #       [0, 1],
    #       [2]
    #     ]
    def build_stack(graph)
      # Not used here, but kept so that +Set+ stays loaded for the generators
      # in this file that reference it.
      require 'set'

      terminals = graph.filter_map { |(from, to)| to.nil? && from }

      # For every node, the nodes that must be generated before it.
      producers = {}
      graph.each do |(from, to)|
        (producers[to] ||= []) << from unless to.nil?
      end

      levels = [terminals]
      depth = {}
      terminals.each { |id| depth[id] = 0 }

      loop do
        frontier = levels.last.flat_map { |id| producers.fetch(id, []) }.uniq
        break if frontier.empty?

        # A dependency chain cannot be longer than the number of edges, so
        # going deeper than that means we are walking around a cycle.
        raise ArgumentError, 'circular dependency between components' if levels.size > graph.size

        frontier.each { |id| depth[id] = levels.size }
        levels << frontier
      end

      # Keep each node only at the deepest level it was seen on.
      stack = levels.each_with_index.map do |ids, level|
        ids.select { |id| depth[id] == level }
      end

      [stack.first] + stack.drop(1).reject(&:empty?)
    end

    ##
    # Turn a stack into a list of generated values.
    #
    # @param stack [Array<Array<Int>>]
    # @return [Array<Object>] +[position, name, value]+ triples sorted by
    #   position
    # @raise [RuntimeError] if the stack refers to a position that has no
    #   component
    #
    # We start with a stack of components we need evaluated. We have been
    # tracking these components by position, so first we need to look up the
    # component in our list.
    #
    # From there we collect the values of the component's dependencies. These
    # have already been evaluated, since +stack+ is sorted. They are passed to
    # the generator in the order the dependencies were declared, one argument
    # per dependency, so a +nil+ or +false+ value keeps its slot.
    #
    # Since the stack was sorted by computation order, we must re-sort them
    # into positional order at the end.
    def generate_values(stack)
      result = []

      # The last level holds the components with no unmet dependencies.
      stack.reverse_each do |component_ids|
        component_ids.each do |component_id|
          component = @components[component_id]
          raise "no component at position #{component_id.inspect}" if component.nil?

          args = component.deps.map do |dep|
            # Destructuring +nil+ (nothing generated under that name) leaves
            # +value+ as +nil+.
            _position, _name, value = result.find { |(_, name, _)| name == dep }
            value
          end

          result << [component.position, component.name, component.generator.generate(args)]
        end
      end

      result.sort_by(&:first)
    end

    ##
    # Combine the generated triples into the requested return type.
    #
    # @param values [Array<Object>] +[position, name, value]+ triples
    # @return [String] if +@as_type+ is +:string+
    # @raise [ArgumentError] if +@as_type+ is unsupported
    def convert(values)
      raise ArgumentError, "unknown return type: #{@as_type}" unless @as_type == :string

      values.inject('') { |text, (_position, _name, piece)| "#{text}#{piece}" }
    end

    # Generator for a nested sequence of components that is built as a unit.
    class Group
      # @param as_type [Symbol] return type handed to the nested builder
      # @param block [Proc] declares the nested components on a builder
      def initialize(as_type, block)
        @block = block
        @as_type = as_type
      end

      # Build the nested sequence on a fresh builder; the dependency values
      # passed in are ignored.
      def generate(_deps)
        Builder.new(@as_type).tap { |nested| @block.call(nested) }.build
      end
    end

    # Generator that evaluates exactly one of several declared alternatives.
    class Oneof
      # @param builder [Builder] the enclosing builder; only its +as_type+ is used
      # @param block [Proc] declares the alternatives on a {OneofSelector}
      def initialize(builder, block)
        @block = block
        @builder = builder
      end

      # Record the alternatives, pick one, and build it.
      #
      # @raise [NoMethodError] if the block calls a method the builder does
      #   not offer
      # @raise [ArgumentError] if the block declares no alternative
      def generate(...)
        subgens = OneofSelector.new(@builder)
        @block.call(subgens)
        subgens.sample
        subgens.generate(...)
      end

      # Records builder calls without running them, so that only the sampled
      # alternative ends up on the real builder.
      class OneofSelector
        def initialize(builder)
          @subgens = []
          @builder = Builder.new(builder.as_type)
        end

        # Record a call to replay later. Methods the builder does not respond
        # to are rejected right away; otherwise a typo would only surface on
        # the runs where that alternative happens to be sampled.
        def method_missing(meth, *args, **kwargs, &block)
          return super unless respond_to_missing?(meth)

          @subgens << [meth, args, kwargs, block]
        end

        def respond_to_missing?(method_name, include_private = false)
          @builder.respond_to?(method_name, include_private)
        end

        # Replay one randomly chosen recorded call on the builder.
        def sample
          raise ArgumentError, 'oneof requires at least one alternative' if @subgens.empty?

          (meth, args, kwargs, block) = Faker::Base.sample(@subgens)
          @builder.send(meth, *args, **kwargs, &block)
        end

        def generate(...)
          @builder.build
        end
      end
    end

    # Generator for integers.
    class Int
      # @param length [Integer] number of digits; only used when +ranges+ is nil
      # @param ranges [Array<Range, Array, Set>, nil] candidates to pick from
      def initialize(length, ranges)
        # Internally we store only an Enumerable of candidates. So if we are
        # not given any but are given a length, we need to convert the
        # length to a Range.
        #
        # If the length is `5`, that means we should compute the Range `10000..99999`.
        # We can compute the lower end with a simple exponent: 10^4 = 10000.
        # The upper end is one less than an exponent: 10^5 - 1 = 99999.
        if ranges.nil?
          lower = 10**(length - 1)
          upper = (10**length) - 1
          ranges = [lower..upper]
        end

        @ranges = ranges
      end

      # Pick one candidate at random, then pick a value from it. An Array or
      # Set candidate yields one of its elements; anything else is handed to
      # +Faker::Base.rand+. The dependency values passed in are ignored.
      def generate(_deps)
        choice = @ranges.sample(random: Faker::Config.random)

        case choice
        when Array, Set
          # Set has no #sample of its own, hence the conversion.
          choice.to_a.sample(random: Faker::Config.random)
        else
          Faker::Base.rand(choice)
        end
      end
    end

    # Generator for strings built one character at a time.
    class Letter
      # @param length [Integer, Range] how many characters to generate; a
      #   Range means a random count drawn from it
      # @param ranges [Array<Range, Array, Set>, nil] candidates for each
      #   character
      def initialize(length, ranges)
        @length = length
        @ranges = ranges
      end

      # Concatenate the chosen number of independently drawn characters. The
      # dependency values passed in are ignored.
      def generate(_deps)
        count = @length.is_a?(Range) ? Faker::Base.rand(@length) : @length

        count.times.each_with_object(+'') do |_index, text|
          # +to_s+ matters: appending an Integer would add a codepoint.
          text << char.to_s
        end
      end

      private

      # Draw one character: pick a candidate at random, then an element of it.
      #
      # @raise [ArgumentError] if the candidate is not a Range, Array, or Set
      def char
        return Faker::Base.sample(Faker::Base::Letters) unless @ranges

        case choice = @ranges.sample(random: Faker::Config.random)
        when Range, Array, Set
          # Only Array offers #sample, so convert the others first.
          choice.to_a.sample(random: Faker::Config.random)
        else
          raise ArgumentError, "unsupported range type: #{choice.inspect}"
        end
      end
    end

    # Generator that always yields the value it was created with.
    class Literal
      # @param value [Object] the value to emit
      def initialize(value)
        @value = value
      end

      # @return [Object] the stored value; the dependency values passed in
      #   are ignored
      def generate(_deps)
        @value
      end
    end

    # Generator whose value comes from calling a stored block.
    class Computed
      # @param block [Proc] computes the value
      def initialize(block)
        @block = block
      end

      # @param dependency_values [Array<Object>] splatted into the block as
      #   separate arguments
      # @return [Object] whatever the block returns
      def generate(dependency_values)
        @block.call(*dependency_values)
      end
    end
  end
end