File: spearman_rank_coefficient.rb

package info (click to toggle)
ruby-statistics 2.1.1-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid, trixie
  • size: 224 kB
  • sloc: ruby: 989; sh: 4; makefile: 4
file content (71 lines) | stat: -rw-r--r-- 2,692 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
module Statistics
  # Helpers for Spearman's rank-order correlation: ranking raw observations
  # (averaging the ranks of tied values) and computing rho from two sets of
  # ranks.
  class SpearmanRankCoefficient
    # Ranks +data+ in descending order, so the largest value receives rank 1.
    #
    # Values that compare equal with +==+ form a tie group and all receive the
    # average of the positions the group occupies. A tied rank is always a
    # Float (even when the average is a whole number, e.g. 2.0), while an
    # untied value keeps its Integer position; +coefficient+ relies on that
    # distinction to detect ties.
    #
    # data::              Array of mutually comparable values.
    # return_ranks_only:: when truthy (the default), returns an Array of ranks
    #                     aligned with +data+. Otherwise returns a Hash keyed by
    #                     each distinct value (in order of first appearance)
    #                     whose entries hold +:counter+ (size of the tie group),
    #                     +:rank+ (sum of the positions in the group) and
    #                     +:tie_rank+ (the rank assigned to the value).
    def self.rank(data:, return_ranks_only: true)
      descending_order_data = data.sort { |a, b| b <=> a }

      # Equal values sit next to each other once sorted, so one pass is enough
      # to learn where every tie group starts and how many members it has.
      # Grouping is done with `==` on neighbours rather than through Hash keys
      # so that 1 and 1.0 are treated as the same observation.
      group_start_for = {}
      group_sizes = Hash.new(0)
      group_start = 0

      descending_order_data.each_with_index do |value, index|
        group_start = index if index.zero? || descending_order_data[index - 1] != value
        group_start_for[value] = group_start
        group_sizes[group_start] += 1
      end

      # Built by walking +data+ so keys keep their first-appearance order.
      rankings = {}
      data.each do |value|
        next if rankings.key?(value)

        start = group_start_for.fetch(value)
        counter = group_sizes.fetch(start)
        first_rank = start + 1 # positions are 1-based
        # Sum of the consecutive positions first_rank .. first_rank + counter - 1.
        rank_sum = (counter * first_rank) + (counter * (counter - 1) / 2)
        tie_rank = counter == 1 ? first_rank : rank_sum / counter.to_f

        rankings[value] = { counter: counter, rank: rank_sum, tie_rank: tie_rank }
      end

      if return_ranks_only
        data.map { |value| rankings.fetch(value)[:tie_rank] }
      else
        rankings
      end
    end

    # Computes Spearman's rho for two equally sized sets of ranks.
    #
    # Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php
    #
    # Ties are assumed whenever any rank in either set is a Float (which is how
    # +rank+ reports tied values); in that case rho is the Pearson correlation
    # of the two rank sets. Otherwise the simplified formula
    # 1 - 6 * sum(d^2) / (n^3 - n) is used.
    #
    # The tie branch calls +mean+ on each set; that method is not defined in
    # this file and must be provided elsewhere in the project.
    #
    # Returns nil when both sets are empty. The result is NaN when the
    # denominator is zero (a single pair, or a tied set with no variance).
    #
    # Raises RuntimeError when the sets differ in size.
    def self.coefficient(set_one, set_two)
      raise 'Both group sets must have the same number of cases.' if set_one.size != set_two.size
      # Sizes are known to match here, so checking one set is sufficient.
      return if set_one.empty?

      have_tie_ranks = [set_one, set_two].any? do |set|
        set.any? { |rank| rank.is_a?(Float) }
      end
      paired_ranks = set_one.zip(set_two)

      if have_tie_ranks
        set_one_mean = set_one.mean
        set_two_mean = set_two.mean

        numerator = 0
        squared_differences_set_one = 0
        squared_differences_set_two = 0

        paired_ranks.each do |rank_one, rank_two|
          local_diff_one = rank_one - set_one_mean
          local_diff_two = rank_two - set_two_mean

          squared_differences_set_one += local_diff_one**2
          squared_differences_set_two += local_diff_two**2
          numerator += local_diff_one * local_diff_two
        end

        denominator = Math.sqrt(squared_differences_set_one * squared_differences_set_two)

        numerator / denominator # This is rho or spearman's coefficient.
      else
        sum_squared_differences = paired_ranks.reduce(0) do |memo, (rank_one, rank_two)|
          memo + ((rank_one - rank_two)**2)
        end

        numerator = 6 * sum_squared_differences
        denominator = (set_one.size**3) - set_one.size

        1.0 - (numerator / denominator.to_f) # This is rho or spearman's coefficient.
      end
    end
  end
end