File: index.rake

package info (click to toggle)
ruby-ruby-lsp 0.26.7-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 27,676 kB
  • sloc: ruby: 35,294; javascript: 29; sh: 7; makefile: 4
file content (96 lines) | stat: -rw-r--r-- 2,857 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# frozen_string_literal: true

require "ruby_lsp/internal"

# Based on https://github.com/ruby/prism/blob/main/rakelib/lex.rake

module GemIndexing
  class << self
    # Runs the given block once for every element of +items+, distributing the
    # elements over a fixed pool of threads, and blocks until every element has
    # been processed. This is particularly useful for tasks that are IO-bound
    # like downloading files or reading files from disk.
    #
    # The pool size is read from the WORKERS environment variable and defaults
    # to 16.
    #
    # @param items [Enumerable] the elements to hand to the block, one at a time
    # @yieldparam item [Object] a single element of +items+
    # @return [Array<Thread>] the (finished) worker threads
    def parallelize(items, &block)
      # NOTE: this is a process-wide setting. Any exception raised inside a
      # worker is re-raised in the main thread instead of being swallowed.
      Thread.abort_on_exception = true

      queue = Queue.new
      items.each { |item| queue << item }

      workers =
        ENV.fetch("WORKERS") { 16 }.to_i.times.map do
          parallelize_thread(queue, &block)
        end

      workers.each(&:join)
    end

    private

    # Create a new thread with a minimal number of locals that it can access.
    # The thread keeps taking items off +queue+ and passing them to +block+
    # until the queue is drained.
    def parallelize_thread(queue, &block)
      Thread.new do
        loop do
          # A non-blocking pop makes "is there work left?" and "take the work"
          # a single atomic step. Checking `queue.empty?` and then calling a
          # blocking `shift` lets another worker steal the last item in
          # between, leaving this thread waiting on an empty queue forever.
          item =
            begin
              queue.pop(true)
            rescue ThreadError
              break
            end

          block.call(item)
        end
      end
    end
  end
end

# Path of the YAML file that lists the names of the gems to download and index.
TOP_100_GEM_FILENAME = "rakelib/top_100_gems.yml"
# Directory under which each downloaded gem's Ruby files are extracted (one
# sub-directory per gem).
TOP_100_GEMS_DIR = "tmp/top_100_gems"

namespace :download do
  # Make sure the target directory exists before the download task runs.
  directory TOP_100_GEMS_DIR

  # Downloads every gem listed in TOP_100_GEM_FILENAME from rubygems.org and
  # extracts its Ruby files into TOP_100_GEMS_DIR/<gem name>. Gems whose target
  # directory already exists are skipped. Raises if a download does not return
  # a successful HTTP response.
  desc "Download the top 100 rubygems under #{TOP_100_GEMS_DIR}/"
  task topgems: TOP_100_GEMS_DIR do
    $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
    require "net/http"
    require "rubygems/package"
    require "tmpdir"

    GemIndexing.parallelize(YAML.safe_load_file(TOP_100_GEM_FILENAME)) do |gem_name|
      directory = File.expand_path("#{TOP_100_GEMS_DIR}/#{gem_name}")
      # Already extracted by a previous run; nothing to do for this gem.
      next if File.directory?(directory)

      puts "Downloading #{gem_name}"

      uri = URI.parse("https://rubygems.org/gems/#{gem_name}.gem")
      response = Net::HTTP.get_response(uri)
      unless response.is_a?(Net::HTTPSuccess)
        # Include the HTTP status so a failure can be diagnosed from the
        # message alone.
        raise "Failed to download #{gem_name}: HTTP #{response.code} #{response.message}"
      end

      Dir.mktmpdir do |tmpdir|
        filepath = File.join(tmpdir, "#{gem_name}.gem")
        # A .gem file is a binary archive; write it in binary mode so no
        # newline or encoding translation can corrupt it.
        File.binwrite(filepath, response.body)
        # Only the Ruby sources are needed for indexing.
        Gem::Package.new(filepath).extract_files(directory, "**/*.rb")
      end
    end
  end
end

# This task indexes against the top 100 gems, and will exit(1) if any fail.
#
# Every Ruby file extracted under TOP_100_GEMS_DIR is indexed with a fresh
# RubyIndexer::Index per gem. Failures are collected across all worker threads
# and reported together at the end. The download directory is always removed
# afterwards, whether or not indexing succeeded.
desc "Index against the top 100 rubygems"
task "index:topgems": ["download:topgems"] do
  $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
  require "net/http"
  require "rubygems/package"
  require "tmpdir"

  gem_names = YAML.safe_load_file(TOP_100_GEM_FILENAME)

  # Shared across all worker threads. It is only ever appended to (under the
  # lock) and never reassigned, so one gem's failures cannot overwrite
  # another's.
  errors = []
  errors_lock = Mutex.new

  GemIndexing.parallelize(gem_names) do |gem_name|
    directory = File.expand_path("#{TOP_100_GEMS_DIR}/#{gem_name}")
    index = RubyIndexer::Index.new

    Dir[File.join(directory, "**", "*.rb")].each do |filepath|
      print(".")
      index.index_file(URI::Generic.from_path(path: filepath))
    rescue => e
      # Record the failure and keep going so a single run reports every
      # problematic file.
      errors_lock.synchronize { errors << { message: e.message, file: filepath } }
    end
  end

  if errors.any?
    puts "errors: #{errors}"
    # Fail the task so callers (e.g. CI) notice; the ensure below still runs.
    exit(1)
  end
ensure
  FileUtils.rm_rf(TOP_100_GEMS_DIR)
end