#!/usr/bin/env ruby
# frozen_string_literal: true

require 'fileutils'
require 'yaml'

class ApplicationSettingsAnalysis
  CODEBASE_FIELDS = %i[
    column
    db_type
    api_type
    encrypted
    not_null
    default
    gitlab_com_different_than_default
    description
    jihu
  ].freeze
  ApplicationSettingPrototype = Struct.new(
    *CODEBASE_FIELDS,
    :attr,
    :clusterwide,
    keyword_init: true)

  class ApplicationSetting < ApplicationSettingPrototype
    # Computed from Teleport Rails console with:
    # ```shell
    # $ as = Gitlab::CurrentSettings.current_application_settings
    # $ as_defaults = ApplicationSetting.defaults
    # $ new_as = ApplicationSetting.new
    # $ diff_than_def = as.attributes.to_h.select { |k, v| (as_defaults[k] || new_as[k]) != v }; nil
    # $ diff_than_def_valid_columns = diff_than_default.keys.reject { |k| k.match?(%r{^(encrypted_\w+_iv|\w+_html)$}) }
    # $ diff_than_def_valid_columns.sort.each { |d| puts d }; nil
    # ```
    #
    # rubocop:disable Naming/InclusiveLanguage -- This is the actual column name
    GITLAB_COM_DIFFERENT_THAN_DEFAULT = %w[
      abuse_notification_email
      after_sign_out_path
      after_sign_up_text
      arkose_labs_namespace
      asset_proxy_enabled
      asset_proxy_url
      asset_proxy_whitelist
      authorized_keys_enabled
      auto_devops_domain
      auto_devops_enabled
      automatic_purchased_storage_allocation
      check_namespace_plan
      clickhouse
      cluster_agents
      code_creation
      commit_email_hostname
      container_expiration_policies_enable_historic_entries
      container_registry_data_repair_detail_worker_max_concurrency
      container_registry_db_enabled
      container_registry_expiration_policies_worker_capacity
      container_registry_features
      container_registry_token_expire_delay
      container_registry_vendor
      container_registry_version
      created_at
      cube_api_base_url
      custom_http_clone_url_root
      dashboard_limit
      dashboard_limit_enabled
      database_grafana_api_url
      database_grafana_tag
      database_max_running_batched_background_migrations
      deactivation_email_additional_text
      default_artifacts_expire_in
      default_branch_name
      default_branch_protection_defaults
      default_ci_config_path
      default_group_visibility
      default_projects_limit
      delete_unconfirmed_users
      diff_max_files
      diff_max_lines
      domain_denylist
      domain_denylist_enabled
      downstream_pipeline_trigger_limit_per_project_user_sha
      duo_workflow
      duo_workflow_oauth_application_id
      eks_access_key_id
      eks_account_id
      eks_integration_enabled
      elasticsearch_aws_access_key
      elasticsearch_client_request_timeout
      elasticsearch_indexed_field_length_limit
      elasticsearch_indexing
      elasticsearch_limit_indexing
      elasticsearch_max_code_indexing_concurrency
      elasticsearch_requeue_workers
      elasticsearch_search
      elasticsearch_url
      elasticsearch_username
      elasticsearch_worker_number_of_shards
      email_additional_text
      email_confirmation_setting
      email_restrictions
      email_restrictions_enabled
      enabled_git_access_protocol
      encrypted_akismet_api_key
      encrypted_arkose_labs_client_secret
      encrypted_arkose_labs_client_xid
      encrypted_arkose_labs_data_exchange_key
      encrypted_arkose_labs_private_api_key
      encrypted_arkose_labs_public_api_key
      encrypted_asset_proxy_secret_key
      encrypted_ci_jwt_signing_key
      encrypted_cube_api_key
      encrypted_customers_dot_jwt_signing_key
      encrypted_database_grafana_api_key
      encrypted_eks_secret_access_key
      encrypted_elasticsearch_aws_secret_access_key
      encrypted_elasticsearch_password
      encrypted_external_pipeline_validation_service_token
      encrypted_lets_encrypt_private_key
      encrypted_mailgun_signing_key
      encrypted_product_analytics_configurator_connection_string
      encrypted_recaptcha_private_key
      encrypted_recaptcha_site_key
      encrypted_secret_detection_token_revocation_token
      encrypted_slack_app_secret
      encrypted_slack_app_signing_secret
      encrypted_slack_app_verification_token
      encrypted_spam_check_api_key
      encrypted_telesign_api_key
      encrypted_telesign_customer_xid
      enforce_terms
      error_tracking_access_token_encrypted
      error_tracking_api_url
      error_tracking_enabled
      external_authorization_service_default_label
      external_authorization_service_url
      external_pipeline_validation_service_timeout
      external_pipeline_validation_service_url
      geo_status_timeout
      gitpod_enabled
      globally_allowed_ips
      gravatar_enabled
      health_check_access_token
      help_page_documentation_base_url
      help_page_support_url
      help_page_text
      home_page_url
      identity_verification_settings
      import_sources
      importers
      integrations
      invisible_captcha_enabled
      issues_create_limit
      jira_connect_application_key
      jira_connect_proxy_url
      jira_connect_public_key_storage_enabled
      lets_encrypt_notification_email
      lets_encrypt_terms_of_service_accepted
      local_markdown_version
      mailgun_events_enabled
      maven_package_requests_forwarding
      max_artifacts_size
      max_export_size
      max_import_size
      max_pages_custom_domains_per_project
      max_pages_size
      metrics_enabled
      metrics_method_call_threshold
      metrics_packet_size
      metrics_port
      mirror_capacity_threshold
      mirror_max_capacity
      mirror_max_delay
      namespace_storage_forks_cost_factor
      notes_create_limit
      notes_create_limit_allowlist
      outbound_local_requests_whitelist
      package_registry
      pages
      password_authentication_enabled_for_web
      performance_bar_allowed_group_id
      pipeline_limit_per_project_user_sha
      plantuml_enabled
      plantuml_url
      pre_receive_secret_detection_enabled
      product_analytics_data_collector_host
      product_analytics_enabled
      productivity_analytics_start_date
      prometheus_alert_db_indicators_settings
      push_rule_id
      rate_limiting_response_text
      rate_limits
      rate_limits_unauthenticated_git_http
      recaptcha_enabled
      receive_max_input_size
      repository_size_limit
      repository_storages
      repository_storages_weighted
      require_admin_approval_after_user_signup
      require_admin_two_factor_authentication
      restricted_visibility_levels
      runners_registration_token
      runners_registration_token_encrypted
      search_rate_limit
      search_rate_limit_allowlist
      secret_detection_revocation_token_types_url
      secret_detection_token_revocation_enabled
      secret_detection_token_revocation_url
      security_policies
      security_policy_global_group_approvers_enabled
      security_policy_scheduled_scans_max_concurrency
      security_txt_content
      sentry_clientside_dsn
      sentry_clientside_traces_sample_rate
      sentry_dsn
      sentry_enabled
      sentry_environment
      service_ping_settings
      shared_runners_minutes
      shared_runners_text
      sidekiq_job_limiter_limit_bytes
      sign_in_restrictions
      signup_enabled
      silent_admin_exports_enabled
      slack_app_enabled
      slack_app_id
      snowplow_app_id
      snowplow_collector_hostname
      snowplow_cookie_domain
      snowplow_enabled
      sourcegraph_enabled
      sourcegraph_url
      spam_check_endpoint_enabled
      spam_check_endpoint_url
      static_objects_external_storage_auth_token_encrypted
      static_objects_external_storage_url
      throttle_authenticated_api_period_in_seconds
      throttle_authenticated_api_requests_per_period
      throttle_authenticated_deprecated_api_period_in_seconds
      throttle_authenticated_web_period_in_seconds
      throttle_authenticated_web_requests_per_period
      throttle_incident_management_notification_enabled
      throttle_protected_paths_enabled
      throttle_unauthenticated_api_enabled
      throttle_unauthenticated_api_period_in_seconds
      throttle_unauthenticated_api_requests_per_period
      throttle_unauthenticated_deprecated_api_requests_per_period
      throttle_unauthenticated_enabled
      throttle_unauthenticated_git_http_enabled
      throttle_unauthenticated_git_http_period_in_seconds
      throttle_unauthenticated_git_http_requests_per_period
      throttle_unauthenticated_period_in_seconds
      throttle_unauthenticated_requests_per_period
      time_tracking_limit_to_hours
      unconfirmed_users_delete_after_days
      unique_ips_limit_per_user
      unique_ips_limit_time_window
      updated_at
      usage_stats_set_by_user_id
      use_clickhouse_for_analytics
      user_default_internal_regex
      users_get_by_id_limit_allowlist
      uuid
      vertex_ai_project
      web_ide_oauth_application_id
      zoekt_cpu_to_tasks_ratio
      zoekt_indexing_enabled
      zoekt_search_enabled
      zoekt_settings
    ].freeze
    # rubocop:enable Naming/InclusiveLanguage

    def initialize(hash)
      super(hash)
      self[:encrypted] = column.start_with?('encrypted_') || column.end_with?('_encrypted')
      self[:attr] = column.delete_prefix('encrypted_').delete_suffix('_encrypted')
      self[:gitlab_com_different_than_default] = GITLAB_COM_DIFFERENT_THAN_DEFAULT.include?(column)
      populate_fields_from_definition!
    end

    def populate_fields_from_definition!
      definition.each do |k, v|
        next if v.nil?
        next if CODEBASE_FIELDS.include?(k.to_sym)

        self[k] = v
      end
    end

    def definition_file_path
      File.expand_path("../../config/application_setting_columns/#{attr}.yml", __dir__)
    end

    def definition_file_exist?
      File.exist?(definition_file_path)
    end

    private

    def definition
      @definition ||= definition_file_exist? ? YAML.safe_load_file(definition_file_path) : {}
    end
  end

  ApplicationSettingApiDoc = Struct.new(:attr, :db_type, :api_type, :required, :description, keyword_init: true)

  ENUM_ATTRIBUTES = %w[
    default_group_visibility
    default_project_visibility
    default_snippet_visibility
    email_confirmation_setting
    performance_bar_allowed_group_id
    sidekiq_job_limiter_mode
    whats_new_variant
  ].freeze
  API_TYPE_STRING_OR_ARRAY_OF_STRING = ['string', 'array of strings', 'string or array of strings'].freeze
  API_TYPE_ARRAY_OF_INTEGER = ['array of integers'].freeze
  API_TYPE_INTEGER = ['integer'].freeze
  API_TYPE_FLOAT = ['float'].freeze
  DB_TYPE_TO_COMPATIBLE_API_TYPES = {
    'character' => API_TYPE_STRING_OR_ARRAY_OF_STRING,
    'text' => API_TYPE_STRING_OR_ARRAY_OF_STRING,
    'text[]' => API_TYPE_STRING_OR_ARRAY_OF_STRING,
    'bytea' => API_TYPE_STRING_OR_ARRAY_OF_STRING,
    'integer[]' => API_TYPE_ARRAY_OF_INTEGER,
    'smallint[]' => API_TYPE_ARRAY_OF_INTEGER,
    'jsonb' => ['hash', 'hash of strings to integers', 'object'],
    'smallint' => API_TYPE_INTEGER,
    'bigint' => API_TYPE_INTEGER,
    'double' => API_TYPE_FLOAT,
    'numeric' => API_TYPE_FLOAT
  }.freeze

  DB_STRUCTURE_FILE_PATH = File.expand_path('../../db/structure.sql', __dir__)
  CREATE_TABLE_REGEX = /CREATE TABLE application_settings \((?<columns>.+?)\);/m
  JIHU_COMMENT_REGEX = /COMMENT ON COLUMN application_settings.(?<column>\w+) IS 'JiHu-specific column';/
  IGNORED_COLUMNS_REGEX = %r{
    ^(
      encrypted_\w+_iv # ignore encryption-related extra columns
      |
      \w+_html # ignore Markdown-caching extra columns
      |
      # this is a legacy column, but we want to reference the
      # runners_registration_token_encrypted column instead
      runners_registration_token
    )$
  }x
  DEFAULT_REGEX = /DEFAULT (?<default>[^\s,]+)/

  DOC_API_SETTINGS_FILE_PATH = File.expand_path('../../doc/api/settings.md', __dir__)
  DOC_API_SETTINGS_TABLE_REGEX = Regexp.new(
    "## List of settings that can be accessed via API calls(?:.*?)(?:--\|\n)+?(?<rows>.+)" \
      "### Configure inactive project deletion", Regexp::MULTILINE
  )

  DOC_PAGE_HEADERS = [
    "---",
    "stage: Data Stores",
    "group: Tenant Scale",
    "info: Analysis of Application Settings for Cells 1.0.",
    "---",
    "# Application Settings analysis\n",
    "## Statistics\n"
  ].freeze

  def self.definition_files
    @definition_files ||= Dir.glob(File.expand_path("../../config/application_setting_columns/*.yml", __dir__))
  end

  def initialize(stdout: $stdout)
    @stdout = stdout
  end

  def execute
    warn_about_virtual_attributes!
    write_attributes!
    write_documentation_page!
    clean_outdated_definition_files!
  end

  def attributes
    @attributes ||= begin
      structure_sql = File.read(DB_STRUCTURE_FILE_PATH)
      match = structure_sql.match(CREATE_TABLE_REGEX)
      jihu_columns = structure_sql.scan(JIHU_COMMENT_REGEX).flatten
      structure_columns = match[:columns].lines(chomp: true).map(&:strip).reject do |line|
        line.empty? || line.start_with?('CONSTRAINT')
      end.sort

      structure_columns.filter_map do |line|
        # Example lines:
        # throttle_authenticated_packages_api_requests_per_period integer DEFAULT 1000 NOT NULL
        # valid_runner_registrars character varying[] DEFAULT '{project,group}'::character varying[]
        column, db_type = line.chomp(',').split(' ').map(&:strip)
        next if column.match?(IGNORED_COLUMNS_REGEX)

        default_match = line.match(DEFAULT_REGEX)&.values_at(:default)&.first

        ApplicationSetting.new(column: column, db_type: db_type, not_null: line.include?('NOT NULL'),
          default: default_match, jihu: jihu_columns.include?(column)).tap do |as_attr|
          as_attr.api_type, as_attr.description = fetch_type_and_description_from_api_documentation(as_attr)
        end
      end
    end.sort_by(&:attr)
  end

  private

  attr_reader :stdout, :application_setting_attrs

  def documentation_api_settings
    @documentation_api_settings ||= begin
      settings_md = File.read(DOC_API_SETTINGS_FILE_PATH)
      match = settings_md.match(DOC_API_SETTINGS_TABLE_REGEX)
      doc_rows = match[:rows].lines(chomp: true).map(&:strip).filter_map do |line|
        line.delete_prefix("| ") if line.start_with?('| `')
      end.sort

      doc_rows.map do |line|
        attr, api_type, required, description = line.split('|').map(&:strip)
        attr.delete!('`')

        ApplicationSettingApiDoc.new(attr: attr, api_type: api_type, required: required, description: description)
      end
    end
  end

  def fetch_type_and_description_from_api_documentation(as_attr)
    existing_attribute_from_doc_api_settings = documentation_api_settings.find do |api|
      api.attr == as_attr.attr
    end
    return unless existing_attribute_from_doc_api_settings

    compatible_api_types = DB_TYPE_TO_COMPATIBLE_API_TYPES.fetch(as_attr.db_type, [as_attr.db_type])
    if ENUM_ATTRIBUTES.include?(as_attr.attr) && compatible_api_types.include?('integer')
      compatible_api_types = ['string']
    end

    unless compatible_api_types.include?(existing_attribute_from_doc_api_settings.api_type)
      raise "`#{as_attr.attr}`: Documented type `#{existing_attribute_from_doc_api_settings.api_type}` " \
        "isn't compatible with actual DB type `#{as_attr.db_type}`!"
    end

    [existing_attribute_from_doc_api_settings.api_type, existing_attribute_from_doc_api_settings.description]
  end

  def warn_about_virtual_attributes!
    db_structure_attrs = attributes.map(&:attr)
    virtual_api_settings = documentation_api_settings.reject { |api| db_structure_attrs.include?(api.attr) }
    virtual_api_settings.each do |virtual_api_setting|
      stdout.puts "API setting `#{virtual_api_setting.attr}` doesn't actually exist as a DB " \
        "column in `application_settings`!"
    end
  end

  def write_attributes!
    attributes.each do |final_attribute|
      File.write(
        final_attribute.definition_file_path,
        Hash[final_attribute.to_h.sort].transform_keys(&:to_s).to_yaml
      )
    end
  end

  def clean_outdated_definition_files!
    valid_attribute_names = attributes.map(&:attr)

    self.class.definition_files.each do |path|
      attribute_name = File.basename(path, '.yml')
      next if valid_attribute_names.include?(attribute_name)

      stdout.puts "Deleting #{path} since the #{attribute_name} attribute doesn't exist anymore."
      File.unlink(path)
    end
  end

  def write_documentation_page! # rubocop:disable Metrics/AbcSize: -- The method generates a doc page so it's a bit special
    doc_page = DOC_PAGE_HEADERS.dup

    doc_page << "- Number of attributes: #{attributes.count}"

    as_encrypted = attributes.count(&:encrypted)
    doc_page << "- Number of encrypted attributes: #{as_encrypted} " \
      "(#{(as_encrypted.to_f / attributes.count).round(2) * 100}%)"

    as_documented = attributes.count(&:description)
    doc_page << "- Number of attributes documented: #{as_documented} " \
      "(#{(as_documented.to_f / attributes.count).round(2) * 100}%)"

    as_on_gitlab_com_different_than_default = attributes.count(&:gitlab_com_different_than_default)
    doc_page << "- Number of attributes on GitLab.com different from the defaults: " \
      "#{as_on_gitlab_com_different_than_default} " \
      "(#{(as_on_gitlab_com_different_than_default.to_f / attributes.count).round(2) * 100}%)"

    as_with_clusterwide_set = attributes.count { |as| !as.clusterwide.nil? }
    doc_page << "- Number of attributes with `clusterwide` set: #{as_with_clusterwide_set} " \
      "(#{(as_with_clusterwide_set.to_f / attributes.count).round(2) * 100}%)"

    as_with_clusterwide_true = attributes.count(&:clusterwide)
    doc_page << "- Number of attributes with `clusterwide: true` set: #{as_with_clusterwide_true} " \
      "(#{(as_with_clusterwide_true.to_f / attributes.count).round(2) * 100}%)\n"

    doc_page << "## Individual columns\n"
    doc_page << "| Attribute name | Encrypted | DB Type | API Type | Not Null? | Default | " \
      "GitLab.com != default | Cluster-wide? | Documented? |"
    doc_page << "| -------------- | ------------- | --------- | --------- | ----------------- | " \
      "--------------------- | ------------- | ----------- |"

    attributes.each do |as|
      jihu = as.jihu ? ' [JIHU]' : ''
      doc_page << "| `#{as.attr}`#{jihu} | `#{as.encrypted}` | `#{as.db_type}` | `#{as.api_type}` | `#{as.not_null}` " \
        "| `#{as.default || (as.not_null ? '???' : 'null')}` | `#{as.gitlab_com_different_than_default}` " \
        "| `#{as.clusterwide.nil? ? '???' : as.clusterwide}`| `#{!!as.description}` |"
    end

    doc_page << '' # trailing line

    File.write(File.expand_path("../../doc/development/cells/application_settings_analysis.md", __dir__),
      doc_page.join("\n"))
  end
end

ApplicationSettingsAnalysis.new.execute if $PROGRAM_NAME == __FILE__
