# frozen_string_literal: true
# rubocop:todo all

require 'spec_helper'

describe 'SDAM error handling' do
  # Constraints for every example in this file. These are spec-suite macros
  # defined outside this file; presumably they skip the examples when the
  # deployment topology or Ruby interpreter does not match — confirm against
  # the helper definitions.
  require_topology :single, :replica_set, :sharded
  require_mri

  # NOTE(review): clean_slate and retry_test are suite helpers defined
  # elsewhere; by name they reset state and re-run failed examples — verify.
  clean_slate

  retry_test

  # Runs after each example in this file, including those in nested groups.
  after do
    # Close all clients after every test to avoid leaking expectations into
    # subsequent tests because we set global assertions on sockets.
    ClientRegistry.instance.close_all_clients
  end

  # These tests operate on specific servers, and don't work in a multi
  # shard cluster where multiple servers are equally eligible
  require_no_multi_mongos

  # Verbose subscriber that can be wired in through the commented-out
  # sdam_proc option of the client below when debugging a failure.
  let(:diagnostic_subscriber) do
    Mrss::VerboseEventSubscriber.new
  end

  # Client under test: the suite-wide test options overridden with 3-second
  # socket/connect timeouts, a heartbeat_frequency of 100, populator_io
  # turned off, plus whatever Utils.disable_retries_client_options supplies.
  let(:client) do
    addresses = SpecConfig.instance.addresses
    options = SpecConfig.instance.all_test_options.merge(
      socket_timeout: 3,
      connect_timeout: 3,
      heartbeat_frequency: 100,
      populator_io: false,
      # Uncomment to print all events to stdout:
      #sdam_proc: Utils.subscribe_all_sdam_proc(diagnostic_subscriber),
      **Utils.disable_retries_client_options
    )
    new_local_client(addresses, options)
  end

  # The server the examples act upon: the primary selected via the client's
  # cluster.
  let(:server) do
    client.cluster.next_primary
  end

  # Shared example: running +operation+ must move +server+ from a known
  # state to unknown. The including context provides +operation+.
  shared_examples_for 'marks server unknown' do
    # The monitor is stopped before each example — presumably so background
    # checks cannot change the server state while we assert on it.
    before { server.monitor.stop! }

    after { client.close }

    it 'marks server unknown' do
      # Precondition: the server starts out known.
      expect(server).not_to be_unknown

      RSpec::Mocks.with_temporary_scope do
        operation

        expect(server).to be_unknown
      end
    end
  end

  # Shared example: running +operation+ must leave +server+ in a known
  # state. The including context provides +operation+.
  shared_examples_for 'does not mark server unknown' do
    # The monitor is stopped before each example — presumably so background
    # checks cannot change the server state while we assert on it.
    before { server.monitor.stop! }

    after { client.close }

    it 'does not mark server unknown' do
      # Precondition: the server starts out known.
      expect(server).not_to be_unknown

      RSpec::Mocks.with_temporary_scope do
        operation

        expect(server).not_to be_unknown
      end
    end
  end

  # Shared example: +operation+ must signal the server's scan semaphore.
  shared_examples_for 'requests server scan' do
    it 'requests server scan' do
      RSpec::Mocks.with_temporary_scope do
        semaphore = server.scan_semaphore
        expect(semaphore).to receive(:signal)

        operation
      end
    end
  end

  # Shared example: +operation+ must not signal the server's scan semaphore.
  shared_examples_for 'does not request server scan' do
    it 'does not request server scan' do
      RSpec::Mocks.with_temporary_scope do
        semaphore = server.scan_semaphore
        expect(semaphore).not_to receive(:signal)

        operation
      end
    end
  end

  # Shared example: +operation+ must advance the pool generation by exactly
  # one.
  shared_examples_for 'clears connection pool' do
    it 'clears connection pool' do
      generation_before = server.pool.generation

      RSpec::Mocks.with_temporary_scope do
        operation

        # NOTE(review): the post-operation read goes through pool_internal
        # rather than pool — presumably because pool is not usable once the
        # server may have been marked unknown; confirm.
        expect(server.pool_internal.generation).to eq(generation_before + 1)
      end
    end
  end

  # Shared example: +operation+ must leave the pool generation unchanged.
  shared_examples_for 'does not clear connection pool' do
    it 'does not clear connection pool' do
      generation_before = server.pool.generation

      RSpec::Mocks.with_temporary_scope do
        operation

        # The post-operation read goes through pool_internal, mirroring the
        # 'clears connection pool' shared example.
        expect(server.pool_internal.generation).to eq(generation_before)
      end
    end
  end

  describe 'when there is an error during an operation' do

    before do
      # Select the primary and run one command against it up front. We need
      # an established connection to the primary so that our error
      # expectations do not get triggered during handshakes, which behave
      # differently from non-handshake errors.
      client.cluster.next_primary
      client.database.command(ping: 1)
    end

    # Makes Connection#deliver return the canned +reply+ and verifies that a
    # ping then raises OperationFailure matching +exception_message+. The
    # contexts below supply +reply+ and +exception_message+.
    let(:operation) do
      expect_any_instance_of(Mongo::Server::Connection)
        .to receive(:deliver).and_return(reply)
      expect { client.database.command(ping: 1) }
        .to raise_error(Mongo::Error::OperationFailure, exception_message)
    end

    # Expectations shared by "not master" and "node recovering" replies:
    # the server becomes unknown and a scan is requested; whether the pool
    # is cleared depends on the server version.
    shared_examples_for 'not master or node recovering' do
      it_behaves_like 'marks server unknown'
      it_behaves_like 'requests server scan'

      context 'server 4.2 or higher' do
        min_server_fcv '4.2'

        it_behaves_like 'does not clear connection pool'
      end

      context 'server 4.0 or lower' do
        max_server_version '4.0'

        it_behaves_like 'clears connection pool'
      end
    end

    # Expectations for a "node shutting down" reply: the server becomes
    # unknown, a scan is requested and the pool is cleared.
    shared_examples_for 'node shutting down' do
      it_behaves_like 'marks server unknown'
      it_behaves_like 'requests server scan'
      it_behaves_like 'clears connection pool'
    end

    context 'not master error' do
      let(:exception_message) { /not master/ }
      let(:reply) { make_not_master_reply }

      it_behaves_like 'not master or node recovering'
    end

    context 'node recovering error' do
      let(:exception_message) { /DueToStepDown/ }
      let(:reply) { make_node_recovering_reply }

      it_behaves_like 'not master or node recovering'
    end

    context 'node shutting down error' do
      let(:exception_message) { /shutdown in progress/ }
      let(:reply) { make_node_shutting_down_reply }

      it_behaves_like 'node shutting down'
    end

    context 'network error' do
      # With 4.4 servers we set up two monitoring connections, hence global
      # socket expectations get hit twice.
      max_server_version '4.2'

      # Makes Socket#read raise +exception+ and verifies that the ping
      # surfaces that same exception.
      let(:operation) do
        expect_any_instance_of(Mongo::Socket)
          .to receive(:read).and_raise(exception)
        expect { client.database.command(ping: 1) }
          .to raise_error(exception)
      end

      context 'non-timeout network error' do
        let(:exception) { Mongo::Error::SocketError }

        it_behaves_like 'marks server unknown'
        it_behaves_like 'does not request server scan'
        it_behaves_like 'clears connection pool'
      end

      context 'network timeout error' do
        let(:exception) { Mongo::Error::SocketTimeoutError }

        it_behaves_like 'does not mark server unknown'
        it_behaves_like 'does not request server scan'
        it_behaves_like 'does not clear connection pool'
      end
    end
  end

  describe 'when there is an error during connection establishment' do
    require_topology :single

    # The push monitor creates sockets unpredictably and interferes with this
    # test.
    max_server_version '4.2'

    # When TLS is used there are two socket classes and we can't simply
    # mock the base Socket class.
    require_no_tls

    # Each raw error raised by the mocked socket is paired with the driver
    # error class the test expects the client to surface.
    {
      SystemCallError => Mongo::Error::SocketError,
      Errno::ETIMEDOUT => Mongo::Error::SocketTimeoutError,
    }.each do |raw_error_cls, mapped_error_cls|
      context raw_error_cls.name do
        # Stand-in for a raw socket: the listed setup/teardown calls are
        # permitted, and the write must happen and raises the raw error.
        let(:socket) do
          double('mock socket').tap do |mock_socket|
            allow(mock_socket).to receive(:set_encoding)
            allow(mock_socket).to receive(:setsockopt)
            allow(mock_socket).to receive(:getsockopt)
            allow(mock_socket).to receive(:connect)
            allow(mock_socket).to receive(:close)
            expect(mock_socket).to receive(:write).and_raise(raw_error_cls, 'mocked failure')
          end
        end

        it 'marks server unknown' do
          server = client.cluster.next_primary
          pool = client.cluster.pool(server)
          # Drop existing connections so that the next command has to open a
          # new socket, which is the mocked one below.
          client.cluster.servers.each(&:disconnect!)

          RSpec::Mocks.with_temporary_scope do

            expect(Socket).to receive(:new).with(any_args).ordered.once.and_return(socket)
            allow(pool).to receive(:paused?).and_return(false)
            expect do
              client.command(ping: 1)
            end.to raise_error(mapped_error_cls, /mocked failure/)

            expect(server).to be_unknown
          end
        end

        it 'recovers' do
          # Select the primary up front; the returned server is not needed.
          client.cluster.next_primary
          # If we do not kill the monitor, the client will recover automatically.

          RSpec::Mocks.with_temporary_scope do

            # First socket is the failing mock, the second one is real.
            expect(Socket).to receive(:new).with(any_args).ordered.once.and_return(socket)
            expect(Socket).to receive(:new).with(any_args).ordered.once.and_call_original

            expect do
              client.command(ping: 1)
            end.to raise_error(mapped_error_cls, /mocked failure/)

            client.command(ping: 1)
          end
        end
      end
    end

    after do
      # Since we stopped monitoring on the client, close it.
      ClientRegistry.instance.close_all_clients
    end
  end

  describe 'when there is an error on monitoring connection' do
    clean_slate_for_all

    # Collects the SDAM and CMAP events published by the client.
    let(:subscriber) { Mrss::EventSubscriber.new }

    # Subscribes +subscriber+ to server-description-changed and connection
    # pool events on the client.
    let(:set_subscribers) do
      client.subscribe(Mongo::Monitoring::SERVER_DESCRIPTION_CHANGED, subscriber)
      client.subscribe(Mongo::Monitoring::CONNECTION_POOL, subscriber)
    end

    # Runs a monitor scan whose check is stubbed to raise +exception+, then
    # evaluates the context-provided +expect_server_state_change+.
    let(:operation) do
      expect(server.monitor.connection).not_to be_nil
      set_subscribers
      RSpec::Mocks.with_temporary_scope do
        expect(server.monitor).to receive(:check).and_raise(exception)
        server.monitor.scan!
      end
      expect_server_state_change
    end

    # Verifies through SDAM events that +operation+ publishes a description
    # change to unknown for this server's address.
    shared_examples_for 'marks server unknown - sdam event' do
      it 'marks server unknown' do
        expect(server).not_to be_unknown

        #subscriber.clear_events!
        events = subscriber.select_succeeded_events(Mongo::Monitoring::Event::ServerDescriptionChanged)
        expect(events).to be_empty

        RSpec::Mocks.with_temporary_scope do
          operation

          events = subscriber.select_succeeded_events(Mongo::Monitoring::Event::ServerDescriptionChanged)
          expect(events).not_to be_empty
          unknown_event = events.find do |candidate|
            candidate.new_description.address == server.address &&
              candidate.new_description.unknown?
          end
          expect(unknown_event).not_to be_nil
        end
      end
    end

    # Verifies through CMAP events that +operation+ publishes a PoolCleared
    # event for this server's address.
    shared_examples_for 'clears connection pool - cmap event' do
      it 'clears connection pool' do
        #subscriber.clear_events!
        events = subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared)
        expect(events).to be_empty

        RSpec::Mocks.with_temporary_scope do
          operation

          events = subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared)
          expect(events).not_to be_empty
          cleared_event = events.find do |candidate|
            candidate.address == server.address
          end
          expect(cleared_event).not_to be_nil
        end
      end
    end

    shared_examples_for 'marks server unknown and clears connection pool' do
=begin These tests are not reliable
      context 'via object inspection' do
        let(:expect_server_state_change) do
          server.summary.should =~ /unknown/i
          expect(server).to be_unknown
        end

        it_behaves_like 'marks server unknown'
        it_behaves_like 'clears connection pool'
      end
=end

      context 'via events' do
        # When we use events we do not need to examine object state, therefore
        # it does not matter whether the server stays unknown or gets
        # successfully checked.
        let(:expect_server_state_change) do
          # nothing
        end

        it_behaves_like 'marks server unknown - sdam event'
        it_behaves_like 'clears connection pool - cmap event'
      end
    end

    context 'via stubs' do
      # With 4.4 servers we set up two monitoring connections, hence global
      # socket expectations get hit twice.
      max_server_version '4.2'

      context 'network timeout' do
        let(:exception) { Mongo::Error::SocketTimeoutError }

        it_behaves_like 'marks server unknown and clears connection pool'
      end

      context 'non-timeout network error' do
        let(:exception) { Mongo::Error::SocketError }

        it_behaves_like 'marks server unknown and clears connection pool'
      end
    end

    context 'non-timeout network error via fail point' do
      require_fail_command

      let(:admin_client) { client.use(:admin) }

      # Configures failCommand to close the connection on the next two
      # isMaster/hello commands.
      let(:set_fail_point) do
        admin_client.command(
          configureFailPoint: 'failCommand',
          mode: {times: 2},
          data: {
            failCommands: %w(isMaster hello),
            closeConnection: true,
          },
        )
      end

      # Same flow as the stubbed operation above, except that the failure
      # comes from the server-side fail point rather than from a stub.
      let(:operation) do
        expect(server.monitor.connection).not_to be_nil
        set_subscribers
        set_fail_point
        server.monitor.scan!
        expect_server_state_change
      end

      # https://jira.mongodb.org/browse/RUBY-2523
      # it_behaves_like 'marks server unknown and clears connection pool'

      after do
        admin_client.command(configureFailPoint: 'failCommand', mode: 'off')
      end
    end
  end

  context 'when there is an error on the handshake' do
    # require appName for fail point
    min_server_version '4.9'

    # Direct connection to the first configured address, on the admin
    # database. Its app name is the same string the fail point below
    # filters on.
    let(:admin_client) do
      address = SpecConfig.instance.addresses.first
      options = SpecConfig.instance.test_options.merge(
        connect: :direct,
        populator_io: false,
        direct_connection: true,
        app_name: 'SDAMMinHeartbeatFrequencyTest',
        database: 'admin'
      )
      new_local_client([address], options)
    end

    # Change the server selection timeout so that we are given a new cluster.
    let(:cmd_client) { admin_client.with(server_selection_timeout: 5) }

    # Configures failCommand to fail the next five isMaster/hello commands
    # with error code 1234 for clients using the app name above.
    let(:set_fail_point) do
      fail_point_data = {
        failCommands: %w(isMaster hello),
        errorCode: 1234,
        appName: 'SDAMMinHeartbeatFrequencyTest',
      }
      admin_client.command(
        configureFailPoint: 'failCommand',
        mode: { times: 5 },
        data: fail_point_data
      )
    end

    # Checks that the monitor has a connection, then arms the fail point.
    let(:operation) do
      expect(server.monitor.connection).not_to be nil
      set_fail_point
    end

    it 'waits 500ms between failed hello checks' do
      operation

      # cmd_client is first referenced inside the timed section, so its
      # creation is part of the measured duration.
      started_at = Mongo::Utils.monotonic_time
      cmd_client.command(hello: 1)
      elapsed = Mongo::Utils.monotonic_time - started_at

      # Presumably five failed checks spaced ~500ms apart (about 2.5s),
      # hence the accepted window of 2 to 3.5 seconds.
      expect(elapsed).to be_between(2, 3.5).inclusive

      # The cluster that we use to set up the failpoint should not be the same
      # one we run the command on, so that the command will have to select a
      # server. The admin client has already selected a server.
      expect(admin_client.cluster.object_id).not_to eq(cmd_client.cluster.object_id)
    end

    after do
      admin_client.command(configureFailPoint: 'failCommand', mode: 'off')
      cmd_client.close
    end
  end
end
