File: client_side_detection_service.h

package info (click to toggle)
chromium 139.0.7258.127-1
links: PTS, VCS
area: main
in suites:
size: 6,122,068 kB
sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (432 lines) | stat: -rw-r--r-- 18,918 bytes
parent folder | download | duplicates (3)
// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Helper class which handles communication with the SafeBrowsing backends for
// client-side phishing detection.  This class is used to fetch the client-side
// model and send it to all renderers.  This class is also used to send a ping
// back to Google to verify if a particular site is really phishing or not.
//
// This class is not thread-safe and expects all calls to be made on the UI
// thread.  We also expect that the calling thread runs a message loop.

#ifndef COMPONENTS_SAFE_BROWSING_CONTENT_BROWSER_CLIENT_SIDE_DETECTION_SERVICE_H_
#define COMPONENTS_SAFE_BROWSING_CONTENT_BROWSER_CLIENT_SIDE_DETECTION_SERVICE_H_

#include <deque>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "base/containers/flat_map.h"
#include "base/containers/queue.h"
#include "base/functional/callback_forward.h"
#include "base/gtest_prod_util.h"
#include "base/memory/read_only_shared_memory_region.h"
#include "base/memory/ref_counted.h"
#include "base/memory/weak_ptr.h"
#include "base/scoped_multi_source_observation.h"
#include "base/sequence_checker.h"
#include "base/time/time.h"
#include "components/keyed_service/core/keyed_service.h"
#include "components/optimization_guide/core/optimization_guide_model_executor.h"
#include "components/optimization_guide/proto/common_types.pb.h"
#include "components/optimization_guide/proto/features/scam_detection.pb.h"
#include "components/prefs/pref_change_registrar.h"
#include "components/safe_browsing/content/browser/client_side_phishing_model.h"
#include "components/safe_browsing/core/common/proto/csd.pb.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/render_process_host_creation_observer.h"
#include "content/public/browser/render_process_host_observer.h"
#include "net/base/ip_address.h"
#include "net/http/http_status_code.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
#include "url/gurl.h"

namespace network {
class SimpleURLLoader;
class SharedURLLoaderFactory;
}  // namespace network

namespace safe_browsing {
class ClientPhishingRequest;
class ClientSideDetectionHost;

// Enum used to keep stats on classification using threshold comparison.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused. Because of that, new entries must be
// appended with the next unused value rather than inserted in the middle.
enum class SBClientDetectionClassifyThresholdsResult {
  kSuccess = 0,
  kModelSizeMismatch = 1,
  kModelLabelNotFound = 2,
  // Aliases the highest-valued entry above. When a new entry is appended,
  // point this at the new entry so the two stay in sync.
  kMaxValue = kModelLabelNotFound,
};

// Shorthand names, within the safe_browsing namespace, for the optimization
// guide scam detection request/response protos.
using ScamDetectionRequest = optimization_guide::proto::ScamDetectionRequest;
using ScamDetectionResponse = optimization_guide::proto::ScamDetectionResponse;

// Main service which pushes models to the renderers and responds to
// classification requests. This owns the ClientSidePhishingModel (see
// |client_side_phishing_model_|) as well as the embedder-provided Delegate.
//
// The service observes render process host creation and lifetime events via
// content::RenderProcessHostCreationObserver and
// content::RenderProcessHostObserver.
class ClientSideDetectionService
    : public KeyedService,
      public content::RenderProcessHostCreationObserver,
      public content::RenderProcessHostObserver {
 public:
  // Callback invoked with the result of a client report phishing request:
  // void(GURL phishing_url, bool is_phishing,
  // std::optional<net::HttpStatusCode> response_code,
  // std::optional<IntelligentScanVerdict> intelligent_scan_verdict).
  using ClientReportPhishingRequestCallback =
      base::OnceCallback<void(GURL,
                              bool,
                              std::optional<net::HttpStatusCode>,
                              std::optional<IntelligentScanVerdict>)>;

  // Delegate which allows embedders to provide embedder-specific
  // implementations.
  class Delegate {
   public:
    virtual ~Delegate() = default;

    // Returns the pref service associated with the current profile.
    virtual PrefService* GetPrefs() = 0;
    // Returns the main URLLoaderFactory.
    virtual scoped_refptr<network::SharedURLLoaderFactory>
    GetURLLoaderFactory() = 0;
    // NOTE(review): presumably the URLLoaderFactory dedicated to Safe Browsing
    // traffic, as opposed to the main one above -- confirm against the
    // embedder implementation.
    virtual scoped_refptr<network::SharedURLLoaderFactory>
    GetSafeBrowsingURLLoaderFactory() = 0;
    // Returns whether the model should be sent for |context|. The criteria are
    // embedder-defined.
    virtual bool ShouldSendModelToBrowserContext(
        content::BrowserContext* context) = 0;
    // Starts listening to the on-device model update through OptimizationGuide.
    // A check will be made in the delegate to confirm that it's not listening
    // for availability before subscribing. This will be called when the user
    // preferences change and the user is subscribed to Enhanced Safe Browsing.
    virtual void StartListeningToOnDeviceModelUpdate() = 0;
    // Stops listening to the on-device model update through OptimizationGuide.
    // The delegate checks whether it has already stopped listening for
    // on-device model updates.
    virtual void StopListeningToOnDeviceModelUpdate() = 0;
    // Returns the on-device model session which allows us to execute the model.
    virtual std::unique_ptr<
        optimization_guide::OptimizationGuideModelExecutor::Session>
    GetModelExecutorSession() = 0;
    // Logs why the on-device model is (or is not) eligible. See
    // IsOnDeviceModelAvailable(), which documents logging of failed
    // eligibility reasons.
    virtual void LogOnDeviceModelEligibilityReason() = 0;
  };

  // Takes ownership of |delegate|. |opt_guide| is not owned.
  ClientSideDetectionService(
      std::unique_ptr<Delegate> delegate,
      optimization_guide::OptimizationGuideModelProvider* opt_guide);

  ClientSideDetectionService(const ClientSideDetectionService&) = delete;
  ClientSideDetectionService& operator=(const ClientSideDetectionService&) =
      delete;

  ~ClientSideDetectionService() override;

  // KeyedService:
  void Shutdown() override;

  // Returns whether the service is currently enabled. Must be called on the UI
  // thread.
  bool enabled() const {
    DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
    return enabled_;
  }

  // Completion handler for a request issued through |url_loader|.
  // |response_body| is the body returned by the loader. NOTE(review):
  // |start_time| is presumably the time the request was started, used for
  // latency reporting -- confirm against the .cc file.
  void OnURLLoaderComplete(network::SimpleURLLoader* url_loader,
                           base::Time start_time,
                           std::unique_ptr<std::string> response_body);

  // Sends a request to the SafeBrowsing servers with the ClientPhishingRequest.
  // The URL scheme of the |url()| in the request should be HTTP.  This method
  // takes ownership of the |verdict| as well as the |callback| and calls the
  // callback once the result has come back from the server or if an error
  // occurs during the fetch.  If the service is disabled or an error occurs the
  // phishing verdict will always be false.  The callback is always called after
  // SendClientReportPhishingRequest() returns and on the same thread as
  // SendClientReportPhishingRequest() was called.  You may pass a null
  // |callback| if you don't care about the server verdict.  If |access_token|
  // is not empty, it is set in the "Authorization: Bearer" header.
  virtual void SendClientReportPhishingRequest(
      std::unique_ptr<ClientPhishingRequest> verdict,
      ClientReportPhishingRequestCallback callback,
      const std::string& access_token);

  // Returns true if the given IP address falls within a private
  // (unroutable) network block.  Pages which are hosted on these IP addresses
  // are exempt from client-side phishing detection.  This is called by the
  // ClientSideDetectionHost prior to sending the renderer a
  // SafeBrowsingMsg_StartPhishingDetection IPC.
  virtual bool IsPrivateIPAddress(const net::IPAddress& address) const;

  // Returns true if the given IP address does not refer to remote content. For
  // example, local files and chrome:// pages will create navigations that
  // return true.
  virtual bool IsLocalResource(const net::IPAddress& address) const;

  // Returns true and sets |is_phishing| if |url| is in the cache and the
  // cached entry is still valid.
  virtual bool GetValidCachedResult(const GURL& url, bool* is_phishing);

  // Returns true if we have sent at least kMaxReportsPerInterval phishing
  // reports in the last kReportsIntervalDays days.
  virtual bool AtPhishingReportLimit();

  // Sends a model to each renderer.
  virtual void SendModelToRenderers();

  // Returns the model type (protobuf or flatbuffer). Virtual so that mock
  // implementation can override it.
  virtual CSDModelType GetModelType();

  // Returns the ReadOnlySharedMemoryRegion for the flatbuffer model. Virtual so
  // that mock implementation can override it.
  virtual base::ReadOnlySharedMemoryRegion GetModelSharedMemoryRegion();

  // Returns the TfLite model file. Virtual so that mock implementation can
  // override it.
  virtual const base::File& GetVisualTfLiteModel();

  // Returns the Image Embedding model file. Virtual so that mock implementation
  // can override it.
  virtual const base::File& GetImageEmbeddingModel();

  // NOTE(review): presumably reports whether the image embedding model version
  // agrees with the version recorded in the model metadata -- confirm against
  // the .cc file.
  virtual bool IsModelMetadataImageEmbeddingVersionMatching();

  // Returns the visual TFLite model thresholds from the model class, keyed by
  // string.
  virtual const base::flat_map<std::string, TfLiteModelMetadata::Threshold>&
  GetVisualTfLiteModelThresholds();

  // Compares the scores from classification in |verdict| to the TFLite model
  // thresholds.
  virtual void ClassifyPhishingThroughThresholds(
      ClientPhishingRequest* verdict);

  // Overrides the SharedURLLoaderFactory. Test-only.
  void SetURLLoaderFactoryForTesting(
      scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory);

  // Sets the phishing model for a single render process host, |rph| (contrast
  // with SendModelToRenderers(), which targets every renderer).
  // NOTE(review): |new_renderer_process_host| presumably indicates that |rph|
  // was just created -- confirm against the .cc file.
  void SetPhishingModel(content::RenderProcessHost* rph,
                        bool new_renderer_process_host);

  // Returns a WeakPtr for this service.
  base::WeakPtr<ClientSideDetectionService> GetWeakPtr();

  // Checks whether the model class has a model available or not. Virtual so
  // that mock classes can override it.
  virtual bool IsModelAvailable();

  // Checks whether the model class has an image embedding model available or
  // not.
  bool HasImageEmbeddingModel();

  // For testing the model in browser test. |model| and |visual_tf_lite| are
  // paths to the respective model files.
  void SetModelAndVisualTfLiteForTesting(const base::FilePath& model,
                                         const base::FilePath& visual_tf_lite);

  // Returns whether the service is subscribed to image embedding model
  // updates.
  bool IsSubscribedToImageEmbeddingModelUpdates();

  // Registers |callback| to be run on model updates. The registration is tied
  // to the returned subscription object.
  base::CallbackListSubscription RegisterCallbackForModelUpdates(
      base::RepeatingClosure callback);

  // Returns the trigger model version to be used in cache for CSD-Phishing
  // debugging metadata.
  int GetTriggerModelVersion();

  // Called from the delegate when the on-device model is available to create a
  // session.
  void NotifyOnDeviceModelAvailable();

  // Returns |on_device_model_available_| which indicates the availability of
  // on-device model session creation. Also logs failed eligibility reason
  // histograms if |log_failed_eligibility_reason| is true.
  bool IsOnDeviceModelAvailable(bool log_failed_eligibility_reason);

  // Resets the session that's created by the on-device model. This occurs when
  // there is a new page navigation and at the start and end of
  // |InquireOnDeviceModel|.
  void ResetOnDeviceSession(bool inquiry_complete);

  // Called from the host class when the proper requirements are met to inquire
  // the on-device model. |callback| receives the model's response, or
  // std::nullopt when no response is available.
  virtual void InquireOnDeviceModel(
      std::string rendered_texts,
      base::OnceCallback<
          void(std::optional<optimization_guide::proto::ScamDetectionResponse>)>
          callback);

  // For testing the on-device model flow in unit test.
  void SetOnDeviceAvailabilityForTesting(bool available);

 private:
  friend class ClientSideDetectionServiceTest;
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           SetEnabledAndRefreshState);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           ServiceObjectDeletedBeforeCallbackDone);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           SendClientReportPhishingRequest);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           GetNumReportTestWhenPrefsPreloaded);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, GetNumReportTest);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, GetNumReportTestESB);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           TestModelFollowsPrefs);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           TestOnDeviceModelFetchCall);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           TestSessionCreationFailure);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           TestSessionCreationSuccess);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           TestSessionExecutionSuccess);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           TestSessionExecutionFailure);

  // CacheState holds all information necessary to respond to a caller without
  // actually making a HTTP request.
  struct CacheState {
    // The cached verdict.
    bool is_phishing;
    // Time associated with the cached verdict.
    base::Time timestamp;

    CacheState(bool phish, base::Time time);
  };

  // Endpoint and rate-limit / cache-lifetime constants; the values are defined
  // out of line.
  static const char kClientReportPhishingUrl[];
  static const int kMaxReportsPerInterval;
  static const int kReportsIntervalDays;
  static const int kNegativeCacheIntervalDays;
  static const int kPositiveCacheIntervalMinutes;

  // Called when the prefs have changed in a way we may need to respond to. May
  // enable or disable the service and refresh the state of all renderers.
  // Disabling cancels any pending requests; existing ClientSideDetectionHosts
  // will have their callbacks called with "false" verdicts.  Enabling starts
  // downloading the model after a delay.  In all cases, each render process is
  // updated to match the state.
  void OnPrefsUpdated();

  // Unsubscribes from model subscriptions. Currently we unsubscribe from the
  // image embedding model as well as the on-device model depending on user
  // preferences.
  void UnsubscribeToModelSubscription();

  // Starts sending the request to the client-side detection frontends.
  // This method takes ownership of both |request| and |callback|.
  void StartClientReportPhishingRequest(
      std::unique_ptr<ClientPhishingRequest> request,
      ClientReportPhishingRequestCallback callback,
      const std::string& access_token);

  // Called by OnURLLoaderComplete to handle the server response from
  // sending the client-side phishing request.
  void HandlePhishingVerdict(network::SimpleURLLoader* source,
                             const GURL& url,
                             int net_error,
                             std::optional<net::HttpStatusCode> response_code,
                             const std::string& data);

  // Invalidate cache results which are no longer useful.
  void UpdateCache();

  // Get the number of phishing reports that we have sent over the last
  // kReportsIntervalDays days.
  int GetPhishingNumReports();

  // Returns true if we can successfully add a phishing report to
  // |phishing_report_times_| and stores the result in prefs. Returns false if
  // we're at the ping limit or prefs is null.
  bool AddPhishingReport(base::Time timestamp);

  // Populates |phishing_report_times_| with the data stored in local prefs.
  void LoadPhishingReportTimesFromPrefs();

  // Returns the URL that will be used for phishing requests.
  static GURL GetClientReportUrl(const std::string& report_url);

  // content::RenderProcessHostCreationObserver:
  void OnRenderProcessHostCreated(content::RenderProcessHost* rph) override;

  // content::RenderProcessHostObserver:
  void RenderProcessHostDestroyed(content::RenderProcessHost* rph) override;
  void RenderProcessReady(content::RenderProcessHost* rph) override;

  // Receives streaming execution results from the on-device model session.
  // NOTE(review): presumably forwards the outcome through
  // |inquire_on_device_model_callback_| -- confirm against the .cc file.
  void ModelExecutionCallback(
      optimization_guide::OptimizationGuideModelStreamingExecutionResult
          result);

  // Whether the service is running or not.  When the service is not running,
  // it won't download the model nor report detected phishing URLs.
  bool enabled_ = false;

  // Whether the service is in extended reporting mode or not. This affects the
  // choice of model.
  bool extended_reporting_ = false;

  // Whether the trigger models have been sent or not. When the model class
  // holds an empty model, this distinguishes "models have not been sent yet"
  // from "models were sent and the scorer's models should now be cleared".
  bool sent_trigger_models_ = false;

  // This is to keep track of the trigger model version that was last sent to
  // the renderer host processes. This is used to determine, when the image
  // embedding model arrives, whether a new scorer should be made with all
  // models or the image embedding model can be attached to the current scorer.
  // This is also used to add to CSD-Phishing debugging metadata to PhishGuard
  // pings.
  int trigger_model_version_ = 0;

  // Map of client report phishing request to the corresponding callback that
  // has to be invoked when the request is done.
  struct ClientPhishingReportInfo;
  std::map<const network::SimpleURLLoader*,
           std::unique_ptr<ClientPhishingReportInfo>>
      client_phishing_reports_;

  // Cache of completed requests. Used to satisfy requests for the same urls
  // as long as the next request falls within our caching window (which is
  // determined by kNegativeCacheIntervalDays and
  // kPositiveCacheIntervalMinutes). The size of this cache is limited by the
  // report rate limit (kMaxReportsPerInterval reports per
  // kReportsIntervalDays) together with the longer of the two cache windows.
  // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
  std::map<GURL, std::unique_ptr<CacheState>> cache_;

  // Timestamps of when we sent phishing requests. Used to limit the number
  // of phishing requests that we send (see kMaxReportsPerInterval and
  // kReportsIntervalDays).
  std::deque<base::Time> phishing_report_times_;

  // The URLLoaderFactory we use to issue network requests.
  scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory_;

  // PrefChangeRegistrar used to track when the Safe Browsing pref changes.
  PrefChangeRegistrar pref_change_registrar_;

  // Embedder-provided delegate, owned by this service.
  std::unique_ptr<Delegate> delegate_;

  // Subscription handle for model update notifications.
  base::CallbackListSubscription update_model_subscription_;

  // The model class, owned by this service.
  std::unique_ptr<ClientSidePhishingModel> client_side_phishing_model_;

  // Render process hosts this service is currently observing as a
  // content::RenderProcessHostObserver.
  base::ScopedMultiSourceObservation<content::RenderProcessHost,
                                     content::RenderProcessHostObserver>
      observed_render_process_hosts_{this};

  // This is used to check before fetching the session when the correct trigger
  // is called to generate the on-device model LLM. This is set through the
  // delegate.
  bool on_device_model_available_ = false;

  // NOTE(review): presumably the time at which on-device session execution
  // started, for measuring execution latency -- confirm against the .cc file.
  base::TimeTicks session_execution_start_time_;
  // The underlying session provided by optimization guide component.
  std::unique_ptr<optimization_guide::OptimizationGuideModelExecutor::Session>
      session_;
  // Pending callback supplied to InquireOnDeviceModel().
  base::OnceCallback<void(
      std::optional<optimization_guide::proto::ScamDetectionResponse>)>
      inquire_on_device_model_callback_;

  SEQUENCE_CHECKER(sequence_checker_);

  // Used to asynchronously call the callbacks for
  // SendClientReportPhishingRequest. Keep this as the last member: members are
  // destroyed in reverse declaration order, so outstanding weak pointers are
  // invalidated before any other member is torn down.
  base::WeakPtrFactory<ClientSideDetectionService> weak_factory_{this};
};

}  // namespace safe_browsing

#endif  // COMPONENTS_SAFE_BROWSING_CONTENT_BROWSER_CLIENT_SIDE_DETECTION_SERVICE_H_