File: file_analyzer.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (162 lines) | stat: -rw-r--r-- 5,919 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_SAFE_BROWSING_DOWNLOAD_PROTECTION_FILE_ANALYZER_H_
#define CHROME_BROWSER_SAFE_BROWSING_DOWNLOAD_PROTECTION_FILE_ANALYZER_H_

#include <stdint.h>

#include <memory>
#include <optional>
#include <string>
#include <vector>

#include "base/files/file_path.h"
#include "base/functional/callback.h"
#include "base/memory/scoped_refptr.h"
#include "base/memory/weak_ptr.h"
#include "base/time/time.h"
#include "base/types/optional_ref.h"
#include "build/build_config.h"
#include "chrome/common/safe_browsing/archive_analyzer_results.h"
#include "chrome/common/safe_browsing/binary_feature_extractor.h"
#include "components/safe_browsing/core/common/proto/csd.pb.h"
#include "third_party/protobuf/src/google/protobuf/repeated_field.h"

#if !BUILDFLAG(IS_ANDROID)
#include "chrome/services/file_util/public/cpp/sandboxed_rar_analyzer.h"
#include "chrome/services/file_util/public/cpp/sandboxed_seven_zip_analyzer.h"
#include "chrome/services/file_util/public/cpp/sandboxed_zip_analyzer.h"
#endif

#if BUILDFLAG(IS_MAC)
#include "chrome/common/safe_browsing/disk_image_type_sniffer_mac.h"
#include "chrome/services/file_util/public/cpp/sandboxed_dmg_analyzer_mac.h"
#endif

namespace safe_browsing {

// This class does the file content analysis for a user download, extracting the
// features that will be sent to the SB backend. This class lives on the UI
// thread, which is where the result callback will be invoked.
//
// Instances are neither copyable nor assignable: the object owns a pending
// completion callback, per-format analyzers and a WeakPtrFactory bound to
// `this`.
class FileAnalyzer {
 public:
  // Tri-state validity marker for an archive. Not referenced elsewhere in this
  // header.
  enum class ArchiveValid { UNSET, VALID, INVALID };

  // This struct holds the possible features extracted from a file. It is the
  // value handed to the callback passed to Start().
  struct Results {
    Results();
    Results(const Results& other);
    ~Results();

    // What type of inspection was performed to yield the results here.
    DownloadFileType::InspectionType inspection_performed =
        DownloadFileType::NONE;

    // When analyzing a ZIP or RAR, the type becomes clarified by content
    // inspection (does it contain binaries/archives?). So we return a type.
    // Value-initialized, like the other scalar members, so that a
    // default-constructed (or copied) Results never carries an indeterminate
    // enum value.
    ClientDownloadRequest::DownloadType type{};

    // For archive files, whether the archive contains an executable. Has
    // unspecified contents for non-archive files.
    bool archived_executable = false;

    // For archive files, whether the archive contains an archive. Has
    // unspecified contents for non-archive files.
    bool archived_archive = false;

    // For archive files, the features extracted from each contained
    // archive/binary.
    google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>
        archived_binaries;

    // For executables, information about the signature of the executable.
    ClientDownloadRequest::SignatureInfo signature_info;

    // For executables, information about the file headers.
    ClientDownloadRequest::ImageHeaders image_headers;

#if BUILDFLAG(IS_MAC)
    // For DMG files, the signature of the DMG.
    std::vector<uint8_t> disk_image_signature;

    // For DMG files, any detached code signatures in the DMG.
    google::protobuf::RepeatedPtrField<
        ClientDownloadRequest::DetachedCodeSignature>
        detached_code_signatures;
#endif

    // For archives, the features and metadata extracted from the file.
    ClientDownloadRequest::ArchiveSummary archive_summary;

    // Information about the encryption on this file.
    EncryptionInfo encryption_info;
  };

  explicit FileAnalyzer(
      scoped_refptr<BinaryFeatureExtractor> binary_feature_extractor);
  // Copying is disallowed. The members (move-only callback, unique_ptr
  // analyzers, WeakPtrFactory) already prevent it implicitly; deleting the
  // operations here makes the intent explicit.
  FileAnalyzer(const FileAnalyzer&) = delete;
  FileAnalyzer& operator=(const FileAnalyzer&) = delete;
  ~FileAnalyzer();

  // Begins analysis of a download.
  //   `target_file_name`: the ultimate destination/filename of the download;
  //       see `target_file_name_` below for how it is used.
  //   `tmp_path`: the current path to the file contents.
  //   `password`: optional password; presumably used for encrypted archives
  //       (TODO confirm against the implementation).
  //   `callback`: receives the extracted Results; per the class comment it is
  //       invoked on the UI thread.
  void Start(const base::FilePath& target_file_name,
             const base::FilePath& tmp_path,
             base::optional_ref<const std::string> password,
             base::OnceCallback<void(Results)> callback);

 private:
  // Feature extraction for a file handled as a plain (non-archive) file, and
  // its completion handler.
  void StartExtractFileFeatures();
  void OnFileAnalysisFinished(Results results);

#if !BUILDFLAG(IS_ANDROID)
  // Per-archive-format start/completion pairs. These are not compiled on
  // Android, matching the conditional inclusion of the sandboxed analyzer
  // headers.
  void StartExtractZipFeatures();
  void OnZipAnalysisFinished(const ArchiveAnalyzerResults& archive_results);

  void StartExtractRarFeatures();
  void OnRarAnalysisFinished(const ArchiveAnalyzerResults& archive_results);

  void StartExtractSevenZipFeatures();
  void OnSevenZipAnalysisFinished(
      const ArchiveAnalyzerResults& archive_results);
#endif

#if BUILDFLAG(IS_MAC)
  // DMG (disk image) handling, Mac only.
  void StartExtractDmgFeatures();
  // Presumably dispatches to DMG or plain-file extraction depending on
  // `download_file_has_koly_signature` (TODO confirm against the
  // implementation).
  void ExtractFileOrDmgFeatures(bool download_file_has_koly_signature);
  void OnDmgAnalysisFinished(const ArchiveAnalyzerResults& archive_results);
#endif

  // Logs the analysis duration; judging by the name, once with and once
  // without `suffix` appended to the metric name (TODO confirm against the
  // implementation).
  void LogAnalysisDurationWithAndWithoutSuffix(const std::string& suffix);

  // The ultimate destination/filename for the download. This is used to
  // determine the filetype from the filename extension/suffix, and should be a
  // human-readable filename (i.e. not a content-URI, on Android).
  base::FilePath target_file_name_;

  // The current path to the file contents.
  base::FilePath tmp_path_;

  // Optional password supplied to Start().
  std::optional<std::string> password_;
  // Extractor handed to the constructor.
  scoped_refptr<BinaryFeatureExtractor> binary_feature_extractor_;
  // Completion callback supplied to Start().
  base::OnceCallback<void(Results)> callback_;
  // Presumably the time at which Start() was called, used for duration
  // logging (TODO confirm against the implementation).
  base::Time start_time_;
  // Results accumulated for the current analysis.
  Results results_;

  // Per-format analyzers. Each starts out null with a deleter that has no task
  // runner. The declaration order (and therefore the construction/destruction
  // order) of these members is intentionally left as-is, which is why the
  // platform guards below are not merged.
#if !BUILDFLAG(IS_ANDROID)
  std::unique_ptr<SandboxedZipAnalyzer, base::OnTaskRunnerDeleter>
      zip_analyzer_{nullptr, base::OnTaskRunnerDeleter(nullptr)};

  std::unique_ptr<SandboxedRarAnalyzer, base::OnTaskRunnerDeleter>
      rar_analyzer_{nullptr, base::OnTaskRunnerDeleter(nullptr)};
#endif

#if BUILDFLAG(IS_MAC)
  std::unique_ptr<SandboxedDMGAnalyzer, base::OnTaskRunnerDeleter>
      dmg_analyzer_{nullptr, base::OnTaskRunnerDeleter(nullptr)};
#endif

#if !BUILDFLAG(IS_ANDROID)
  std::unique_ptr<SandboxedSevenZipAnalyzer, base::OnTaskRunnerDeleter>
      seven_zip_analyzer_{nullptr, base::OnTaskRunnerDeleter(nullptr)};
#endif

  // Declared last so it is destroyed first, before the other members.
  base::WeakPtrFactory<FileAnalyzer> weakptr_factory_{this};
};

}  // namespace safe_browsing

#endif  // CHROME_BROWSER_SAFE_BROWSING_DOWNLOAD_PROTECTION_FILE_ANALYZER_H_