1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
|
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// BrowserFeatureExtractor computes various browser features for client-side
// phishing detection. For now it does a bunch of lookups in the history
// service to see whether a particular URL has been visited before by the
// user.
#ifndef CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
#define CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "base/basictypes.h"
#include "base/callback.h"
#include "base/containers/hash_tables.h"
#include "base/memory/scoped_ptr.h"
#include "base/task/cancelable_task_tracker.h"
#include "base/time/time.h"
#include "chrome/browser/safe_browsing/safe_browsing_service.h"
#include "chrome/browser/safe_browsing/ui_manager.h"
#include "components/history/core/browser/history_types.h"
#include "content/public/common/resource_type.h"
#include "url/gurl.h"
class HistoryService;
namespace content {
class WebContents;
}
namespace safe_browsing {
class ClientMalwareRequest;
class ClientPhishingRequest;
class ClientSideDetectionHost;
struct IPUrlInfo {
// The url on the bad IP address.
std::string url;
std::string method;
std::string referrer;
content::ResourceType resource_type;
IPUrlInfo(const std::string& url,
const std::string& method,
const std::string& referrer,
const content::ResourceType& resource_type);
~IPUrlInfo();
};
typedef std::map<std::string, std::vector<IPUrlInfo> > IPUrlMap;
struct BrowseInfo {
// The URL we're currently browsing.
GURL url;
// List of IPv4 and IPv6 addresses from which content was requested
// together with the hosts on it, while browsing to the |url|.
IPUrlMap ips;
// If a SafeBrowsing interstitial was shown for the current URL
// this will contain the UnsafeResource struct for that URL.
scoped_ptr<SafeBrowsingUIManager::UnsafeResource> unsafe_resource;
// List of redirects that lead to the first page on the current host and
// the current url respectively. These may be the same if the current url
// is the first page on its host.
std::vector<GURL> host_redirects;
std::vector<GURL> url_redirects;
// URL of the referrer of this URL load.
GURL referrer;
// The HTTP status code from this navigation.
int http_status_code;
BrowseInfo();
~BrowseInfo();
};
// All methods of this class must be called on the UI thread (including
// the constructor).
class BrowserFeatureExtractor {
public:
// Called when feature extraction is done. The first argument will be
// true iff feature extraction succeeded. The second argument is the
// phishing request which was modified by the feature extractor. The
// DoneCallback takes ownership of the request object.
typedef base::Callback<void(bool, scoped_ptr<ClientPhishingRequest>)>
DoneCallback;
typedef base::Callback<void(bool, scoped_ptr<ClientMalwareRequest>)>
MalwareDoneCallback;
// The caller keeps ownership of the tab and host objects and is
// responsible for ensuring that they stay valid for the entire
// lifetime of this object.
BrowserFeatureExtractor(content::WebContents* tab,
ClientSideDetectionHost* host);
// The destructor will cancel any pending requests.
virtual ~BrowserFeatureExtractor();
// Begins extraction of the browser features. We take ownership
// of the request object until |callback| is called (see DoneCallback above)
// and will write the extracted features to the feature map. Once the
// feature extraction is complete, |callback| is run on the UI thread. We
// take ownership of the |callback| object. |info| may not be valid after
// ExtractFeatures returns. This method must run on the UI thread.
virtual void ExtractFeatures(const BrowseInfo* info,
ClientPhishingRequest* request,
const DoneCallback& callback);
// Begins extraction of the malware related features. We take ownership
// of the request object until |callback| is called. Once feature extraction
// is complete, |callback| will run on the UI thread. |info| is not expected
// to stay valid after ExtractMalwareFeatures returns. All IPs stored in
// |info| will be cleared by calling this function.
virtual void ExtractMalwareFeatures(BrowseInfo* info,
ClientMalwareRequest* request,
const MalwareDoneCallback& callback);
private:
// Synchronous browser feature extraction.
void ExtractBrowseInfoFeatures(const BrowseInfo& info,
ClientPhishingRequest* request);
// Actually starts feature extraction (does the real work).
void StartExtractFeatures(scoped_ptr<ClientPhishingRequest> request,
const DoneCallback& callback);
// HistoryService callback which is called when we're done querying URL visits
// in the history.
void QueryUrlHistoryDone(scoped_ptr<ClientPhishingRequest> request,
const DoneCallback& callback,
bool success,
const history::URLRow& row,
const history::VisitVector& visits);
// HistoryService callback which is called when we're done querying HTTP host
// visits in the history.
void QueryHttpHostVisitsDone(scoped_ptr<ClientPhishingRequest> request,
const DoneCallback& callback,
bool success,
int num_visits,
base::Time first_visit);
// HistoryService callback which is called when we're done querying HTTPS host
// visits in the history.
void QueryHttpsHostVisitsDone(scoped_ptr<ClientPhishingRequest> request,
const DoneCallback& callback,
bool success,
int num_visits,
base::Time first_visit);
// Helper function which sets the host history features given the
// number of host visits and the time of the fist host visit. Set
// |is_http_query| to true if the URL scheme is HTTP and to false if
// the scheme is HTTPS.
void SetHostVisitsFeatures(int num_visits,
base::Time first_visit,
bool is_http_query,
ClientPhishingRequest* request);
// Helper function which gets the history server if possible. If the pointer
// is set it will return true and false otherwise.
bool GetHistoryService(HistoryService** history);
// Helper function which is called when we're done filtering out benign IPs
// on the IO thread. This function is called on the UI thread.
void FinishExtractMalwareFeatures(scoped_ptr<IPUrlMap> bad_ips,
MalwareDoneCallback callback,
scoped_ptr<ClientMalwareRequest> request);
content::WebContents* tab_;
ClientSideDetectionHost* host_;
base::CancelableTaskTracker cancelable_task_tracker_;
base::WeakPtrFactory<BrowserFeatureExtractor> weak_factory_;
DISALLOW_COPY_AND_ASSIGN(BrowserFeatureExtractor);
};
} // namespace safe_browsing
#endif // CHROME_BROWSER_SAFE_BROWSING_BROWSER_FEATURE_EXTRACTOR_H_
|