File: ieem_sitelist_parser.cc

package info (click to toggle)
chromium 138.0.7204.157-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,071,864 kB
  • sloc: cpp: 34,936,859; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,967; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (226 lines) | stat: -rw-r--r-- 9,592 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/browser_switcher/ieem_sitelist_parser.h"

#include "base/functional/bind.h"
#include "base/strings/string_util.h"
#include "content/public/browser/browser_thread.h"
#include "services/data_decoder/public/cpp/data_decoder.h"
#include "services/data_decoder/public/cpp/safe_xml_parser.h"
#include "services/data_decoder/public/mojom/xml_parser.mojom.h"

namespace browser_switcher {

namespace {

// Error reported when the document root is neither <rules> nor <site-list>.
constexpr char kInvalidRootElement[] = "Invalid XML root element";

// Enterprise Mode schema v.1: element names.
constexpr char kSchema1RulesElement[] = "rules";
constexpr char kSchema1EmieElement[] = "emie";
constexpr char kSchema1DocModeElement[] = "docMode";
constexpr char kSchema1DomainElement[] = "domain";
constexpr char kSchema1PathElement[] = "path";
// Enterprise Mode schema v.1: attribute names and the only attribute value
// that is treated as "set".
constexpr char kSchema1ExcludeAttribute[] = "exclude";
constexpr char kSchema1DoNotTransitionAttribute[] = "doNotTransition";
constexpr char kSchema1TrueValue[] = "true";

// Enterprise Mode schema v.2: element and attribute names.
constexpr char kSchema2SiteListElement[] = "site-list";
constexpr char kSchema2SiteElement[] = "site";
constexpr char kSchema2SiteUrlAttribute[] = "url";
constexpr char kSchema2SiteOpenInElement[] = "open-in";

// Wraps data_decoder::GetAllXmlElementChildrenWithTag() so that the matching
// children of |node| are returned by value rather than through an
// out-parameter, which lets callers iterate over the result directly.
std::vector<const base::Value*> GetChildrenWithTag(const base::Value& node,
                                                   const std::string& tag) {
  std::vector<const base::Value*> matches;
  data_decoder::GetAllXmlElementChildrenWithTag(node, tag, &matches);
  return matches;
}

// Data in a v.1 schema <domain> or <path> element.
//
// The flags carry default member initializers so that a default-constructed
// Entry is fully defined even before a parser has filled it in.
struct Entry {
  // URL or path concerned.
  std::string text;
  // True if the exclude attribute is "true".
  bool exclude = false;
  // True if the doNotTransition attribute is "true".
  bool do_not_transition = false;
};

// Reads a schema v.1 <domain> or <path> element into an Entry: the "exclude"
// and "doNotTransition" attributes, plus the element text with ASCII
// whitespace trimmed (base::TRIM_ALL). |result| is not used by this function.
Entry ParseDomainOrPath(const base::Value& node, ParsedXml* result) {
  DCHECK(data_decoder::IsXmlElementNamed(node, kSchema1DomainElement) ||
         data_decoder::IsXmlElementNamed(node, kSchema1PathElement));

  // An attribute counts as set only when its value is exactly "true".
  const auto attribute_is_true = [&node](const char* attribute_name) {
    return data_decoder::GetXmlElementAttribute(node, attribute_name) ==
           kSchema1TrueValue;
  };

  Entry parsed;
  parsed.exclude = attribute_is_true(kSchema1ExcludeAttribute);
  parsed.do_not_transition =
      attribute_is_true(kSchema1DoNotTransitionAttribute);

  data_decoder::GetXmlElementText(node, &parsed.text);
  base::TrimWhitespaceASCII(parsed.text, base::TRIM_ALL, &parsed.text);
  return parsed;
}

// Parses Enterprise Mode schema 1 files according to:
// https://technet.microsoft.com/itpro/internet-explorer/ie11-deploy-guide/enterprise-mode-schema-version-1-guidance
//
// |xml| is the <rules> root element. Every non-excluded <domain> with
// non-empty text found under an <emie> or <docMode> element yields one rule.
// Every non-excluded <path> with non-empty text, nested in a <domain> with
// non-empty text, yields the rule formed by concatenating the domain text and
// the path text. Rules are appended to |result->rules|; this function does not
// touch |result->error|.
void ParseIeFileVersionOne(const base::Value& xml,
                           ParsingMode parsing_mode,
                           ParsedXml* result) {
  const bool none_is_greylist = parsing_mode == ParsingMode::kIESiteListMode;

  DCHECK(data_decoder::IsXmlElementNamed(xml, kSchema1RulesElement));

  // GetXmlElementChildren() returns a pointer, so it must be checked before
  // being dereferenced. A null result (presumably a root element without any
  // children, e.g. <rules/>) simply means there are no rules to collect.
  const auto* children = data_decoder::GetXmlElementChildren(xml);
  if (!children)
    return;

  // Appends |rule| to the list selected by the parsing mode and by
  // |do_not_transition|. Shared by the <domain> and <path> handling below.
  const auto add_rule = [none_is_greylist, result](const std::string& rule,
                                                   bool do_not_transition) {
    if (!none_is_greylist) {
      // TODO(crbug.com/40812726): Remove this branch, and the
      // kBrowserSwitcherNoneIsGreylist flag, once we're confident this
      // doesn't break customers. This was added in M99.
      const std::string prefix = (do_not_transition ? "!" : "");
      result->rules.sitelist.push_back(prefix + rule);
      return;
    }
    if (do_not_transition) {
      // doNotTransition="true" means greylist.
      result->rules.greylist.push_back(rule);
    } else {
      // doNotTransition="false", absent or unrecognized means sitelist.
      result->rules.sitelist.push_back(rule);
    }
  };

  for (const base::Value& node : *children) {
    // Skip over anything that is not a <emie> or <docMode> element.
    if (!data_decoder::IsXmlElementNamed(node, kSchema1EmieElement) &&
        !data_decoder::IsXmlElementNamed(node, kSchema1DocModeElement)) {
      continue;
    }
    // Loop over <domain> elements.
    for (const base::Value* domain_node :
         GetChildrenWithTag(node, kSchema1DomainElement)) {
      const Entry domain = ParseDomainOrPath(*domain_node, result);
      if (!domain.text.empty() && !domain.exclude)
        add_rule(domain.text, domain.do_not_transition);

      // Loop over <path> elements. Note that the domain's own exclude flag
      // does not suppress its paths; only an empty domain text does.
      for (const base::Value* path_node :
           GetChildrenWithTag(*domain_node, kSchema1PathElement)) {
        const Entry path = ParseDomainOrPath(*path_node, result);
        if (!path.text.empty() && !domain.text.empty() && !path.exclude)
          add_rule(domain.text + path.text, path.do_not_transition);
      }
    }
  }
}

// Parses Enterprise Mode schema 2 files according to:
// https://technet.microsoft.com/itpro/internet-explorer/ie11-deploy-guide/enterprise-mode-schema-version-2-guidance
//
// |xml| is the <site-list> root element. Each <site> with a non-empty "url"
// attribute contributes exactly one rule to |result->rules|; which list it
// lands in, and whether it is "!"-prefixed, depends on the <open-in> text and
// on |parsing_mode|.
void ParseIeFileVersionTwo(const base::Value& xml,
                           ParsingMode parsing_mode,
                           ParsedXml* result) {
  DCHECK(data_decoder::IsXmlElementNamed(xml, kSchema2SiteListElement));

  const bool none_is_greylist = parsing_mode == ParsingMode::kIESiteListMode;

  // Only <site> children are requested, so siblings such as <created-by> are
  // never visited.
  for (const base::Value* site :
       GetChildrenWithTag(xml, kSchema2SiteElement)) {
    std::string url =
        data_decoder::GetXmlElementAttribute(*site, kSchema2SiteUrlAttribute);
    base::TrimWhitespaceASCII(url, base::TRIM_ALL, &url);
    if (url.empty())
      continue;

    // All <open-in> children of the site write their text into the same
    // string.
    std::string target;
    for (const base::Value* open_in_node :
         GetChildrenWithTag(*site, kSchema2SiteOpenInElement)) {
      data_decoder::GetXmlElementText(*open_in_node, &target);
    }
    base::TrimWhitespaceASCII(target, base::TRIM_ALL, &target);

    // ASCII case-insensitive equality against a browser keyword.
    const auto target_is = [&target](const char* browser) {
      return base::CompareCaseInsensitiveASCII(target, browser) == 0;
    };

    if (!none_is_greylist) {
      // TODO(crbug.com/40812726): Remove this branch, and the
      // kBrowserSwitcherNoneIsGreylist flag, once we're confident this
      // doesn't break customers. This was added in M99.
      const bool inverted = target.empty() || target_is("none");
      const std::string prefix = inverted ? "!" : "";
      result->rules.sitelist.push_back(prefix + url);
      continue;
    }

    if (target_is("ie11")) {
      // <open-in>IE11 means sitelist.
      result->rules.sitelist.push_back(url);
    } else if (target_is("msedge") || target_is("chrome")) {
      // <open-in>MSEdge or <open-in>Chrome means an inverted rule (i.e., open
      // in Chrome).
      result->rules.sitelist.push_back("!" + url);
    } else {
      // <open-in> absent, unrecognized, or "none" means greylist.
      result->rules.greylist.push_back(url);
    }
  }
}

// Receives the outcome of the XML parse requested by ParseIeemXml(), converts
// it into a ParsedXml and hands that to |callback|. A parse error is forwarded
// as-is; otherwise the root element name selects the schema-specific parser,
// and an unknown root produces kInvalidRootElement as the error.
void RawXmlParsed(ParsingMode parsing_mode,
                  base::OnceCallback<void(ParsedXml)> callback,
                  data_decoder::DataDecoder::ValueOrError xml) {
  if (!xml.has_value()) {
    // Copies the string, but it should only be around 20 characters.
    std::move(callback).Run(ParsedXml({}, {}, xml.error()));
    return;
  }

  const base::Value& root = *xml;
  DCHECK(data_decoder::IsXmlElementOfType(
      root, data_decoder::mojom::XmlParser::kElementType));

  ParsedXml parsed;
  if (data_decoder::IsXmlElementNamed(root, kSchema1RulesElement)) {
    // Schema v.1 documents are rooted at <rules>.
    ParseIeFileVersionOne(root, parsing_mode, &parsed);
  } else if (data_decoder::IsXmlElementNamed(root, kSchema2SiteListElement)) {
    // Schema v.2 documents are rooted at <site-list>.
    ParseIeFileVersionTwo(root, parsing_mode, &parsed);
  } else {
    parsed.error = kInvalidRootElement;
  }
  std::move(callback).Run(std::move(parsed));
}

}  // namespace

// Out-of-line definitions of ParsedXml's constructors and destructor.

// Default and move construction use the compiler-generated behavior.
ParsedXml::ParsedXml() = default;
ParsedXml::ParsedXml(ParsedXml&&) = default;
// Takes ownership of an already-built rule set and an optional error message
// by moving both into the corresponding members.
ParsedXml::ParsedXml(RawRuleSet&& rules_, std::optional<std::string>&& error_)
    : rules(std::move(rules_)), error(std::move(error_)) {}
// Convenience overload: builds a RawRuleSet from the separate |sitelist| and
// |greylist| vectors and delegates to the two-argument constructor.
ParsedXml::ParsedXml(std::vector<std::string>&& sitelist,
                     std::vector<std::string>&& greylist,
                     std::optional<std::string>&& error)
    : ParsedXml(RawRuleSet(std::move(sitelist), std::move(greylist)),
                std::move(error)) {}
ParsedXml::~ParsedXml() = default;

// Entry point: submits |xml| to data_decoder::DataDecoder::ParseXmlIsolated()
// with WhitespaceBehavior::kIgnore. The parse outcome is delivered to
// RawXmlParsed(), bound with |parsing_mode| and |callback|, which turns it
// into a ParsedXml and runs |callback| with it.
void ParseIeemXml(const std::string& xml,
                  ParsingMode parsing_mode,
                  base::OnceCallback<void(ParsedXml)> callback) {
  auto on_xml_parsed =
      base::BindOnce(&RawXmlParsed, parsing_mode, std::move(callback));
  data_decoder::DataDecoder::ParseXmlIsolated(
      xml, data_decoder::mojom::XmlParser::WhitespaceBehavior::kIgnore,
      std::move(on_xml_parsed));
}

}  // namespace browser_switcher