File: TextResourceDecoder.h

package info (click to toggle)
chromium-browser 57.0.2987.98-1~deb8u1
  • links: PTS, VCS
  • area: main
  • in suites: jessie
  • size: 2,637,852 kB
  • ctags: 2,544,394
  • sloc: cpp: 12,815,961; ansic: 3,676,222; python: 1,147,112; asm: 526,608; java: 523,212; xml: 286,794; perl: 92,654; sh: 86,408; objc: 73,271; makefile: 27,698; cs: 18,487; yacc: 13,031; tcl: 12,957; pascal: 4,875; ml: 4,716; lex: 3,904; sql: 3,862; ruby: 1,982; lisp: 1,508; php: 1,368; exp: 404; awk: 325; csh: 117; jsp: 39; sed: 37
file content (156 lines) | stat: -rw-r--r-- 5,153 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/*
    Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
    Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
    Copyright (C) 2006, 2008 Apple Inc. All rights reserved.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.

*/

#ifndef TextResourceDecoder_h
#define TextResourceDecoder_h

#include "core/CoreExport.h"
#include "wtf/PtrUtil.h"
#include "wtf/text/TextEncoding.h"
#include <memory>

namespace blink {

// Forward declaration only: this header refers to the parser solely through a
// std::unique_ptr member of TextResourceDecoder, whose destructor is declared
// here but not defined inline, so the complete type is not required.
class HTMLMetaCharsetParser;

class CORE_EXPORT TextResourceDecoder {
  USING_FAST_MALLOC(TextResourceDecoder);
  WTF_MAKE_NONCOPYABLE(TextResourceDecoder);

 public:
  enum EncodingSource {
    DefaultEncoding,
    AutoDetectedEncoding,
    EncodingFromContentSniffing,
    EncodingFromXMLHeader,
    EncodingFromMetaTag,
    EncodingFromCSSCharset,
    EncodingFromHTTPHeader,
    EncodingFromParentFrame
  };

  static std::unique_ptr<TextResourceDecoder> create(
      const String& mimeType,
      const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding()) {
    return WTF::wrapUnique(new TextResourceDecoder(
        mimeType, defaultEncoding, UseContentAndBOMBasedDetection, String()));
  }

  static std::unique_ptr<TextResourceDecoder> createWithAutoDetection(
      const String& mimeType,
      const WTF::TextEncoding& defaultEncoding,
      const String& url) {
    return WTF::wrapUnique(new TextResourceDecoder(mimeType, defaultEncoding,
                                                   UseAllAutoDetection, url));
  }

  // Corresponds to utf-8 decode in Encoding spec:
  // https://encoding.spec.whatwg.org/#utf-8-decode.
  static std::unique_ptr<TextResourceDecoder> createAlwaysUseUTF8ForText() {
    return WTF::wrapUnique(new TextResourceDecoder(
        "plain/text", UTF8Encoding(), AlwaysUseUTF8ForText, String()));
  }
  ~TextResourceDecoder();

  void setEncoding(const WTF::TextEncoding&, EncodingSource);
  const WTF::TextEncoding& encoding() const { return m_encoding; }
  bool encodingWasDetectedHeuristically() const {
    return m_source == AutoDetectedEncoding ||
           m_source == EncodingFromContentSniffing;
  }

  String decode(const char* data, size_t length);
  String flush();

  void setHintEncoding(const WTF::TextEncoding& encoding) {
    m_hintEncoding = encoding.name();
  }

  void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
  bool sawError() const { return m_sawError; }
  size_t checkForBOM(const char*, size_t);

 protected:
  // TextResourceDecoder does three kind of encoding detection:
  // 1. By BOM,
  // 2. By Content if |m_contentType| is not |PlainTextContext|
  //    (e.g. <meta> tag for HTML), and
  // 3. By detectTextEncoding().
  enum EncodingDetectionOption {
    // Use 1. + 2. + 3.
    UseAllAutoDetection,

    // Use 1. + 2.
    UseContentAndBOMBasedDetection,

    // Use None of them.
    // |m_contentType| must be |PlainTextContent| and
    // |m_encoding| must be UTF8Encoding.
    // This doesn't change encoding based on BOMs, but still processes
    // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result.
    AlwaysUseUTF8ForText
  };

  TextResourceDecoder(const String& mimeType,
                      const WTF::TextEncoding& defaultEncoding,
                      EncodingDetectionOption,
                      const String& url);

 private:
  enum ContentType {
    PlainTextContent,
    HTMLContent,
    XMLContent,
    CSSContent
  };  // PlainText only checks for BOM.
  static ContentType determineContentType(const String& mimeType);
  static const WTF::TextEncoding& defaultEncoding(
      ContentType,
      const WTF::TextEncoding& defaultEncoding);

  bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
  bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer);
  void checkForMetaCharset(const char*, size_t);
  bool shouldAutoDetect() const;

  ContentType m_contentType;
  WTF::TextEncoding m_encoding;
  std::unique_ptr<TextCodec> m_codec;
  EncodingSource m_source;
  const char* m_hintEncoding;
  const CString m_hintUrl;
  Vector<char> m_buffer;
  char m_hintLanguage[3];
  bool m_checkedForBOM;
  bool m_checkedForCSSCharset;
  bool m_checkedForXMLCharset;
  bool m_checkedForMetaCharset;
  bool m_useLenientXMLDecoding;  // Don't stop on XML decoding errors.
  bool m_sawError;
  EncodingDetectionOption m_encodingDetectionOption;

  std::unique_ptr<HTMLMetaCharsetParser> m_charsetParser;
};

}  // namespace blink

#endif