File: dates.cc

package info (click to toggle)
chromium 120.0.6099.224-1~deb11u1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 6,112,112 kB
  • sloc: cpp: 32,907,025; ansic: 8,148,123; javascript: 3,679,536; python: 2,031,248; asm: 959,718; java: 804,675; xml: 617,256; sh: 111,417; objc: 100,835; perl: 88,443; cs: 53,032; makefile: 29,579; fortran: 24,137; php: 21,162; tcl: 21,147; sql: 20,809; ruby: 17,735; pascal: 12,864; yacc: 8,045; lisp: 3,388; lex: 1,323; ada: 727; awk: 329; jsp: 267; csh: 117; exp: 43; sed: 37
file content (134 lines) | stat: -rw-r--r-- 4,651 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "pdf/pdf_utils/dates.h"

#include <stdint.h>

#include "base/strings/string_number_conversions.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/time/time.h"
#include "third_party/abseil-cpp/absl/types/optional.h"

namespace chrome_pdf {

namespace {

// Incrementally consumes a date string from the front, either as fixed-width
// runs of digits or as single non-digit characters. Once a digit run fails to
// parse, the deserializer is "stopped" and every pop returns `absl::nullopt`
// until `unstop()` is called.
class DateDeserializer final {
 public:
  // The characters viewed by `parsing` must stay alive for as long as `this`
  // does: `base::StringPiece` only references them, it does not copy them.
  explicit DateDeserializer(base::StringPiece parsing)
      : deserializing_(parsing) {}
  ~DateDeserializer() = default;

  // Consumes exactly `num_digits` leading characters and returns them parsed
  // as a number. If fewer than `num_digits` characters remain, or the
  // characters do not parse, nothing is consumed, the deserializer becomes
  // stopped, and `absl::nullopt` is returned. Also returns `absl::nullopt`
  // (without touching the input) if the deserializer is already stopped.
  absl::optional<int> PopDigits(size_t num_digits) {
    if (stopped_)
      return absl::nullopt;

    // The explicit digit check on the first character is needed because
    // `base::StringToUint()` tolerates a leading sign character. The checks
    // short-circuit left to right, so `front()` is only reached once the size
    // check has passed.
    uint32_t number;
    const bool parsed =
        deserializing_.size() >= num_digits &&
        base::IsAsciiDigit(deserializing_.front()) &&
        base::StringToUint(deserializing_.substr(0, num_digits), &number);
    if (!parsed) {
      stopped_ = true;
      return absl::nullopt;
    }

    // Drop the characters that were just parsed.
    deserializing_.remove_prefix(num_digits);
    return number;
  }

  // Consumes and returns the leading character when it is not an ASCII digit.
  // Returns `absl::nullopt` and leaves the deserializer untouched if it is
  // stopped, if no input remains, or if the leading character is a digit.
  absl::optional<char> TryPopNonDigit() {
    const bool can_peek = !stopped_ && !deserializing_.empty();
    if (!can_peek)
      return absl::nullopt;

    const char head = deserializing_.front();
    if (base::IsAsciiDigit(head))
      return absl::nullopt;

    deserializing_.remove_prefix(1);
    return head;
  }

  // Clears the stopped state so that popping may resume.
  void unstop() { stopped_ = false; }

 private:
  // The not-yet-consumed tail of the input.
  base::StringPiece deserializing_;
  // Set when `PopDigits()` fails; cleared only by `unstop()`.
  bool stopped_ = false;
};

// Reads the time zone offset that trails a PDF date (see section 7.9.4 "Dates"
// of the ISO 32000-1:2008 spec) from `deserializer` and returns it as a signed
// `base::TimeDelta`. The expected shape is a '+' or '-' sign followed by
// "HH'mm", where "HH" is the hour and "mm" is the minute. If the next
// character is anything other than '+' or '-' (or nothing can be popped), a
// zero offset is returned, i.e. UTC is assumed.
base::TimeDelta ParseOffset(DateDeserializer& deserializer) {
  const absl::optional<char> sign = deserializer.TryPopNonDigit();
  const bool is_positive = sign.has_value() && sign.value() == '+';
  const bool is_negative = sign.has_value() && sign.value() == '-';
  if (!is_positive && !is_negative)
    return base::TimeDelta();

  // Hours come first; missing or malformed hours count as zero.
  base::TimeDelta magnitude =
      base::Hours(deserializer.PopDigits(2).value_or(0));

  // The spec puts an apostrophe between hours and minutes. Its absence is
  // tolerated, but any other non-digit character in that position ends the
  // offset right after the hours.
  const absl::optional<char> separator = deserializer.TryPopNonDigit();
  const bool unexpected_separator =
      separator.has_value() && separator.value() != '\'';
  if (!unexpected_separator) {
    // Whatever follows the minutes is deliberately ignored. One reason is the
    // trailing apostrophe after the minutes, which only earlier versions of
    // the spec mention.
    magnitude += base::Minutes(deserializer.PopDigits(2).value_or(0));
  }

  return is_positive ? magnitude : -magnitude;
}

}  // namespace

base::Time ParsePdfDate(base::StringPiece date) {
  // The spec mandates a "D:" prefix, but earlier versions of the spec were not
  // strict about it, so it is stripped when present and not required.
  const bool has_prefix = date.substr(0, 2) == "D:";
  DateDeserializer deserializer(has_prefix ? date.substr(2) : date);

  // A four-digit year is the only mandatory component; without it the result
  // is a default-constructed `base::Time`.
  const absl::optional<int> year = deserializer.PopDigits(4);
  if (!year.has_value())
    return base::Time();

  // Each remaining component is two digits wide. A missing month or day
  // becomes 1 and a missing hour, minute, or second becomes 0. Fields that are
  // not assigned below keep their zero value. The components are read in the
  // order they appear in the string.
  base::Time::Exploded exploded = {};
  exploded.year = year.value();
  exploded.month = deserializer.PopDigits(2).value_or(1);
  exploded.day_of_month = deserializer.PopDigits(2).value_or(1);
  exploded.hour = deserializer.PopDigits(2).value_or(0);
  exploded.minute = deserializer.PopDigits(2).value_or(0);
  exploded.second = deserializer.PopDigits(2).value_or(0);

  base::Time utc_time;
  if (!base::Time::FromUTCExploded(exploded, &utc_time))
    return base::Time();

  // The fields were interpreted as UTC above, so subtract the offset (if any)
  // to normalize. A failed optional component may have left the deserializer
  // stopped; clear that first so the offset can still be read.
  deserializer.unstop();
  const base::TimeDelta offset = ParseOffset(deserializer);
  return utc_time - offset;
}

}  // namespace chrome_pdf