File: dates.cc

package info (click to toggle)
chromium 145.0.7632.159-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 5,976,224 kB
  • sloc: cpp: 36,198,469; ansic: 7,634,080; javascript: 3,564,060; python: 1,649,622; xml: 838,470; asm: 717,087; pascal: 185,708; sh: 88,786; perl: 88,718; objc: 79,984; sql: 59,811; cs: 42,452; fortran: 24,101; makefile: 21,144; tcl: 15,277; php: 14,022; yacc: 9,066; ruby: 7,553; awk: 3,720; lisp: 3,233; lex: 1,328; ada: 727; jsp: 228; sed: 36
file content (135 lines) | stat: -rw-r--r-- 4,586 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "pdf/pdf_utils/dates.h"

#include <stdint.h>

#include <optional>
#include <string_view>

#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/time/time.h"

namespace chrome_pdf {

namespace {

// Consumes a date string from the front, one fixed-width numeric field or one
// non-digit character at a time. Once a numeric field fails to parse, the
// deserializer is "stopped" and every pop yields `std::nullopt` until
// `unstop()` is called.
class DateDeserializer final {
 public:
  // Only the view is stored, not a copy of the characters, so the storage
  // behind `parsing` has to stay alive for as long as `this` does.
  explicit DateDeserializer(std::string_view parsing)
      : deserializing_(parsing) {}
  ~DateDeserializer() = default;

  // Removes exactly `num_digits` leading characters and returns them as an
  // integer. If fewer than `num_digits` characters remain, or they do not form
  // a plain unsigned number, nothing is consumed, the deserializer becomes
  // stopped, and `std::nullopt` is returned.
  std::optional<int> PopDigits(size_t num_digits) {
    if (stopped_) {
      return std::nullopt;
    }

    // The explicit check on the first character is needed because
    // `base::StringToUint()` tolerates a leading sign character.
    if (deserializing_.size() >= num_digits &&
        base::IsAsciiDigit(deserializing_.front())) {
      uint32_t parsed;
      if (base::StringToUint(deserializing_.substr(0, num_digits), &parsed)) {
        // Success: drop the consumed characters from the front.
        deserializing_.remove_prefix(num_digits);
        return static_cast<int>(parsed);
      }
    }

    stopped_ = true;
    return std::nullopt;
  }

  // Removes and returns the leading character when it is not an ASCII digit.
  // When the deserializer is stopped, the input is exhausted, or the leading
  // character is a digit, returns `std::nullopt` and leaves the state as is.
  std::optional<char> TryPopNonDigit() {
    if (stopped_) {
      return std::nullopt;
    }
    if (deserializing_.empty()) {
      return std::nullopt;
    }

    const char candidate = deserializing_.front();
    if (base::IsAsciiDigit(candidate)) {
      return std::nullopt;
    }

    deserializing_.remove_prefix(1);
    return candidate;
  }

  // Clears the stopped state so that subsequent pops are attempted again.
  void unstop() { stopped_ = false; }

 private:
  // The not-yet-consumed remainder of the input.
  std::string_view deserializing_;
  // Set when a `PopDigits()` call fails; cleared by `unstop()`.
  bool stopped_ = false;
};

// Reads the time zone offset that remains in `deserializer` and returns it as
// a signed `base::TimeDelta`. This is the offset portion of the date format in
// section 7.9.4 "Dates" of the ISO 32000-1:2008 spec: a '+' or '-' sign
// followed by "HH'mm", where "HH" is the hour and "mm" is the minute. Any
// piece that is missing or unparsable contributes zero.
base::TimeDelta ParseOffset(DateDeserializer& deserializer) {
  // Without a leading '+' or '-' there is no usable time zone information, in
  // which case UTC (a zero offset) is assumed.
  const std::optional<char> sign = deserializer.TryPopNonDigit();
  if (!sign.has_value()) {
    return base::TimeDelta();
  }
  const bool is_positive = *sign == '+';
  if (!is_positive && *sign != '-') {
    return base::TimeDelta();
  }

  base::TimeDelta magnitude =
      base::Hours(deserializer.PopDigits(2).value_or(0));

  // The spec puts an apostrophe between the hours and the minutes. Its absence
  // is tolerated, but any other non-digit character ends the offset right
  // after the hours.
  const std::optional<char> separator = deserializer.TryPopNonDigit();
  const bool minutes_may_follow =
      !separator.has_value() || *separator == '\'';
  if (minutes_may_follow) {
    // Whatever trails the minutes is ignored on purpose. One reason for the
    // leniency is the apostrophe following the minutes, which is only
    // mentioned in earlier versions of the spec.
    magnitude += base::Minutes(deserializer.PopDigits(2).value_or(0));
  }

  return is_positive ? magnitude : -magnitude;
}

}  // namespace

// Converts a PDF date string into a `base::Time`. Only the four-digit year is
// mandatory; a default-constructed `base::Time` is returned when the year is
// missing or when the parsed fields do not form a valid UTC time.
base::Time ParsePdfDate(std::string_view date) {
  // The spec mandates a leading "D:", but earlier versions of the spec weren't
  // strict about it, so the prefix is stripped only if present.
  constexpr std::string_view kPrefix = "D:";
  if (date.substr(0, kPrefix.size()) == kPrefix) {
    date.remove_prefix(kPrefix.size());
  }

  DateDeserializer deserializer(date);

  // A date without a year is not a date at all.
  const std::optional<int> year = deserializer.PopDigits(4);
  if (!year.has_value()) {
    return base::Time();
  }

  // Fields are consumed strictly in this order. A missing month or day falls
  // back to 1; missing hour, minute, and second fall back to 0. All remaining
  // members stay zero-initialized.
  base::Time::Exploded exploded = {};
  exploded.year = *year;
  exploded.month = deserializer.PopDigits(2).value_or(1);
  exploded.day_of_month = deserializer.PopDigits(2).value_or(1);
  exploded.hour = deserializer.PopDigits(2).value_or(0);
  exploded.minute = deserializer.PopDigits(2).value_or(0);
  exploded.second = deserializer.PopDigits(2).value_or(0);

  base::Time local_as_utc;
  if (!base::Time::FromUTCExploded(exploded, &local_as_utc)) {
    return base::Time();
  }

  // A failed optional field above leaves the deserializer stopped; clear that
  // so the offset can still be read. `base::Time` is in UTC, so the offset is
  // subtracted to normalize the result.
  deserializer.unstop();
  const base::TimeDelta utc_offset = ParseOffset(deserializer);
  return local_as_utc - utc_offset;
}

}  // namespace chrome_pdf