File: common.cpp

package info (click to toggle)
pytorch-text 0.14.1-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 11,560 kB
  • sloc: python: 14,197; cpp: 2,404; sh: 214; makefile: 20
file content (38 lines) | stat: -rw-r--r-- 825 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#include <torchtext/csrc/common.h>

#include <fstream>
#include <ios>
#include <limits>

namespace torchtext {
namespace impl {

// Integer division of x by y after biasing the numerator by (y - 1).
// For x >= 0 and y > 0 this yields the ceiling of x / y, e.g. the number of
// chunks of size y needed to hold x items. y must be non-zero.
int64_t divup(int64_t x, int64_t y) {
  const int64_t biased_numerator = x + y - 1;
  return biased_numerator / y;
}

// Appends to `offsets` the byte positions in `file_path` at which successive
// chunks of `chunk_size` lines begin.
//
// The first `num_header_lines` lines are skipped, and the position right
// after them is always appended as the start of the first chunk. After that,
// a position is appended each time another `chunk_size` newline-terminated
// lines have been consumed. If the file ends exactly on a chunk boundary, the
// end-of-file position is appended too.
//
// Parameters:
//   file_path        - path of the text file to scan.
//   num_lines        - not used by this function; kept for interface
//                      compatibility.
//   chunk_size       - number of lines per chunk. If it is <= 0 only the
//                      initial position is appended.
//   offsets          - output vector; results are appended, existing entries
//                      are left untouched.
//   num_header_lines - number of leading lines to skip before the first chunk.
//
// NOTE: no error is reported if the file cannot be opened or has fewer lines
// than `num_header_lines`. In those cases the stream is no longer in a good
// state, tellg() returns -1, and that value (converted to size_t) is the
// single entry appended.
void infer_offsets(
    const std::string& file_path,
    int64_t num_lines,
    int64_t chunk_size,
    std::vector<size_t>& offsets,
    int64_t num_header_lines) {
  static_cast<void>(num_lines); // Intentionally unused, see above.

  constexpr auto kRestOfLine = std::numeric_limits<std::streamsize>::max();
  std::ifstream fin(file_path, std::ios::in);

  for (; num_header_lines > 0; --num_header_lines) {
    fin.ignore(kRestOfLine, '\n');
  }
  offsets.push_back(fin.tellg());

  // A non-positive chunk size cannot delimit chunks, and a zero value would
  // make the modulo below a division by zero.
  if (chunk_size <= 0) {
    return;
  }
  const auto lines_per_chunk = static_cast<size_t>(chunk_size);

  size_t lines_seen = 0;
  while (fin.ignore(kRestOfLine, '\n')) {
    // ignore() that runs into end-of-file sets only eofbit, so the stream
    // still tests true. No further line starts here, and tellg() would fail
    // and return -1, so stop instead of recording a bogus offset.
    if (fin.eof()) {
      break;
    }
    ++lines_seen;
    if (lines_seen % lines_per_chunk == 0) {
      offsets.push_back(fin.tellg());
    }
  }
}

} // namespace impl
} // namespace torchtext