File: mt21_decompressor.h

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (127 lines) | stat: -rw-r--r-- 4,886 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Decompression utility for the MT21C pixel format.
//
// Note that this file and its corresponding .cc file have some very SoC
// specific code. While we would ideally like to avoid tying code so closely
// with a specific chip, this code is in the critical path for video decoding,
// and we know that we will only ever need to run this code on the MT8173. Every
// other SoC in the MT81XX line supports a pixel format called MM21, which we
// have generic support for in libyuv.
//
// We may some day decide to try using MT21C on other chips in the MT81XX line,
// but we will need to change significant sections of this code to make that
// viable. Our assumptions about the relative speed of the big and little cores,
// the number of cores, the CPU IDs of the cores, the timings of the SIMD
// instructions, the availability of ARM64, etc, will all be incorrect.

#ifndef MEDIA_GPU_V4L2_MT21_MT21_DECOMPRESSOR_H_
#define MEDIA_GPU_V4L2_MT21_MT21_DECOMPRESSOR_H_

#include "build/build_config.h"

#if !defined(ARCH_CPU_ARM_FAMILY)
#error "MT21Decompressor is only intended to run on MT8173 (ARM)"
#endif

#if !(defined(COMPILER_GCC) || defined(__clang__))
#error "MT21Decompressor is only intended to be built with GCC or Clang"
#endif

#include <stdint.h>

#include <atomic>
#include <memory>
#include <thread>
#include <vector>

#include "base/memory/raw_ptr.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_refptr.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "ui/gfx/geometry/size.h"

namespace media {

struct GolombRiceTableEntry;

// Parameters and synchronization state for a single decompression job.
// MT21Decompressor keeps these in |big_core_jobs_| and |little_core_jobs_|,
// next to the matching vectors of worker threads.
//
// Instances are ref-counted. The destructor is private and only
// base::RefCounted is a friend, so a job cannot be deleted directly or placed
// on the stack. REQUIRE_ADOPTION_FOR_REFCOUNTED_TYPE() additionally requires
// that a new instance be adopted into a scoped_refptr (see
// base/memory/ref_counted.h for the exact contract).
//
// NOTE(review): the per-field descriptions below are inferred from the field
// names and from the comments on MT21Decompressor; confirm them against the
// .cc file before relying on them.
struct MT21DecompressionJob : public base::RefCounted<MT21DecompressionJob> {
  REQUIRE_ADOPTION_FOR_REFCOUNTED_TYPE();

  // Stores the arguments in the identically named fields (presumably; the
  // definition lives in the .cc file).
  MT21DecompressionJob(const uint8_t* src,
                       const uint8_t* footer,
                       size_t offset,
                       uint8_t* dest,
                       size_t width,
                       size_t height,
                       bool is_chroma);
  // Compressed input data for this job -- TODO confirm.
  const uint8_t* src;
  // Presumably the per-plane metadata footer that MT21C buffers carry (see
  // the comment above MT21Decompressor) -- TODO confirm.
  const uint8_t* footer;
  // NOTE(review): the unit and the base this offset is relative to are not
  // visible in this header.
  size_t offset;
  // Output buffer. Explicitly opted out of raw_ptr via RAW_PTR_EXCLUSION; the
  // reason is not stated here (presumably hot-path performance).
  RAW_PTR_EXCLUSION uint8_t* dest;
  // Dimensions of the region handled by this job -- presumably in pixels.
  size_t width;
  size_t height;
  // Presumably true when the job processes the chroma (UV) plane rather than
  // the luma (Y) plane.
  bool is_chroma;
  // Presumably signaled to hand the job to its waiting worker thread.
  base::WaitableEvent wakeup_event;
  // Presumably signaled by the worker thread once the job has finished.
  base::WaitableEvent done_event;

 private:
  // Destruction is reserved for base::RefCounted, which deletes the object
  // when the last reference goes away.
  friend class base::RefCounted<MT21DecompressionJob>;
  ~MT21DecompressionJob() = default;
};

// Decompresses MT21C frames into NV12 using a fixed set of long-lived worker
// threads (see the comment on the private members below).
//
// We considered making this an ImageProcessorBackend, but it turns out we need
// access to the raw V4L2 buffer. MT21C planes have a "secret" footer containing
// metadata necessary for decompression appended to the beginning of the last
// page in the buffer. This extra data is totally unknown to Chrome abstractions
// like VideoFrame, which just assume a plane's size is determined by stride and
// height.
class MT21Decompressor {
 public:
  // |resolution| is the size of the frames that will be decompressed.
  // NOTE(review): presumably it is rounded up and stored in
  // |aligned_resolution_|; confirm in the .cc file.
  //
  // Marked explicit so that a gfx::Size is never silently converted into a
  // decompressor, which would spin up the worker machinery as a side effect.
  explicit MT21Decompressor(gfx::Size resolution);

  // Not copyable or movable. This was already the case implicitly (the
  // std::atomic_bool member is non-copyable and the user-declared destructor
  // suppresses the implicit move operations); it is spelled out here so the
  // intent is visible and compiler diagnostics are clear.
  MT21Decompressor(const MT21Decompressor&) = delete;
  MT21Decompressor& operator=(const MT21Decompressor&) = delete;

  ~MT21Decompressor();

  // Converts one MT21C frame to NV12.
  //
  // NOTE(review): parameter meanings below are inferred from their names and
  // from the class comment; confirm against the .cc file.
  //   src_y, src_uv:            compressed luma and chroma input buffers.
  //   y_buf_size, uv_buf_size:  sizes of those buffers (presumably in bytes);
  //                             likely needed to locate the footer, which sits
  //                             on the last page of each buffer.
  //   dest_y, dest_uv:          output buffers for the NV12 Y and UV planes.
  void MT21ToNV12(const uint8_t* src_y,
                  const uint8_t* src_uv,
                  const size_t y_buf_size,
                  const size_t uv_buf_size,
                  uint8_t* dest_y,
                  uint8_t* dest_uv);

 private:
  // We divide the frame horizontally 4 times and distribute the job among
  // the 4 CPU cores in the MT8173. Two of these cores are little cores, so we
  // want to divide the task unevenly and make sure the smaller 2 tasks end up
  // scheduled on the smaller cores. In order to accomplish this, we circumvent
  // Chrome's threading system entirely and use raw operating system threads, so
  // we can use sched_setaffinity().
  //
  // One alternative that was considered was breaking the decompression up into
  // a bunch of little atomic tasks and using a threadpool, and just letting
  // the OS scheduler figure out the division of labor. This approach has the
  // significant drawback however of not only introducing more overhead, but
  // more importantly, having potentially very poor memory locality.
  //
  // Note that we also keep threads alive and waiting between runs of the
  // decompression routine. Experimental evidence has indicated that the
  // overhead of start and join syscalls substantially lengthens decompression
  // times, so we just use userspace semaphores for synchronization instead.

  // Presumably set to tell the worker threads to exit -- TODO confirm.
  std::atomic_bool should_shutdown_ = false;

  // Worker threads and their jobs for the big cores.
  std::vector<std::thread> big_core_threads_;
  std::vector<scoped_refptr<MT21DecompressionJob>> big_core_jobs_;
  // NOTE(review): the meaning and ownership of the pivot buffers are not
  // visible in this header.
  raw_ptr<uint8_t> big_core_pivot_;

  // Worker threads and their jobs for the little cores.
  std::vector<std::thread> little_core_threads_;
  std::vector<scoped_refptr<MT21DecompressionJob>> little_core_jobs_;
  raw_ptr<uint8_t> little_core_pivot_;

  // Presumably |resolution| from the constructor after alignment.
  gfx::Size aligned_resolution_;

  // NOTE(review): ownership and lifetime of this table are not visible in this
  // header; see the .cc file.
  raw_ptr<GolombRiceTableEntry> symbol_cache_;
};

}  // namespace media

#endif  // MEDIA_GPU_V4L2_MT21_MT21_DECOMPRESSOR_H_