1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357
|
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "DecoderData.h"
#include "Adts.h"
#include "AnnexB.h"
#include "BufferReader.h"
#include "MP4Metadata.h"
#include "VideoUtils.h"
#include "mozilla/EndianUtils.h"
#include "mozilla/Logging.h"
#include "mozilla/glean/DomMediaMp4Metrics.h"
#include "mp4parse.h"
#define LOG(...) \
MOZ_LOG(gMP4MetadataLog, mozilla::LogLevel::Debug, (__VA_ARGS__))
using mozilla::media::TimeUnit;
namespace mozilla {
// Parses `aLength` bytes at `aData` as a sequence of PSSH records and appends
// each fully read record to `pssh`.
//
// Every record is consumed in this order: a 16 byte UUID, a uint32_t payload
// length, then that many payload bytes. Parsing continues until the reader
// has no bytes left.
//
// Returns Err(NS_ERROR_FAILURE) as soon as any part of a record cannot be
// read. Records appended before the failure are left in `pssh`.
mozilla::Result<mozilla::Ok, nsresult> CryptoFile::DoUpdate(
    const uint8_t* aData, size_t aLength) {
  BufferReader input(aData, aLength);

  while (input.Remaining() != 0) {
    PsshInfo entry;

    // The UUID must be readable and a length field must follow it.
    const bool haveHeader =
        input.ReadArray(entry.uuid, 16) && input.CanReadType<uint32_t>();
    if (!haveHeader) {
      return mozilla::Err(NS_ERROR_FAILURE);
    }

    const auto payloadSize = input.ReadType<uint32_t>();
    if (!input.ReadArray(entry.data, payloadSize)) {
      return mozilla::Err(NS_ERROR_FAILURE);
    }

    pssh.AppendElement(std::move(entry));
  }

  return mozilla::Ok();
}
// Copies the protection (sinf) information from `aSinf` into
// `aConfig.mCrypto`.
//
// Nothing is written when `aSinf` is not flagged as encrypted. For encrypted
// entries the scheme must be cenc or cbcs; any other scheme yields
// NS_ERROR_DOM_MEDIA_METADATA_ERR and leaves `aConfig` untouched. Returns
// NS_OK otherwise.
static MediaResult UpdateTrackProtectedInfo(mozilla::TrackInfo& aConfig,
                                            const Mp4parseSinfInfo& aSinf) {
  if (aSinf.is_encrypted == 0) {
    return NS_OK;
  }

  auto& crypto = aConfig.mCrypto;

  switch (aSinf.scheme_type) {
    case MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CENC:
      crypto.mCryptoScheme = CryptoScheme::Cenc;
      break;
    case MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CBCS:
      crypto.mCryptoScheme = CryptoScheme::Cbcs;
      break;
    default:
      // Unsupported encryption type.
      return MediaResult(
          NS_ERROR_DOM_MEDIA_METADATA_ERR,
          RESULT_DETAIL(
              "Unsupported encryption scheme encountered aSinf.scheme_type=%d",
              static_cast<int>(aSinf.scheme_type)));
  }

  // Key id and constant IV are appended, the scalar fields are overwritten.
  crypto.mIVSize = aSinf.iv_size;
  crypto.mKeyId.AppendElements(aSinf.kid.data, aSinf.kid.length);
  crypto.mCryptByteBlock = aSinf.crypt_byte_block;
  crypto.mSkipByteBlock = aSinf.skip_byte_block;
  crypto.mConstantIV.AppendElements(aSinf.constant_iv.data,
                                    aSinf.constant_iv.length);
  return NS_OK;
}
// Verify various information shared by Mp4ParseTrackAudioInfo and
// Mp4ParseTrackVideoInfo and record telemetry on that info. Returns an
// appropriate MediaResult indicating if the info is valid or not.
// This verifies:
// - That we have a sample_info_count > 0 (valid tracks should have at least one
//   sample description entry).
// - That only a single codec is used across all sample infos, as we don't
//   handle multiple. Every entry is compared against the first entry.
// - If more than one sample information structure contains crypto info. This
//   case is not fatal (we don't return an error), but does record telemetry
//   to help judge if we need more handling in gecko for multiple crypto.
//
// Telemetry is also recorded on the above. As of writing, the
// telemetry is recorded to give us early warning if MP4s exist that we're not
// handling. Note, if adding new checks and telemetry to this function,
// telemetry should be recorded before returning to ensure it is gathered.
//
// `audioOrVideoInfo` is dereferenced unconditionally and must not be null.
// No entry of `sample_info` is read when `sample_info_count` is 0.
//
// Returns NS_ERROR_DOM_MEDIA_METADATA_ERR when there are no sample infos or
// when the sample infos use more than one codec, NS_OK otherwise.
template <typename Mp4ParseTrackAudioOrVideoInfo>
static MediaResult VerifyAudioOrVideoInfoAndRecordTelemetry(
    Mp4ParseTrackAudioOrVideoInfo* audioOrVideoInfo) {
  glean::media_mp4_parse::num_sample_description_entries.AccumulateSingleSample(
      audioOrVideoInfo->sample_info_count);

  bool hasMultipleCodecs = false;
  uint32_t cryptoCount = 0;
  for (uint32_t i = 0; i < audioOrVideoInfo->sample_info_count; i++) {
    // The first entry is the reference codec. It is only read from inside the
    // loop so that an empty sample_info array is never indexed.
    if (audioOrVideoInfo->sample_info[i].codec_type !=
        audioOrVideoInfo->sample_info[0].codec_type) {
      hasMultipleCodecs = true;
    }

    // Count the sample infos that carry encryption info.
    if (audioOrVideoInfo->sample_info[i].protected_data.is_encrypted) {
      cryptoCount += 1;
    }
  }

  glean::media_mp4_parse::sample_description_entries_have_multiple_codecs
      .EnumGet(static_cast<glean::media_mp4_parse::
                               SampleDescriptionEntriesHaveMultipleCodecsLabel>(
          hasMultipleCodecs))
      .Add();

  // Accumulate if we have multiple (2 or more) crypto entries.
  // TODO(1715283): rework this to count number of crypto entries + gather
  // richer data.
  glean::media_mp4_parse::sample_description_entries_have_multiple_crypto
      .EnumGet(static_cast<glean::media_mp4_parse::
                               SampleDescriptionEntriesHaveMultipleCryptoLabel>(
          cryptoCount >= 2))
      .Add();

  // All telemetry has been recorded at this point, so it is safe to bail out.
  if (audioOrVideoInfo->sample_info_count == 0) {
    return MediaResult(
        NS_ERROR_DOM_MEDIA_METADATA_ERR,
        RESULT_DETAIL("Got 0 sample info while verifying track."));
  }

  if (hasMultipleCodecs) {
    // Different codecs in a single track. We don't handle this.
    return MediaResult(
        NS_ERROR_DOM_MEDIA_METADATA_ERR,
        RESULT_DETAIL("Multiple codecs encountered while verifying track."));
  }

  return NS_OK;
}
// Fills in this audio track info from the data produced by the mp4 parser.
//
// aTrack   - track level data: id, duration, media time and time scale.
// aAudio   - the audio sample description entries of the track.
// aIndices - optional (may be null) sample index. It is only consulted for
//            AAC, to work out the span covered by the media.
//
// Returns the error from VerifyAudioOrVideoInfoAndRecordTelemetry or
// UpdateTrackProtectedInfo if either fails, NS_ERROR_DOM_MEDIA_METADATA_ERR
// if the index reports a start time after its end time, and NS_OK otherwise.
MediaResult MP4AudioInfo::Update(const Mp4parseTrackInfo* aTrack,
                                 const Mp4parseTrackAudioInfo* aAudio,
                                 const IndiceWrapper* aIndices) {
  auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(aAudio);
  NS_ENSURE_SUCCESS(rv, rv);

  Mp4parseCodec codecType = aAudio->sample_info[0].codec_type;

  // Only the first encrypted sample info contributes crypto information.
  for (uint32_t i = 0; i < aAudio->sample_info_count; i++) {
    if (aAudio->sample_info[i].protected_data.is_encrypted) {
      rv = UpdateTrackProtectedInfo(*this,
                                    aAudio->sample_info[i].protected_data);
      NS_ENSURE_SUCCESS(rv, rv);
      break;
    }
  }

  // We assume that the members of the first sample info are representative of
  // the entire track. This code will need to be updated should this assumption
  // ever not hold. E.g. if we need to handle different codecs in a single
  // track, or if we have a different number of channels in a single track.
  Mp4parseByteData mp4ParseSampleCodecSpecific =
      aAudio->sample_info[0].codec_specific_config;
  Mp4parseByteData extraData = aAudio->sample_info[0].extra_data;
  MOZ_ASSERT(mCodecSpecificConfig.is<NoCodecSpecificData>(),
             "Should have no codec specific data yet");

  if (codecType == MP4PARSE_CODEC_OPUS) {
    mMimeType = "audio/opus"_ns;
    OpusCodecSpecificData opusCodecSpecificData{};
    // The 16 bit little endian value at byte offset 10 of the codec specific
    // config is handed to the decoder as the container codec delay. The blob
    // needs at least 12 bytes for that read to stay in bounds.
    // NOTE(review): offset 10 matches the pre-skip field of an Opus
    // identification header, which is a sample count; the log text below
    // says microseconds -- confirm the intended unit.
    if (mp4ParseSampleCodecSpecific.data &&
        mp4ParseSampleCodecSpecific.length >= 12) {
      uint16_t preskip = mozilla::LittleEndian::readUint16(
          mp4ParseSampleCodecSpecific.data + 10);
      opusCodecSpecificData.mContainerCodecDelayFrames = preskip;
      LOG("Opus stream in MP4 container, %" PRId64
          " microseconds of encoder delay (%" PRIu16 ").",
          opusCodecSpecificData.mContainerCodecDelayFrames, preskip);
    } else {
      // This file will error later as it will be rejected by the opus decoder.
      opusCodecSpecificData.mContainerCodecDelayFrames = 0;
    }
    opusCodecSpecificData.mHeadersBinaryBlob->AppendElements(
        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
    mCodecSpecificConfig =
        AudioCodecSpecificVariant{std::move(opusCodecSpecificData)};
  } else if (codecType == MP4PARSE_CODEC_AAC ||
             codecType == MP4PARSE_CODEC_XHEAAC) {
    mMimeType = "audio/mp4a-latm"_ns;
    // NOTE(review): media_time is treated as microseconds here, while further
    // down it is paired with aTrack->time_scale -- confirm the unit.
    int64_t codecDelayUS = aTrack->media_time;
    double USECS_PER_S = 1e6;
    // We can't use mozilla::UsecsToFrames here because we need to round, and it
    // floors.
    uint32_t encoderDelayFrameCount = 0;
    if (codecDelayUS > 0) {
      encoderDelayFrameCount = static_cast<uint32_t>(
          std::lround(static_cast<double>(codecDelayUS) *
                      aAudio->sample_info[0].sample_rate / USECS_PER_S));
      LOG("AAC stream in MP4 container, %" PRIu32 " frames of encoder delay.",
          encoderDelayFrameCount);
    }

    uint64_t mediaFrameCount = 0;
    // Pass the padding number, in frames, to the AAC decoder as well.
    if (aIndices) {
      MP4SampleIndex::Indice firstIndice = {0};
      MP4SampleIndex::Indice lastIndice = {0};
      // Both the first and the last entry are required: with only one of them
      // the subtraction below would mix a real value with the zero
      // initialised placeholder. An empty index is rejected up front so that
      // `Length() - 1` cannot wrap around.
      const bool haveBothIndices =
          aIndices->Length() > 0 && aIndices->GetIndice(0, firstIndice) &&
          aIndices->GetIndice(aIndices->Length() - 1, lastIndice);
      if (haveBothIndices) {
        if (firstIndice.start_composition > lastIndice.end_composition) {
          return MediaResult(
              NS_ERROR_DOM_MEDIA_METADATA_ERR,
              RESULT_DETAIL("Inconsistent start and end time in index"));
        }
        // The span between the start of the first index entry and the end of
        // the last one is given to the decoder so that trimming can be done
        // properly.
        // NOTE(review): the value is a difference of composition times and is
        // stored and logged as a frame count without any conversion --
        // confirm the unit of start_composition / end_composition.
        mediaFrameCount =
            lastIndice.end_composition - firstIndice.start_composition;
        LOG("AAC stream in MP4 container, total media duration is %" PRIu64
            " frames",
            mediaFrameCount);
      } else {
        LOG("AAC stream in MP4 container, couldn't determine total media time");
      }
    }

    AacCodecSpecificData aacCodecSpecificData{};
    aacCodecSpecificData.mEncoderDelayFrames = encoderDelayFrameCount;
    aacCodecSpecificData.mMediaFrameCount = mediaFrameCount;
    // codec specific data is used to store the DecoderConfigDescriptor.
    aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->AppendElements(
        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
    // extra data stores the ES_Descriptor.
    aacCodecSpecificData.mEsDescriptorBinaryBlob->AppendElements(
        extraData.data, extraData.length);
    mCodecSpecificConfig =
        AudioCodecSpecificVariant{std::move(aacCodecSpecificData)};
  } else if (codecType == MP4PARSE_CODEC_FLAC) {
    MOZ_ASSERT(extraData.length == 0,
               "FLAC doesn't expect extra data so doesn't handle it!");
    mMimeType = "audio/flac"_ns;
    FlacCodecSpecificData flacCodecSpecificData{};
    flacCodecSpecificData.mStreamInfoBinaryBlob->AppendElements(
        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
    mCodecSpecificConfig =
        AudioCodecSpecificVariant{std::move(flacCodecSpecificData)};
  } else if (codecType == MP4PARSE_CODEC_MP3) {
    // mp3 in mp4 can contain ES_Descriptor info (it also has a flash in mp4
    // specific box, which the rust parser recognizes). However, we don't
    // handle any such data here.
    mMimeType = "audio/mpeg"_ns;
    // TODO(bug 1705812): parse the encoder delay values from the mp4.
    mCodecSpecificConfig = AudioCodecSpecificVariant{Mp3CodecSpecificData{}};
  }

  mRate = aAudio->sample_info[0].sample_rate;
  mChannels = aAudio->sample_info[0].channels;
  mBitDepth = aAudio->sample_info[0].bit_depth;
  mExtendedProfile =
      AssertedCast<int8_t>(aAudio->sample_info[0].extended_profile);
  // Durations that do not fit in a TimeUnit are reported as infinite.
  if (aTrack->duration > TimeUnit::MaxTicks()) {
    mDuration = TimeUnit::FromInfinity();
  } else {
    mDuration =
        TimeUnit(AssertedCast<int64_t>(aTrack->duration), aTrack->time_scale);
  }
  mMediaTime = TimeUnit(aTrack->media_time, aTrack->time_scale);
  mTrackId = aTrack->track_id;

  // In stagefright, mProfile is kKeyAACProfile, mExtendedProfile is kKeyAACAOT.
  // Profile values above 4 are not stored; mProfile keeps its previous value.
  if (aAudio->sample_info[0].profile <= 4) {
    mProfile = AssertedCast<int8_t>(aAudio->sample_info[0].profile);
  }

  if (mCodecSpecificConfig.is<NoCodecSpecificData>()) {
    // Handle codecs that are not explicitly handled above.
    MOZ_ASSERT(
        extraData.length == 0,
        "Codecs that use extra data should be explicitly handled already");
    AudioCodecSpecificBinaryBlob codecSpecificBinaryBlob;
    // No codec specific metadata set, use the generic form.
    codecSpecificBinaryBlob.mBinaryBlob->AppendElements(
        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
    mCodecSpecificConfig =
        AudioCodecSpecificVariant{std::move(codecSpecificBinaryBlob)};
  }

  return NS_OK;
}
// An audio track is valid when it has a positive channel count and sample
// rate. Tracks with the "audio/mp4a-latm" mime type must additionally have
// a positive profile or extended profile; any other mime type is accepted.
bool MP4AudioInfo::IsValid() const {
  const bool hasBasicParams = mChannels > 0 && mRate > 0;
  if (!hasBasicParams) {
    return false;
  }

  if (!mMimeType.EqualsLiteral("audio/mp4a-latm")) {
    return true;
  }

  // AAC: validate the profile.
  return mProfile > 0 || mExtendedProfile > 0;
}
// Fills in this video track info from the data produced by the mp4 parser.
//
// track - track level data: id, duration, media time and time scale.
// video - the video sample description entries plus display size and
//         rotation.
//
// Returns the error from VerifyAudioOrVideoInfoAndRecordTelemetry or
// UpdateTrackProtectedInfo if either fails, NS_OK otherwise.
MediaResult MP4VideoInfo::Update(const Mp4parseTrackInfo* track,
                                 const Mp4parseTrackVideoInfo* video) {
  auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(video);
  NS_ENSURE_SUCCESS(rv, rv);

  // Crypto information is taken from the first encrypted sample info only.
  for (uint32_t idx = 0; idx < video->sample_info_count; ++idx) {
    const auto& protection = video->sample_info[idx].protected_data;
    if (!protection.is_encrypted) {
      continue;
    }
    rv = UpdateTrackProtectedInfo(*this, protection);
    NS_ENSURE_SUCCESS(rv, rv);
    break;
  }

  // The first sample info is taken as representative of the entire track.
  // This will need revisiting should a single track ever have to support
  // several codecs or otherwise differing sample descriptions.
  const auto& firstSample = video->sample_info[0];

  switch (firstSample.codec_type) {
    case MP4PARSE_CODEC_AVC:
      mMimeType = "video/avc"_ns;
      break;
    case MP4PARSE_CODEC_VP9:
      mMimeType = "video/vp9"_ns;
      break;
    case MP4PARSE_CODEC_AV1:
      mMimeType = "video/av1"_ns;
      break;
    case MP4PARSE_CODEC_MP4V:
      mMimeType = "video/mp4v-es"_ns;
      break;
    case MP4PARSE_CODEC_HEVC:
      mMimeType = "video/hevc"_ns;
      break;
    default:
      // Any other codec leaves the mime type as it was.
      break;
  }

  mTrackId = track->track_id;

  // Durations that do not fit in a TimeUnit are reported as infinite.
  mDuration = track->duration > TimeUnit::MaxTicks()
                  ? TimeUnit::FromInfinity()
                  : TimeUnit(AssertedCast<int64_t>(track->duration),
                             track->time_scale);
  mMediaTime = TimeUnit(track->media_time, track->time_scale);

  mDisplay.width = AssertedCast<int32_t>(video->display_width);
  mDisplay.height = AssertedCast<int32_t>(video->display_height);
  mImage.width = firstSample.image_width;
  mImage.height = firstSample.image_height;
  mRotation = ToSupportedRotation(video->rotation);

  // Appending a zero length buffer adds nothing.
  const Mp4parseByteData codecExtraData = firstSample.extra_data;
  mExtraData->AppendElements(codecExtraData.data, codecExtraData.length);

  return NS_OK;
}
// A video track is valid when either its display size or its image size has
// a positive width and a positive height.
bool MP4VideoInfo::IsValid() const {
  if (mDisplay.width > 0 && mDisplay.height > 0) {
    return true;
  }
  return mImage.width > 0 && mImage.height > 0;
}
} // namespace mozilla
#undef LOG
|