1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef ASH_PROJECTOR_PROJECTOR_METADATA_MODEL_H_
#define ASH_PROJECTOR_PROJECTOR_METADATA_MODEL_H_
#include <memory>
#include <string>
#include <vector>
#include "ash/ash_export.h"
#include "base/time/time.h"
#include "base/values.h"
#include "media/mojo/mojom/speech_recognition_result.h"
namespace ash {
// The speech recognition status.
enum class ASH_EXPORT RecognitionStatus : int {
// Speech recognition was incomplete by the time the metadata
// was written.
kIncomplete = 0,
// Speech recognition was completed by the time the metadata was
// written to file.
kComplete = 1,
// Speech recognition had encountered an error.
kError = 2
};
enum class ASH_EXPORT MetadataVersionNumber : int {
kUnknown = 0,
kV1 = 1,
kV2 = 2
};
// Base class to describe a metadata item.
class MetadataItem {
public:
MetadataItem(const base::TimeDelta start_time,
const base::TimeDelta end_time,
const std::string& text);
MetadataItem(const MetadataItem&) = delete;
MetadataItem& operator=(const MetadataItem&) = delete;
virtual ~MetadataItem();
base::TimeDelta& start_time() { return start_time_; }
base::TimeDelta& end_time() { return end_time_; }
std::string& text() { return text_; }
// Return the serialized metadata item. This is used for storage.
virtual base::Value::Dict ToJson() = 0;
protected:
// The start time of the metadata item from the start of the recording
// session.
base::TimeDelta start_time_;
// The end time of the metadata item from the start of the recording session.
base::TimeDelta end_time_;
// Text data of the metadata item.
std::string text_;
};
// Class to describe a key idea.
class ASH_EXPORT ProjectorKeyIdea : public MetadataItem {
public:
ProjectorKeyIdea(const base::TimeDelta start_time,
const base::TimeDelta end_time,
const std::string& text = std::string());
ProjectorKeyIdea(const ProjectorKeyIdea&) = delete;
ProjectorKeyIdea& operator=(const ProjectorKeyIdea&) = delete;
~ProjectorKeyIdea() override;
base::Value::Dict ToJson() override;
};
// Class to describe a transcription.
class ASH_EXPORT ProjectorTranscript : public MetadataItem {
public:
ProjectorTranscript(
const base::TimeDelta start_time,
const base::TimeDelta end_time,
int group_id,
const std::string& text,
const std::vector<media::HypothesisParts>& hypothesis_parts);
ProjectorTranscript(const ProjectorTranscript&) = delete;
ProjectorTranscript& operator=(const ProjectorTranscript&) = delete;
~ProjectorTranscript() override;
base::Value::Dict ToJson() override;
std::vector<media::HypothesisParts>& hypothesis_parts() {
return hypothesis_parts_;
}
private:
const int group_id_;
std::vector<media::HypothesisParts> hypothesis_parts_;
};
// Class to describe a projector metadata of a screencast session, including
// name, transcriptions, key_ideas, etc
class ASH_EXPORT ProjectorMetadata {
public:
ProjectorMetadata();
ProjectorMetadata(const ProjectorMetadata&) = delete;
ProjectorMetadata& operator=(const ProjectorMetadata&) = delete;
~ProjectorMetadata();
// Sets the language of the transcript.
void SetCaptionLanguage(const std::string& language);
// Adds the transcript to the metadata.
void AddTranscript(std::unique_ptr<ProjectorTranscript> transcript);
// Notifies the metadata that transcription has completed.
void SetSpeechRecognitionStatus(RecognitionStatus status);
// Marks a beginning of a key idea. The timing info of the next transcript
// will be used as the timing of the key idea.
void SetMetadataVersionNumber(MetadataVersionNumber version);
void MarkKeyIdea();
// Serializes the metadata for storage.
std::string Serialize();
// Returns the number of transcripts.
size_t GetTranscriptsCount() const { return transcripts_.size(); }
private:
base::Value::Dict ToJson();
// Add sentence transcripts to the metadata.
void AddSentenceTranscripts(
std::vector<std::unique_ptr<ProjectorTranscript>> sentence_transcripts);
std::vector<std::unique_ptr<ProjectorTranscript>> transcripts_;
std::vector<std::unique_ptr<ProjectorKeyIdea>> key_ideas_;
std::string caption_language_;
// True if user mark the transcript as a key idea. It will be reset to false
// when the final recognition result is received and recorded as a key idea.
bool should_mark_key_idea_ = false;
// The speech recognition status.
RecognitionStatus speech_recognition_status_ = RecognitionStatus::kIncomplete;
MetadataVersionNumber metadata_version_number_;
};
} // namespace ash
#endif // ASH_PROJECTOR_PROJECTOR_METADATA_MODEL_H_
|