1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
|
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_LIVE_CAPTION_TRANSLATION_UTIL_H_
#define COMPONENTS_LIVE_CAPTION_TRANSLATION_UTIL_H_
#include <string>
#include <unordered_map>
#include <vector>
#include "base/functional/callback.h"
#include "base/types/expected.h"
namespace captions {
// Alias for a base::expected whose value and error alternatives are both
// std::string.
// NOTE(review): presumably the value holds the translated text and the error
// holds a failure description -- confirm against the code producing it.
using TranslateEvent = base::expected<std::string, std::string>;
// Alias for a base::OnceCallback that takes a TranslateEvent by const
// reference and returns nothing.
using TranslateEventCallback = base::OnceCallback<void(const TranslateEvent&)>;
// Splits the transcription `text` into sentences and returns them as a
// vector. Spaces are kept with the preceding sentence rather than being
// dropped or attached to the following one.
// NOTE(review): `locale` presumably selects the sentence-boundary rules --
// confirm against the definition.
std::vector<std::string> SplitSentences(const std::string& text,
const std::string& locale);
// NOTE(review): presumably returns true when `str` ends with a space
// character; which characters count as "space" is not visible in this header
// -- confirm against the definition.
bool ContainsTrailingSpace(const std::string& str);
// Takes `str` by const reference and returns a new string by value.
// NOTE(review): presumably the result is `str` without its trailing space(s);
// whether one or all trailing spaces are removed is not visible here --
// confirm against the definition.
std::string RemoveTrailingSpace(const std::string& str);
// Takes `str` by value and returns a string by value.
// NOTE(review): presumably the result is `str` with punctuation stripped and
// converted to lowercase; the exact punctuation set and case-folding rules
// are not visible here -- confirm against the definition.
std::string RemovePunctuationToLower(std::string str);
// Builds a string key from a source language, a target language and a
// transcription.
// NOTE(review): presumably this produces the TranslationCache key format
// described in this file (source and target language codes, a `|` separator,
// then the original text) -- confirm against the definition.
std::string GetTranslationCacheKey(const std::string& source_language,
const std::string& target_language,
const std::string& transcription);
// NOTE(review): presumably returns true when `locale` names a language
// written with ideographic characters; the set of matching locales is not
// visible in this header -- confirm against the definition.
bool IsIdeographicLocale(const std::string& locale);
// Caches translations so that the same string is not translated again.
//
// Each key is the source and target language codes followed by a `|`
// separator character and the original text; each value is the translated
// text. The cache is cleared upon receiving a final recognition event. Its
// size depends on the frequency of partial and final recognition events, but
// is typically under ~10 entries.
class TranslationCache {
public:
TranslationCache();
// Virtual so that a subclass can be destroyed through a TranslationCache
// pointer.
virtual ~TranslationCache();
// Looks up `transcript` for the given language pair without modifying the
// cache (the method is const). Returns a pair: `first` is the remaining
// string that still needs to be translated, `second` is the cached
// translation found so far.
// NOTE(review): std::pair is used here but <utility> is not included
// directly by this header; it is currently picked up transitively.
std::pair<std::string, std::string> FindCachedTranslationOrRemaining(
const std::string& transcript,
const std::string& source_language,
const std::string& target_language) const;
// NOTE(review): presumably stores `translation` for
// `original_transcription` under a key built from the language pair and the
// original text, possibly as several entries -- confirm against the
// definition.
void InsertIntoCache(const std::string& original_transcription,
const std::string& translation,
const std::string& source_language,
const std::string& target_language);
// NOTE(review): presumably removes every cached entry; per the class comment
// the cache is cleared on a final recognition event -- confirm against the
// definition and callers.
void Clear();
private:
// Backing store: cache key (format described in the class comment) mapped to
// the translated text.
std::unordered_map<std::string, std::string> translation_cache_;
};
} // namespace captions
#endif // COMPONENTS_LIVE_CAPTION_TRANSLATION_UTIL_H_
|