|
|
// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DECODER_CTC_ENDPOINT_H_
#define DECODER_CTC_ENDPOINT_H_
#include <vector>
namespace wenet {
/// A single endpointing rule: the conditions under which decoding may be
/// terminated. Going by the rule descriptions in CtcEndpointConfig, the two
/// integer fields are durations in milliseconds. How the fields are combined
/// is decided by the CtcEndpoint implementation, which is not in this header.
struct CtcEndpointRule {
  /// Whether the rule requires that something has already been decoded.
  bool must_decoded_sth;
  /// Minimum amount of trailing silence required by the rule.
  int min_trailing_silence;
  /// Minimum utterance length required by the rule.
  int min_utterance_length;

  /// Every argument is optional; an argument-less rule is
  /// (must_decoded_sth = true, min_trailing_silence = 1000,
  /// min_utterance_length = 0).
  CtcEndpointRule(bool must_decoded_sth = true,
                  int min_trailing_silence = 1000,
                  int min_utterance_length = 0)
      : must_decoded_sth{must_decoded_sth},
        min_trailing_silence{min_trailing_silence},
        min_utterance_length{min_utterance_length} {}
};
/// Settings for CTC-based endpointing: the blank symbol, the blank threshold,
/// and the three rules that decide when decoding should stop.
struct CtcEndpointConfig {
  /// We consider blank as silence for purposes of endpointing.
  int blank = 0;  // blank id
  // Blank threshold to be silence.
  // NOTE(review): presumably a frame counts as silence when its blank
  // probability exceeds this value -- confirm against the implementation.
  float blank_threshold = 0.8f;

  /// We support three rules. We terminate decoding if ANY of these rules
  /// evaluates to "true". If you want to add more rules, do it by changing
  /// this code. If you want to disable a rule, you can set the
  /// silence-timeout for that rule to a very large number.

  /// rule1 times out after 5000 ms of silence, even if we decoded nothing.
  CtcEndpointRule rule1;
  /// rule2 times out after 1000 ms of silence after decoding something.
  CtcEndpointRule rule2;
  /// rule3 times out after the utterance is 20000 ms long, regardless of
  /// anything else.
  CtcEndpointRule rule3;

  /// Installs the default values of the three rules described above.
  CtcEndpointConfig()
      : rule1{false, 5000, 0},
        rule2{true, 1000, 0},
        rule3{false, 0, 20000} {}
};
/// Endpoint detector configured by a CtcEndpointConfig. Only the frame-shift
/// setter is defined in this header; the constructor, Reset() and
/// IsEndpoint() are implemented out of line.
class CtcEndpoint {
 public:
  /// Constructs the detector from `config`.
  /// NOTE(review): presumably copies it into config_ -- confirm in the
  /// implementation file.
  explicit CtcEndpoint(const CtcEndpointConfig& config);

  /// NOTE(review): presumably returns the frame counters to their initial
  /// state -- the definition is not visible here, confirm before relying on it.
  void Reset();

  /// This function returns true if this set of endpointing rules thinks we
  /// should terminate decoding.
  /// NOTE(review): ctc_log_probs presumably holds one vector of per-symbol
  /// log probabilities per frame -- confirm against the caller.
  bool IsEndpoint(const std::vector<std::vector<float>>& ctc_log_probs,
                  bool decoded_something);

  /// Stores the frame shift, in milliseconds, used by this detector.
  void frame_shift_in_ms(int frame_shift_in_ms) {
    frame_shift_in_ms_ = frame_shift_in_ms;
  }

 private:
  CtcEndpointConfig config_;
  // Starts at -1 until frame_shift_in_ms() is called.
  int frame_shift_in_ms_{-1};
  int num_frames_decoded_{0};
  int num_frames_trailing_blank_{0};
};
} // namespace wenet
#endif // DECODER_CTC_ENDPOINT_H_
|