// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef DECODER_CTC_ENDPOINT_H_
#define DECODER_CTC_ENDPOINT_H_

#include <vector>

namespace wenet {

/// A single endpointing rule. The timing fields are expressed in
/// milliseconds, matching the rule descriptions in CtcEndpointConfig.
struct CtcEndpointRule {
  /// Whether something must have been decoded for this rule to apply
  /// (rule2 sets it; rule1 and rule3 do not).
  bool must_decoded_sth;
  /// Trailing-silence timeout of this rule, in ms.
  int min_trailing_silence;
  /// Utterance-length timeout of this rule, in ms.
  int min_utterance_length;

  /// Builds a rule. The defaults are: something must have been decoded,
  /// 1000 ms of trailing silence, and no utterance-length requirement.
  CtcEndpointRule(bool must_decoded_sth = true,
                  int min_trailing_silence = 1000,
                  int min_utterance_length = 0)
      : must_decoded_sth(must_decoded_sth),
        min_trailing_silence(min_trailing_silence),
        min_utterance_length(min_utterance_length) {}
};

/// Configuration of the CTC endpoint detector: how silence is recognised
/// and which rules terminate decoding.
struct CtcEndpointConfig {
  /// We consider blank as silence for purposes of endpointing.
  int blank = 0;                // blank id
  float blank_threshold = 0.8;  // blank threshold to be silence

  /// We support three rules. We terminate decoding if ANY of these rules
  /// evaluates to "true". If you want to add more rules, do it by changing
  /// this code. If you want to disable a rule, you can set the
  /// silence-timeout for that rule to a very large number.

  /// rule1 times out after 5000 ms of silence, even if we decoded nothing.
  CtcEndpointRule rule1;
  /// rule2 times out after 1000 ms of silence after decoding something.
  CtcEndpointRule rule2;
  /// rule3 times out after the utterance is 20000 ms long, regardless of
  /// anything else.
  CtcEndpointRule rule3;

  /// Installs the three default rules described above.
  CtcEndpointConfig()
      : rule1(false, 5000, 0),
        rule2(true, 1000, 0),
        rule3(false, 0, 20000) {}
};

/// Endpoint detector driven by CTC output. Only the declarations live in
/// this header; the member functions other than the frame-shift setter are
/// defined elsewhere.
class CtcEndpoint {
 public:
  explicit CtcEndpoint(const CtcEndpointConfig& config);

  void Reset();

  /// This function returns true if this set of endpointing rules thinks we
  /// should terminate decoding.
  ///
  /// NOTE(review): the template arguments of `ctc_log_probs` were lost in the
  /// mangled source; a vector of per-frame float vectors is assumed here --
  /// confirm against the out-of-line definition.
  bool IsEndpoint(const std::vector<std::vector<float>>& ctc_log_probs,
                  bool decoded_something);

  /// Sets the frame shift, in milliseconds.
  void frame_shift_in_ms(int frame_shift_in_ms) {
    frame_shift_in_ms_ = frame_shift_in_ms;
  }

 private:
  CtcEndpointConfig config_;
  /// Stays at -1 until frame_shift_in_ms() is called.
  int frame_shift_in_ms_ = -1;
  int num_frames_decoded_ = 0;
  int num_frames_trailing_blank_ = 0;
};

}  // namespace wenet

#endif  // DECODER_CTC_ENDPOINT_H_