// Copyright (c) 2021 Ximalaya Speech Team (Xiang Lyu)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "grpc/grpc_server.h"

namespace wenet {

using grpc::ServerReaderWriter;
using wenet::Request;
using wenet::Response;

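// Handles a single bidirectional Recognize stream; one instance is created
// per connection and keeps the stream, the protobuf messages, and the
// decoding configuration/resources together.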
GrpcConnectionHandler::GrpcConnectionHandler(
    ServerReaderWriter<Response, Request>* stream,
    std::shared_ptr<Request> request, std::shared_ptr<Response> response,
    std::shared_ptr<FeaturePipelineConfig> feature_config,
    std::shared_ptr<DecodeOptions> decode_config,
    std::shared_ptr<DecodeResource> decode_resource)
    : stream_(std::move(stream)),
      request_(std::move(request)),
      response_(std::move(response)),
      feature_config_(std::move(feature_config)),
      decode_config_(std::move(decode_config)),
      decode_resource_(std::move(decode_resource)) {}

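// First message of a stream: acknowledge the client, build the feature
// pipeline and decoder, and start the background decoding thread.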
void GrpcConnectionHandler::OnSpeechStart() {
  LOG(INFO) << "Received speech start signal, start reading speech";
  got_start_tag_ = true;
  response_->set_status(Response::ok);
  response_->set_type(Response::server_ready);
  stream_->Write(*response_);
  feature_pipeline_ = std::make_shared<FeaturePipeline>(*feature_config_);
  decoder_ = std::make_shared<AsrDecoder>(feature_pipeline_, decode_resource_,
                                          *decode_config_);
  // Start decoder thread
  decode_thread_ = std::make_shared<std::thread>(
      &GrpcConnectionHandler::DecodeThreadFunc, this);
}

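// Client has stopped sending audio: mark the feature pipeline as finished so
// the decoding thread can drain the remaining frames.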
void GrpcConnectionHandler::OnSpeechEnd() {
  LOG(INFO) << "Received speech end signal";
  CHECK(feature_pipeline_ != nullptr);
  feature_pipeline_->set_input_finished();
  got_end_tag_ = true;
}

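// Send the current (non-final) n-best hypotheses to the client.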
void GrpcConnectionHandler::OnPartialResult() {
  LOG(INFO) << "Partial result";
  response_->set_status(Response::ok);
  response_->set_type(Response::partial_result);
  stream_->Write(*response_);
}

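// Send the rescored final n-best hypotheses to the client.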
void GrpcConnectionHandler::OnFinalResult() {
  LOG(INFO) << "Final result";
  response_->set_status(Response::ok);
  response_->set_type(Response::final_result);
  stream_->Write(*response_);
}

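// Tell the client that the server has finished this recognition.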
void GrpcConnectionHandler::OnFinish() {
  // Send finish tag
  response_->set_status(Response::ok);
  response_->set_type(Response::speech_end);
  stream_->Write(*response_);
}

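// Feed one chunk of 16-bit PCM audio from the request into the feature
// pipeline.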
void GrpcConnectionHandler::OnSpeechData() {
  // Read binary PCM data
  const int16_t* pcm_data =
      reinterpret_cast<const int16_t*>(request_->audio_data().c_str());
  int num_samples = request_->audio_data().length() / sizeof(int16_t);
  VLOG(2) << "Received " << num_samples << " samples";
  CHECK(feature_pipeline_ != nullptr);
  CHECK(decoder_ != nullptr);
  feature_pipeline_->AcceptWaveform(pcm_data, num_samples);
}

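// Copy up to nbest_ decoding hypotheses into the response; word-level
// timestamps are attached only for final results.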
void GrpcConnectionHandler::SerializeResult(bool finish) {
  for (const DecodeResult& path : decoder_->result()) {
    Response_OneBest* one_best_ = response_->add_nbest();
    one_best_->set_sentence(path.sentence);
    if (finish) {
      for (const WordPiece& word_piece : path.word_pieces) {
        Response_OnePiece* one_piece_ = one_best_->add_wordpieces();
        one_piece_->set_word(word_piece.word);
        one_piece_->set_start(word_piece.start);
        one_piece_->set_end(word_piece.end);
      }
    }
    if (response_->nbest_size() == nbest_) {
      break;
    }
  }
  return;
}

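// Decoding loop running on its own thread: emit partial results while audio
// is streaming, final results at endpoints, and stop once all features have
// been consumed.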
void GrpcConnectionHandler::DecodeThreadFunc() {
  while (true) {
    DecodeState state = decoder_->Decode();
    response_->clear_status();
    response_->clear_type();
    response_->clear_nbest();
    if (state == DecodeState::kEndFeats) {
      decoder_->Rescoring();
      SerializeResult(true);
      OnFinalResult();
      OnFinish();
      stop_recognition_ = true;
      break;
    } else if (state == DecodeState::kEndpoint) {
      decoder_->Rescoring();
      SerializeResult(true);
      OnFinalResult();
      // In continuous decoding mode, reset the decoder and keep recognizing;
      // otherwise finish the response and stop the recognition.
      if (continuous_decoding_) {
        decoder_->ResetContinuousDecoding();
      } else {
        OnFinish();
        stop_recognition_ = true;
        break;
      }
    } else {
      if (decoder_->DecodedSomething()) {
        SerializeResult(false);
        OnPartialResult();
      }
    }
  }
}

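// Connection loop: the first message carries the decoding configuration and
// triggers OnSpeechStart(); every following message is treated as audio.
// When the client closes the stream, wait for the decoding thread to finish.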
void GrpcConnectionHandler::operator()() {
  try {
    while (stream_->Read(request_.get())) {
      if (!got_start_tag_) {
        nbest_ = request_->decode_config().nbest_config();
        continuous_decoding_ =
            request_->decode_config().continuous_decoding_config();
        OnSpeechStart();
      } else {
        OnSpeechData();
      }
    }
    OnSpeechEnd();
    LOG(INFO) << "Read all pcm data, wait for decoding thread";
    if (decode_thread_ != nullptr) {
      decode_thread_->join();
    }
  } catch (std::exception const& e) {
    LOG(ERROR) << e.what();
  }
}

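// gRPC entry point: each Recognize call gets its own connection handler,
// which is run on a dedicated thread and joined before the RPC returns.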
Status GrpcServer::Recognize(ServerContext* context,
                             ServerReaderWriter<Response, Request>* stream) {
  LOG(INFO) << "Get Recognize request";
  auto request = std::make_shared<Request>();
  auto response = std::make_shared<Response>();
  GrpcConnectionHandler handler(stream, request, response, feature_config_,
                                decode_config_, decode_resource_);
  std::thread t(std::move(handler));
  t.join();
  return Status::OK;
}

}  // namespace wenet