Browse Source

2024.5.16 测试整体构建

master
Administrator 1 year ago
parent
commit
b3b33b54e5
10 changed files with 30 additions and 31 deletions
  1. +1
    -1
      CMakeLists.txt
  2. +0
    -3
      decoder/asr_decoder.cc
  3. +1
    -1
      frontend/fbank.h
  4. +2
    -2
      frontend/feature_pipeline.cc
  5. +8
    -8
      kaldi/fstext/lattice-weight.h
  6. +0
    -1
      kaldi/lat/determinize-lattice-pruned.cc
  7. +14
    -13
      post_processor/processor/wetext_processor.cc
  8. +1
    -0
      post_processor/processor/wetext_token_parser.cc
  9. +2
    -1
      post_processor/utils/wetext_string.cc
  10. +1
    -1
      utils/wn_string.cc

+ 1
- 1
CMakeLists.txt

@@ -33,7 +33,7 @@ include(openfst)
include_directories(
${OPENFST_INCLUDE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}
# ${CMAKE_CURRENT_SOURCE_DIR}/kaldi
${CMAKE_CURRENT_SOURCE_DIR}/kaldi
)
# Build all libraries
add_subdirectory(utils)

+ 0
- 3
decoder/asr_decoder.cc

@@ -15,10 +15,7 @@
#include "decoder/asr_decoder.h"
#include <ctype.h>
#include <algorithm>
#include <limits>
#include <utility>
#include "utils/timer.h"

+ 1
- 1
frontend/fbank.h

@@ -27,7 +27,7 @@
#endif
namespace wenet {
using namespace fst;
// This code is based on kaldi Fbank implementation, please see
// https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-fbank.cc

+ 2
- 2
frontend/feature_pipeline.cc

@@ -18,8 +18,8 @@
#include <utility>
namespace wenet {
FeaturePipeline::FeaturePipeline(const FeaturePipelineConfig& config)
using namespace fst;
FeaturePipeline::FeaturePipeline(const FeaturePipelineConfig& config)
: config_(config),
feature_dim_(config.num_bins),
fbank_(config.num_bins, config.sample_rate, config.frame_length,

+ 8
- 8
kaldi/fstext/lattice-weight.h

@@ -401,8 +401,8 @@ template <class FloatType>
inline std::ostream& operator<<(std::ostream& strm,
const LatticeWeightTpl<FloatType>& w) {
LatticeWeightTpl<FloatType>::WriteFloatType(strm, w.Value1());
CHECK(FST_FLAGS_fst_weight_separator.size() == 1); // NOLINT
strm << FST_FLAGS_fst_weight_separator[0]; // comma by default;
CHECK(FLAGS_fst_weight_separator.size() == 1); // NOLINT
strm << FLAGS_fst_weight_separator[0]; // comma by default;
// may or may not be settable from Kaldi programs.
LatticeWeightTpl<FloatType>::WriteFloatType(strm, w.Value2());
return strm;
@@ -411,9 +411,9 @@ inline std::ostream& operator<<(std::ostream& strm,
template <class FloatType>
inline std::istream& operator>>(std::istream& strm,
LatticeWeightTpl<FloatType>& w1) {
CHECK(FST_FLAGS_fst_weight_separator.size() == 1); // NOLINT
CHECK(FLAGS_fst_weight_separator.size() == 1); // NOLINT
// separator defaults to ','
return w1.ReadNoParen(strm, FST_FLAGS_fst_weight_separator[0]);
return w1.ReadNoParen(strm, FLAGS_fst_weight_separator[0]);
}
// CompactLattice will be an acceptor (accepting the words/output-symbols),
@@ -752,8 +752,8 @@ template <class WeightType, class IntType>
inline std::ostream& operator<<(
std::ostream& strm, const CompactLatticeWeightTpl<WeightType, IntType>& w) {
strm << w.Weight();
CHECK(FST_FLAGS_fst_weight_separator.size() == 1); // NOLINT
strm << FST_FLAGS_fst_weight_separator[0]; // comma by default.
CHECK(FLAGS_fst_weight_separator.size() == 1); // NOLINT
strm << FLAGS_fst_weight_separator[0]; // comma by default.
for (size_t i = 0; i < w.String().size(); i++) {
strm << w.String()[i];
if (i + 1 < w.String().size())
@@ -771,8 +771,8 @@ inline std::istream& operator>>(
if (strm.fail()) {
return strm;
}
CHECK(FST_FLAGS_fst_weight_separator.size() == 1); // NOLINT
size_t pos = s.find_last_of(FST_FLAGS_fst_weight_separator); // normally ","
CHECK(FLAGS_fst_weight_separator.size() == 1); // NOLINT
size_t pos = s.find_last_of(FLAGS_fst_weight_separator); // normally ","
if (pos == std::string::npos) {
strm.clear(std::ios::badbit);
return strm;

+ 0
- 1
kaldi/lat/determinize-lattice-pruned.cc

@@ -22,7 +22,6 @@
#include <climits>
#include <vector>
#include "fstext/determinize-lattice.h" // for LatticeStringRepository
#include "fstext/fstext-utils.h"
#include "lat/lattice-functions.h" // for PruneLattice
// #include "lat/minimize-lattice.h" // for minimization
// #include "lat/push-lattice.h" // for minimization

+ 14
- 13
post_processor/processor/wetext_processor.cc

@@ -15,21 +15,22 @@
#include "wetext_processor.h"
#include "fst/string.h"
namespace wetext {
Processor::Processor(const std::string& tagger_path,
using namespace fst;
Processor::Processor(const std::string& tagger_path,
const std::string& verbalizer_path) {
tagger_.reset(StdVectorFst::Read(tagger_path));
verbalizer_.reset(StdVectorFst::Read(verbalizer_path));
compiler_ = std::make_shared<StringCompiler<StdArc>>(fst::StringTokenType::BYTE);
printer_ = std::make_shared<StringPrinter<StdArc>>(fst::StringTokenType::BYTE);
tagger_.reset(StdVectorFst::Read(tagger_path));
verbalizer_.reset(StdVectorFst::Read(verbalizer_path));
compiler_ = std::make_shared<StringCompiler<StdArc>>(fst::StringTokenType::BYTE);
printer_ = std::make_shared<StringPrinter<StdArc>>(fst::StringTokenType::BYTE);
if (tagger_path.find("_tn_") != tagger_path.npos) {
parse_type_ = ParseType::kTN;
} else if (tagger_path.find("_itn_") != tagger_path.npos) {
parse_type_ = ParseType::kITN;
} else {
LOG(FATAL) << "Invalid fst prefix, prefix should contain"
<< " either \"_tn_\" or \"_itn_\".";
}
if (tagger_path.find("_tn_") != tagger_path.npos) {
parse_type_ = ParseType::kTN;
} else if (tagger_path.find("_itn_") != tagger_path.npos) {
parse_type_ = ParseType::kITN;
} else {
LOG(FATAL) << "Invalid fst prefix, prefix should contain"
<< " either \"_tn_\" or \"_itn_\".";
}
}
std::string Processor::ShortestPath(const StdVectorFst& lattice) {

+ 1
- 0
post_processor/processor/wetext_token_parser.cc

@@ -18,6 +18,7 @@
#include "../utils/wetext_string.h"
namespace wetext {
using namespace fst;
const char EOS[] = "<EOS>";
const std::set<std::string> UTF8_WHITESPACE = {" ", "\t", "\n", "\r",
"\x0b\x0c"};

+ 2
- 1
post_processor/utils/wetext_string.cc

@@ -17,7 +17,8 @@
#include "wetext_log.h"
namespace wetext {
const char* WHITESPACE = " \n\r\t\f\v";
using namespace fst;
const char* WHITESPACE = " \n\r\t\f\v";
int UTF8CharLength(char ch) {
int num_bytes = 1;

+ 1
- 1
utils/wn_string.cc

@@ -21,7 +21,7 @@
#include "wn_utils.h"
namespace wenet {
using namespace fst;
void SplitString(const std::string& str, std::vector<std::string>* strs) {
SplitStringToVector(Trim(str), " \t", true, strs);
}

Loading…
Cancel
Save