|
|
// Copyright (c) 2016 Personal (Binbin Zhang)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef FRONTEND_WAV_H_
#define FRONTEND_WAV_H_
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "glog/logging.h"
#include <string>
namespace wenet {
struct WavHeader { char riff[4] = {'R', 'I', 'F', 'F'}; unsigned int size = 0; char wav[4] = {'W', 'A', 'V', 'E'}; char fmt[4] = {'f', 'm', 't', ' '}; unsigned int fmt_size = 16; uint16_t format = 1; uint16_t channels = 0; unsigned int sample_rate = 0; unsigned int bytes_per_second = 0; uint16_t block_size = 0; uint16_t bit = 0; char data[4] = {'d', 'a', 't', 'a'}; unsigned int data_size = 0;
WavHeader() {}
WavHeader(int num_samples, int num_channel, int sample_rate, int bits_per_sample) { data_size = num_samples * num_channel * (bits_per_sample / 8); size = sizeof(WavHeader) - 8 + data_size; channels = num_channel; this->sample_rate = sample_rate; bytes_per_second = sample_rate * num_channel * (bits_per_sample / 8); block_size = num_channel * (bits_per_sample / 8); bit = bits_per_sample; } };
class WavReader { public: WavReader() : data_(nullptr) {} explicit WavReader(const std::string& filename) { Open(filename); }
bool Open(const std::string& filename) { FILE* fp = fopen(filename.c_str(), "rb"); if (NULL == fp) { LOG(WARNING) << "Error in read " << filename; return false; }
WavHeader header; fread(&header, 1, sizeof(header), fp); if ((0 != strncmp(header.riff, "RIFF", 4)) || (0 != strncmp(header.wav, "WAVE", 4)) || (0 != strncmp(header.fmt, "fmt", 3))) { fprintf(stderr, "WaveData: expect audio format data.\n"); return false; } if (header.fmt_size < 16) { fprintf(stderr, "WaveData: expect PCM format data " "to have fmt chunk of at least size 16.\n"); fclose(fp); return false; } else if (header.fmt_size > 16) { int offset = 44 - 8 + header.fmt_size - 16; fseek(fp, offset, SEEK_SET); fread(header.data, 8, sizeof(char), fp); } // check "RIFF" "WAVE" "fmt " "data"
// Skip any sub-chunks between "fmt" and "data". Usually there will
// be a single "fact" sub chunk, but on Windows there can also be a
// "list" sub chunk.
while (0 != strncmp(header.data, "data", 4)) { // We will just ignore the data in these chunks.
fseek(fp, header.data_size, SEEK_CUR); // read next sub chunk
fread(header.data, 8, sizeof(char), fp); }
num_channel_ = header.channels; sample_rate_ = header.sample_rate; bits_per_sample_ = header.bit; int num_data = header.data_size / (bits_per_sample_ / 8); data_ = new float[num_data]; num_samples_ = num_data / num_channel_;
for (int i = 0; i < num_data; ++i) { switch (bits_per_sample_) { case 8: { char sample; fread(&sample, 1, sizeof(char), fp); data_[i] = static_cast<float>(sample); break; } case 16: { int16_t sample; fread(&sample, 1, sizeof(int16_t), fp); data_[i] = static_cast<float>(sample); break; } case 32: { int sample; fread(&sample, 1, sizeof(int), fp); data_[i] = static_cast<float>(sample); break; } default: fprintf(stderr, "unsupported quantization bits"); exit(1); } } fclose(fp); return true; }
int num_channel() const { return num_channel_; } int sample_rate() const { return sample_rate_; } int bits_per_sample() const { return bits_per_sample_; } int num_samples() const { return num_samples_; }
~WavReader() { delete[] data_; }
const float* data() const { return data_; }
private: int num_channel_; int sample_rate_; int bits_per_sample_; int num_samples_; // sample points per channel
float* data_; };
class WavWriter { public: WavWriter(const float* data, int num_samples, int num_channel, int sample_rate, int bits_per_sample) : data_(data), num_samples_(num_samples), num_channel_(num_channel), sample_rate_(sample_rate), bits_per_sample_(bits_per_sample) {}
void Write(const std::string& filename) { FILE* fp = fopen(filename.c_str(), "wb"); WavHeader header(num_samples_, num_channel_, sample_rate_, bits_per_sample_); fwrite(&header, 1, sizeof(header), fp);
for (int i = 0; i < num_samples_; ++i) { for (int j = 0; j < num_channel_; ++j) { switch (bits_per_sample_) { case 8: { char sample = static_cast<char>(data_[i * num_channel_ + j]); fwrite(&sample, 1, sizeof(sample), fp); break; } case 16: { int16_t sample = static_cast<int16_t>(data_[i * num_channel_ + j]); fwrite(&sample, 1, sizeof(sample), fp); break; } case 32: { int sample = static_cast<int>(data_[i * num_channel_ + j]); fwrite(&sample, 1, sizeof(sample), fp); break; } } } } fclose(fp); }
private: const float* data_; int num_samples_; // total float points in data_
int num_channel_; int sample_rate_; int bits_per_sample_; };
class StreamWavWriter { public: StreamWavWriter(int num_channel, int sample_rate, int bits_per_sample) : num_channel_(num_channel), sample_rate_(sample_rate), bits_per_sample_(bits_per_sample), total_num_samples_(0) {}
StreamWavWriter(const std::string& filename, int num_channel, int sample_rate, int bits_per_sample) : StreamWavWriter(num_channel, sample_rate, bits_per_sample) { Open(filename); }
void Open(const std::string& filename) { fp_ = fopen(filename.c_str(), "wb"); fseek(fp_, sizeof(WavHeader), SEEK_SET); }
void Write(const int16_t* sample_data, size_t num_samples) { fwrite(sample_data, sizeof(int16_t), num_samples, fp_); total_num_samples_ += num_samples; }
void Close() { WavHeader header(total_num_samples_, num_channel_, sample_rate_, bits_per_sample_); fseek(fp_, 0L, SEEK_SET); fwrite(&header, 1, sizeof(header), fp_); fclose(fp_); }
private: FILE* fp_; int num_channel_; int sample_rate_; int bits_per_sample_; size_t total_num_samples_; };
} // namespace wenet
#endif // FRONTEND_WAV_H_
|