// Copyright 2005-2024 Google LLC
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the 'License');
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an 'AS IS' BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
// See www.openfst.org for extensive documentation on this weighted
|
|
// finite-state transducer library.
|
|
//
|
|
// Allocators for contiguous arrays of arcs.
|
|
|
|
#ifndef FST_ARC_ARENA_H_
|
|
#define FST_ARC_ARENA_H_
|
|
|
|
#include <algorithm>
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <deque>
|
|
#include <list>
|
|
#include <memory>
|
|
#include <utility>
|
|
|
|
#include <fst/fst.h>
|
|
#include <fst/memory.h>
|
|
#include <unordered_map>
|
|
|
|
namespace fst {
|
|
|
|
// ArcArena is used for fast allocation of contiguous arrays of arcs.
|
|
//
|
|
// To create an arc array:
|
|
// for each state:
|
|
// for each arc:
|
|
// arena.PushArc();
|
|
// // Commits these arcs and returns pointer to them.
|
|
// Arc *arcs = arena.GetArcs();
|
|
//
|
|
// OR
|
|
//
|
|
// arena.DropArcs(); // Throws away current arcs, reuse the space.
|
|
//
|
|
// The arcs returned are guaranteed to be contiguous and the pointer returned
|
|
// will never be invalidated until the arena is cleared for reuse.
|
|
//
|
|
// The contents of the arena can be released with a call to arena.Clear() after
|
|
// which the arena will restart with an initial allocation capable of holding at
|
|
// least all of the arcs requested in the last usage before Clear() making
|
|
// subsequent uses of the Arena more efficient.
|
|
//
|
|
// The max_retained_size option can limit the amount of arc space requested on
|
|
// Clear() to avoid excess growth from intermittent high usage.
|
|
template <typename Arc>
|
|
class ArcArena {
|
|
public:
|
|
explicit ArcArena(size_t block_size = 256, size_t max_retained_size = 1e6)
|
|
: block_size_(block_size), max_retained_size_(max_retained_size) {
|
|
blocks_.emplace_back(MakeSharedBlock(block_size_));
|
|
first_block_size_ = block_size_;
|
|
total_size_ = block_size_;
|
|
arcs_ = blocks_.back().get();
|
|
end_ = arcs_ + block_size_;
|
|
next_ = arcs_;
|
|
}
|
|
|
|
ArcArena(const ArcArena ©)
|
|
: arcs_(copy.arcs_),
|
|
next_(copy.next_),
|
|
end_(copy.end_),
|
|
block_size_(copy.block_size_),
|
|
first_block_size_(copy.first_block_size_),
|
|
total_size_(copy.total_size_),
|
|
max_retained_size_(copy.max_retained_size_),
|
|
blocks_(copy.blocks_) {
|
|
NewBlock(block_size_);
|
|
}
|
|
|
|
void ReserveArcs(size_t n) {
|
|
if (next_ + n < end_) return;
|
|
NewBlock(n);
|
|
}
|
|
|
|
void PushArc(const Arc &arc) {
|
|
if (next_ == end_) {
|
|
size_t length = next_ - arcs_;
|
|
NewBlock(length * 2);
|
|
}
|
|
*next_ = arc;
|
|
++next_;
|
|
}
|
|
|
|
const Arc *GetArcs() {
|
|
const auto *arcs = arcs_;
|
|
arcs_ = next_;
|
|
return arcs;
|
|
}
|
|
|
|
void DropArcs() { next_ = arcs_; }
|
|
|
|
size_t Size() { return total_size_; }
|
|
|
|
void Clear() {
|
|
blocks_.resize(1);
|
|
if (total_size_ > first_block_size_) {
|
|
first_block_size_ = std::min(max_retained_size_, total_size_);
|
|
blocks_.back() = MakeSharedBlock(first_block_size_);
|
|
}
|
|
total_size_ = first_block_size_;
|
|
arcs_ = blocks_.back().get();
|
|
end_ = arcs_ + first_block_size_;
|
|
next_ = arcs_;
|
|
}
|
|
|
|
private:
|
|
// Allocates a new block with capacity of at least n or block_size,
|
|
// copying incomplete arc sequence from old block to new block.
|
|
void NewBlock(size_t n) {
|
|
const auto length = next_ - arcs_;
|
|
const auto new_block_size = std::max(n, block_size_);
|
|
total_size_ += new_block_size;
|
|
blocks_.emplace_back(MakeSharedBlock(new_block_size));
|
|
std::copy(arcs_, next_, blocks_.back().get());
|
|
arcs_ = blocks_.back().get();
|
|
next_ = arcs_ + length;
|
|
end_ = arcs_ + new_block_size;
|
|
}
|
|
|
|
std::shared_ptr<Arc[]> MakeSharedBlock(size_t size) {
|
|
return std::shared_ptr<Arc[]>(new Arc[size]);
|
|
}
|
|
|
|
Arc *arcs_;
|
|
Arc *next_;
|
|
const Arc *end_;
|
|
size_t block_size_;
|
|
size_t first_block_size_;
|
|
size_t total_size_;
|
|
size_t max_retained_size_;
|
|
std::list<std::shared_ptr<Arc[]>> blocks_;
|
|
};
|
|
|
|
// ArcArenaStateStore uses a resusable ArcArena to store arc arrays and does not
|
|
// require that the Expander call ReserveArcs first.
|
|
//
|
|
// TODO(tombagby): Make cache type configurable.
|
|
// TODO(tombagby): Provide ThreadLocal/Concurrent configuration.
|
|
template <class A>
|
|
class ArcArenaStateStore {
|
|
public:
|
|
using Arc = A;
|
|
using Weight = typename Arc::Weight;
|
|
using StateId = typename Arc::StateId;
|
|
|
|
class State {
|
|
public:
|
|
Weight Final() const { return final_weight_; }
|
|
|
|
size_t NumInputEpsilons() const { return niepsilons_; }
|
|
|
|
size_t NumOutputEpsilons() const { return noepsilons_; }
|
|
|
|
size_t NumArcs() const { return narcs_; }
|
|
|
|
const Arc &GetArc(size_t n) const { return arcs_[n]; }
|
|
|
|
const Arc *Arcs() const { return arcs_; }
|
|
|
|
int *MutableRefCount() const { return nullptr; }
|
|
|
|
private:
|
|
State(Weight final_weight, int32_t niepsilons, int32_t noepsilons,
|
|
int32_t narcs, const Arc *arcs)
|
|
: final_weight_(std::move(final_weight)),
|
|
niepsilons_(niepsilons),
|
|
noepsilons_(noepsilons),
|
|
narcs_(narcs),
|
|
arcs_(arcs) {}
|
|
|
|
Weight final_weight_;
|
|
size_t niepsilons_;
|
|
size_t noepsilons_;
|
|
size_t narcs_;
|
|
const Arc *arcs_;
|
|
|
|
friend class ArcArenaStateStore<Arc>;
|
|
};
|
|
|
|
template <class Expander>
|
|
State *FindOrExpand(Expander &expander, StateId state_id) {
|
|
const auto &[it, success] = cache_.emplace(state_id, nullptr);
|
|
if (!success) return it->second;
|
|
// Needs a new state.
|
|
StateBuilder builder(&arena_);
|
|
expander.Expand(state_id, &builder);
|
|
const auto arcs = arena_.GetArcs();
|
|
size_t narcs = builder.narcs_;
|
|
size_t niepsilons = 0;
|
|
size_t noepsilons = 0;
|
|
for (size_t i = 0; i < narcs; ++i) {
|
|
if (arcs[i].ilabel == 0) ++niepsilons;
|
|
if (arcs[i].olabel == 0) ++noepsilons;
|
|
}
|
|
states_.emplace_back(
|
|
State(builder.final_weight_, niepsilons, noepsilons, narcs, arcs));
|
|
// Places it in the cache.
|
|
auto state = &states_.back();
|
|
it->second = state;
|
|
return state;
|
|
}
|
|
|
|
State *Find(StateId state_id) const {
|
|
auto it = cache_.find(state_id);
|
|
return (it == cache_.end()) ? nullptr : it->second;
|
|
}
|
|
|
|
private:
|
|
class StateBuilder {
|
|
public:
|
|
explicit StateBuilder(ArcArena<Arc> *arena)
|
|
: arena_(arena), final_weight_(Weight::Zero()), narcs_(0) {}
|
|
|
|
void SetFinal(Weight weight) { final_weight_ = std::move(weight); }
|
|
|
|
void ReserveArcs(size_t n) { arena_->ReserveArcs(n); }
|
|
|
|
void AddArc(const Arc &arc) {
|
|
++narcs_;
|
|
arena_->PushArc(arc);
|
|
}
|
|
|
|
private:
|
|
friend class ArcArenaStateStore<Arc>;
|
|
|
|
ArcArena<Arc> *arena_;
|
|
Weight final_weight_;
|
|
size_t narcs_;
|
|
};
|
|
|
|
std::unordered_map<StateId, State *> cache_;
|
|
std::deque<State> states_;
|
|
ArcArena<Arc> arena_;
|
|
};
|
|
|
|
} // namespace fst
|
|
|
|
#endif // FST_ARC_ARENA_H_
|