22#ifndef LIBSEMIGROUPS_DETAIL_AHO_CORASICK_IMPL_HPP_
23#define LIBSEMIGROUPS_DETAIL_AHO_CORASICK_IMPL_HPP_
30#include <unordered_set>
33#include "libsemigroups/constants.hpp"
34#include "libsemigroups/debug.hpp"
35#include "libsemigroups/exception.hpp"
36#include "libsemigroups/ranges.hpp"
37#include "libsemigroups/types.hpp"
39#include "containers.hpp"
53 class AhoCorasickImpl {
55 using index_type = uint32_t;
57 static constexpr const index_type root = 0;
61 friend class AhoCorasickImpl;
70 std::unordered_set<index_type> _suffix_link_sources;
73 Node& init() noexcept {
77 Node& init(index_type parent,
letter_type a)
noexcept;
87 Node(Node
const&) =
default;
88 Node& operator=(Node
const&) =
default;
89 Node(Node&&) =
default;
90 Node& operator=(Node&&) =
default;
98 [[nodiscard]]
size_t height() const noexcept {
102 [[nodiscard]]
index_type suffix_link() const noexcept {
106 [[nodiscard]]
bool terminal() const noexcept {
110 [[nodiscard]]
index_type parent() const noexcept {
114 [[nodiscard]]
letter_type parent_letter() const noexcept {
115 return _parent_letter;
125 Node
const& height(
size_t val)
noexcept {
130 Node
const& suffix_link(index_type val)
noexcept {
135 Node& terminal(
bool val)
noexcept {
140 std::unordered_set<index_type>& suffix_link_sources() noexcept {
141 return _suffix_link_sources;
150 std::vector<Node> _all_nodes;
151 detail::DynamicArray2<index_type> _children;
152 std::unordered_set<index_type> _active_nodes_index;
153 std::vector<index_type> _inactive_nodes_index;
154 std::vector<index_type> _node_indices_to_update;
163 AhoCorasickImpl& init();
165 explicit AhoCorasickImpl(
size_t num_letters);
166 AhoCorasickImpl& init(
size_t num_letters);
168 AhoCorasickImpl(AhoCorasickImpl
const&);
169 AhoCorasickImpl& operator=(AhoCorasickImpl
const&);
170 AhoCorasickImpl(AhoCorasickImpl&&);
171 AhoCorasickImpl& operator=(AhoCorasickImpl&&);
175 size_t alphabet_size() const noexcept {
176 return _children.number_of_cols();
179 AhoCorasickImpl& increase_alphabet_size_by(
size_t val);
181 [[nodiscard]]
size_t number_of_nodes() const noexcept {
182 LIBSEMIGROUPS_ASSERT(_children.number_of_rows() == _all_nodes.size());
183 return _active_nodes_index.size();
186 template <
typename Iterator>
187 index_type add_word(Iterator first, Iterator last);
189 template <
typename Iterator>
190 index_type add_word_no_checks(Iterator first, Iterator last);
192 template <
typename Iterator>
193 index_type rm_word(Iterator first, Iterator last);
195 template <
typename Iterator>
196 index_type rm_word_no_checks(Iterator first, Iterator last);
200 [[nodiscard]] index_type traverse_no_checks(index_type current,
202 LIBSEMIGROUPS_ASSERT(current < _all_nodes.size());
203 LIBSEMIGROUPS_ASSERT(_active_nodes_index.count(current) == 1);
204 index_type
next = _children.get(current, a);
207 }
else if (current == root) {
210 return traverse_no_checks(suffix_link_no_checks(current), a);
213 [[nodiscard]] index_type traverse(index_type current,
215 throw_if_node_index_not_active(current);
216 return traverse_no_checks(current, a);
219 [[nodiscard]]
size_t height_no_checks(index_type i)
const;
221 [[nodiscard]]
size_t height(index_type i)
const {
222 throw_if_node_index_not_active(i);
223 return height_no_checks(i);
226 [[nodiscard]]
bool terminal_no_checks(index_type i)
const;
228 [[nodiscard]]
bool terminal(index_type i)
const {
229 throw_if_node_index_not_active(i);
230 return terminal_no_checks(i);
233 [[nodiscard]] index_type suffix_link_no_checks(index_type i)
const {
234 LIBSEMIGROUPS_ASSERT(i < _all_nodes.size());
235 LIBSEMIGROUPS_ASSERT(_active_nodes_index.count(i) == 1);
236 return _all_nodes[i].suffix_link();
239 [[nodiscard]] index_type suffix_link(index_type current)
const {
240 throw_if_node_index_not_active(current);
241 return suffix_link_no_checks(current);
244 [[nodiscard]] Node
const& node_no_checks(index_type i)
const {
245 LIBSEMIGROUPS_ASSERT(i < _all_nodes.size());
246 return _all_nodes[i];
249 [[nodiscard]] Node
const& node(index_type i)
const {
250 throw_if_node_index_out_of_range(i);
251 return node_no_checks(i);
254 [[nodiscard]] index_type child_no_checks(index_type parent,
256 LIBSEMIGROUPS_ASSERT(parent < _all_nodes.size());
257 LIBSEMIGROUPS_ASSERT(_active_nodes_index.count(parent) == 1);
258 return _children.get(parent, letter);
261 [[nodiscard]] index_type child(index_type parent,
263 throw_if_node_index_not_active(parent);
264 return child_no_checks(parent, letter);
268 number_of_children_no_checks(index_type i)
const noexcept {
269 return _children.number_of_cols()
271 _children.cbegin_row(i), _children.cend_row(i),
UNDEFINED);
274 [[nodiscard]]
size_t number_of_children(index_type i)
const noexcept {
275 throw_if_node_index_not_active(i);
276 return number_of_children_no_checks(i);
279 template <
typename Iterator>
280 [[nodiscard]] index_type traverse_trie_no_checks(Iterator first,
281 Iterator last)
const;
283 template <
typename Iterator>
284 [[nodiscard]] index_type traverse_trie(Iterator first,
285 Iterator last)
const {
286 throw_if_any_letter_out_of_range(first, last);
287 return traverse_trie_no_checks(first, last);
290 [[nodiscard]]
bool empty() const noexcept {
291 return number_of_nodes() == 1;
294 void throw_if_node_index_out_of_range(index_type i)
const;
295 void throw_if_node_index_not_active(index_type i)
const;
302 void throw_if_letter_out_of_range(index_type i)
const;
304 template <
typename Iterator>
305 void throw_if_any_letter_out_of_range(Iterator first,
306 Iterator last)
const {
307 for (
auto it = first; it != last; ++it) {
308 throw_if_letter_out_of_range(*it);
316 [[nodiscard]]
bool is_active_node(index_type i) {
317 return _active_nodes_index.find(i) != _active_nodes_index.end();
320 [[nodiscard]] index_type new_active_node_no_checks(index_type parent,
323 void deactivate_node_no_checks(index_type i);
331 void add_suffix_link_source(index_type source_index,
332 index_type target_index);
336 void rm_suffix_link_source(index_type source_index,
337 index_type target_index);
339 void populate_node_indices_to_update(index_type target_index,
340 index_type new_node_index,
344 namespace aho_corasick_impl {
346 template <
typename Word>
347 AhoCorasickImpl::index_type add_word_no_checks(AhoCorasickImpl& ac,
349 return ac.add_word_no_checks(w.begin(), w.end());
352 template <
typename Word>
353 AhoCorasickImpl::index_type rm_word_no_checks(AhoCorasickImpl& ac,
355 return ac.rm_word_no_checks(w.begin(), w.end());
359 template <
typename Iterator>
360 [[nodiscard]]
bool contains_no_checks(AhoCorasickImpl
const& ac,
363 auto index = ac.traverse_trie_no_checks(first, last);
364 return index ==
UNDEFINED ? false : ac.node_no_checks(index).terminal();
367 template <
typename Word>
368 [[nodiscard]] AhoCorasickImpl::index_type
369 contains_no_checks(AhoCorasickImpl& ac, Word
const& w) {
370 return contains_no_checks(ac, w.begin(), w.end());
373 template <
typename Iterator>
374 AhoCorasickImpl::index_type
375 traverse_word_no_checks(AhoCorasickImpl
const& ac,
376 AhoCorasickImpl::index_type start,
380 template <
typename Iterator>
381 AhoCorasickImpl::index_type
382 traverse_word_no_checks(AhoCorasickImpl
const& ac,
385 return traverse_word_no_checks(ac, ac.root, first, last);
388 template <
typename Word>
389 [[nodiscard]] AhoCorasickImpl::index_type
390 traverse_word_no_checks(AhoCorasickImpl& ac, Word
const& w) {
391 return traverse_word_no_checks(ac, w.begin(), w.end());
394 template <
typename Iterator>
395 class SearchIterator {
396 using index_type = AhoCorasickImpl::index_type;
402 AhoCorasickImpl
const& _trie;
405 using iterator_category = std::input_iterator_tag;
406 using value_type = index_type;
407 using difference_type = std::ptrdiff_t;
408 using pointer = value_type
const*;
409 using reference = value_type
const&;
411 SearchIterator(AhoCorasickImpl
const& trie,
415 explicit SearchIterator(AhoCorasickImpl
const& trie);
417 reference operator*()
const {
424 SearchIterator& operator++();
427 SearchIterator operator++(
int) {
428 SearchIterator tmp = *
this;
433 friend bool operator==(SearchIterator
const& a,
434 SearchIterator
const& b) {
436 return a._prefix == b._prefix && a._suffix == b._suffix;
439 friend bool operator!=(SearchIterator
const& a,
440 SearchIterator
const& b) {
446 template <
typename Iterator>
447 SearchIterator(AhoCorasickImpl
const& ac, Iterator first, Iterator last)
448 -> SearchIterator<Iterator>;
450 template <
typename Iterator>
451 [[nodiscard]]
auto begin_search_no_checks(AhoCorasickImpl
const& ac,
454 return SearchIterator(ac, first, last);
457 template <
typename Iterator>
458 [[nodiscard]]
auto end_search_no_checks(AhoCorasickImpl
const& ac,
461 return SearchIterator<Iterator>(ac);
464 template <
typename Word>
465 [[nodiscard]]
auto begin_search_no_checks(AhoCorasickImpl& ac,
467 return begin_search_no_checks(ac, w.begin(), w.end());
470 template <
typename Word>
471 [[nodiscard]]
auto end_search_no_checks(AhoCorasickImpl& ac,
473 return end_search_no_checks(ac, w.begin(), w.end());
480#include "aho-corasick-impl.tpp"
Undefined const UNDEFINED
Value for something undefined.
size_t letter_type
Type for the index of a generator of a semigroup.
Definition types.hpp:96
AhoCorasick::index_type index_type
Alias for the index of a node in the trie.
Definition aho-corasick.hpp:750
Namespace for everything in the libsemigroups library.
Definition action.hpp:44