22#ifndef LIBSEMIGROUPS_AHO_CORASICK_HPP_
23#define LIBSEMIGROUPS_AHO_CORASICK_HPP_
30#include <unordered_map>
33#include "constants.hpp"
35#include "exception.hpp"
97 mutable size_t _height;
115 Node(
const Node&) =
default;
117 Node(Node&&) =
default;
122 Node&
init()
noexcept {
128 [[nodiscard]]
size_t height()
const noexcept {
136 void set_suffix_link(
index_type val)
const noexcept {
140 void clear_suffix_link()
const noexcept;
142 [[nodiscard]]
decltype(_children)& children()
const noexcept {
146 [[nodiscard]]
size_t number_of_children()
const noexcept {
147 return _children.
size();
150 [[nodiscard]]
bool is_terminal()
const noexcept {
154 Node& set_terminal(
bool val)
noexcept {
159 [[nodiscard]]
index_type parent()
const noexcept {
163 [[nodiscard]]
letter_type parent_letter()
const noexcept {
164 return _parent_letter;
167 void set_height(
size_t val)
const noexcept {
175 mutable bool _valid_links;
233 return _active_nodes_index.size();
248 [[nodiscard]] rx::iterator_range<std::set<index_type>::const_iterator>
267 return _active_nodes_index.
cbegin();
284 return _active_nodes_index.
cend();
300 return _active_nodes_index.
begin();
316 return _active_nodes_index.
end();
329 template <
typename Iterator>
356 template <
typename Iterator>
369 template <
typename Iterator>
403 template <
typename Iterator>
609 LIBSEMIGROUPS_ASSERT(i < _all_nodes.size());
610 return _all_nodes[i];
649 LIBSEMIGROUPS_ASSERT(parent < _all_nodes.size());
650 LIBSEMIGROUPS_ASSERT(_active_nodes_index.count(parent) == 1);
651 return _all_nodes[parent].child(letter);
666 return _all_nodes[parent].child(letter);
707 return new_active_node_no_checks(parent, a);
714 deactivate_node_no_checks(i);
717 template <
typename Iterator>
718 [[nodiscard]]
index_type traverse_trie(Iterator first, Iterator last)
const;
720 void clear_suffix_links()
const;
769 template <
typename Word>
793 template <
typename Word>
816 template <
typename Word>
818 return ac.
add_word(w.cbegin(), w.cend());
839 template <
typename Word>
841 return ac.
rm_word(w.cbegin(), w.cend());
867 template <
typename Iterator>
898 template <
typename Iterator>
914 template <
typename Word>
933 template <
typename Iterator>
952 template <
typename Word>
968#include "aho-corasick.tpp"
For an implementation of the Aho-Corasick algorithm.
Definition aho-corasick.hpp:80
index_type suffix_link_no_checks(index_type current) const
Calculate the index of the suffix link of a node.
word_type signature(index_type i) const
Find the signature of a node (out-of-place).
Definition aho-corasick.hpp:512
typename word_type::const_iterator const_iterator
Used for word_type iterators.
Definition aho-corasick.hpp:86
AhoCorasick & operator=(AhoCorasick &&)=default
Default move assignment.
std::set< index_type >::const_iterator cbegin_nodes() const noexcept
Return a const iterator pointing to the first active node.
Definition aho-corasick.hpp:266
void signature(word_type &w, index_type i) const
Find the signature of a node (in-place).
Definition aho-corasick.hpp:498
AhoCorasick & operator=(AhoCorasick const &)=default
Default copy assignment.
void signature_no_checks(word_type &w, index_type i) const
Find the signature of a node (in-place).
size_t number_of_nodes() const noexcept
Returns the number of nodes in the trie.
Definition aho-corasick.hpp:232
Node const & node_no_checks(index_type i) const
Return the node given an index.
Definition aho-corasick.hpp:608
size_t height(index_type i) const
Calculate the height of a node.
Definition aho-corasick.hpp:546
index_type suffix_link(index_type current) const
Calculate the index of the suffix link of a node.
Definition aho-corasick.hpp:582
Node const & node(index_type i) const
Return the node given an index.
Definition aho-corasick.hpp:622
AhoCorasick()
Construct an empty AhoCorasick.
index_type add_word_no_checks(Iterator first, Iterator last)
Add a word to the trie.
void throw_if_node_index_out_of_range(index_type i) const
Check if an index corresponds to a node.
index_type child(index_type parent, letter_type letter) const
Return the child of parent with edge-label letter.
Definition aho-corasick.hpp:663
size_t index_type
Alias for the index of a node in the trie.
Definition aho-corasick.hpp:83
std::set< index_type >::iterator end_nodes() const noexcept
Return an iterator pointing one beyond the last active node.
Definition aho-corasick.hpp:315
size_t height_no_checks(index_type i) const
Calculate the height of a node.
void throw_if_node_index_not_active(index_type i) const
Check if an index corresponds to a node currently in the trie.
index_type traverse_no_checks(index_type current, letter_type a) const
Traverse the trie using suffix links where necessary.
index_type traverse(index_type current, letter_type a) const
Traverse the trie using suffix links where necessary.
Definition aho-corasick.hpp:437
std::set< index_type >::const_iterator cend_nodes() const noexcept
Return a const iterator pointing one beyond the last active node.
Definition aho-corasick.hpp:283
index_type rm_word_no_checks(Iterator first, Iterator last)
Remove a word from the trie.
rx::iterator_range< std::set< index_type >::const_iterator > active_nodes() const
Return the active nodes.
Definition aho-corasick.hpp:249
AhoCorasick(AhoCorasick &&)=default
Default move constructor.
static constexpr index_type root
Constant for the root of the trie.
Definition aho-corasick.hpp:89
index_type rm_word(Iterator first, Iterator last)
Check and add a word to the trie.
std::set< index_type >::iterator begin_nodes() const noexcept
Return an iterator pointing to the first active node.
Definition aho-corasick.hpp:299
AhoCorasick(AhoCorasick const &)=default
Default copy constructor.
AhoCorasick & init()
Reinitialise an existing AhoCorasick object.
word_type signature_no_checks(index_type i) const
Find the signature of a node (out-of-place).
Definition aho-corasick.hpp:483
index_type child_no_checks(index_type parent, letter_type letter) const
Return the child of parent with edge-label letter.
Definition aho-corasick.hpp:647
index_type add_word(Iterator first, Iterator last)
Check and add a word to the trie.
A representation of a graph in the DOT language of Graphviz.
Definition dot.hpp:115
std::string to_human_readable_repr(AhoCorasick const &ac)
Return a string representation.
Undefined const UNDEFINED
Value for something undefined.
std::vector< letter_type > word_type
Type for a word over the generators of a semigroup.
Definition types.hpp:101
size_t letter_type
Type for the index of a generator of a semigroup.
Definition types.hpp:98
Namespace for AhoCorasick helper functions.
Definition aho-corasick.hpp:746
AhoCorasick::index_type index_type
Alias for the index of a node in the trie.
Definition aho-corasick.hpp:748
index_type rm_word_no_checks(AhoCorasick &ac, Word const &w)
Remove a word from the trie of ac.
Definition aho-corasick.hpp:794
index_type traverse_word_no_checks(AhoCorasick const &ac, index_type start, Iterator first, Iterator last)
Traverse the trie of ac using suffix links where necessary.
index_type add_word_no_checks(AhoCorasick &ac, Word const &w)
Add a word to the trie of ac.
Definition aho-corasick.hpp:770
index_type add_word(AhoCorasick &ac, Word const &w)
Add a word to the trie of ac.
Definition aho-corasick.hpp:817
Dot dot(AhoCorasick &ac)
Construct a dot object of ac.
index_type traverse_word(AhoCorasick const &ac, index_type start, Iterator first, Iterator last)
Traverse the trie of ac using suffix links where necessary.
Definition aho-corasick.hpp:899
index_type rm_word(AhoCorasick &ac, Word const &w)
Remove a word from the trie of ac.
Definition aho-corasick.hpp:840
Namespace for everything in the libsemigroups library.
Definition action.hpp:44