22#ifndef LIBSEMIGROUPS_AHO_CORASICK_HPP_
23#define LIBSEMIGROUPS_AHO_CORASICK_HPP_
30#include <unordered_map>
33#include "constants.hpp"
35#include "exception.hpp"
39#include "detail/print.hpp"
99 mutable size_t _height;
117 Node(
const Node&) =
default;
119 Node(Node&&) =
default;
124 Node&
init()
noexcept {
130 [[nodiscard]]
size_t height()
const noexcept {
138 void set_suffix_link(
index_type val)
const noexcept {
142 void clear_suffix_link()
const noexcept;
144 [[nodiscard]]
decltype(_children)& children()
const noexcept {
148 [[nodiscard]]
size_t number_of_children()
const noexcept {
149 return _children.
size();
152 [[nodiscard]]
bool is_terminal()
const noexcept {
156 Node& set_terminal(
bool val)
noexcept {
161 [[nodiscard]]
index_type parent()
const noexcept {
165 [[nodiscard]]
letter_type parent_letter()
const noexcept {
166 return _parent_letter;
169 void set_height(
size_t val)
const noexcept {
177 mutable bool _valid_links;
235 return _active_nodes_index.size();
250 [[nodiscard]] rx::iterator_range<std::set<index_type>::const_iterator>
269 return _active_nodes_index.
cbegin();
286 return _active_nodes_index.
cend();
302 return _active_nodes_index.
begin();
318 return _active_nodes_index.
end();
331 template <
typename Iterator>
358 template <
typename Iterator>
371 template <
typename Iterator>
405 template <
typename Iterator>
611 LIBSEMIGROUPS_ASSERT(i < _all_nodes.size());
612 return _all_nodes[i];
651 LIBSEMIGROUPS_ASSERT(parent < _all_nodes.size());
652 LIBSEMIGROUPS_ASSERT(_active_nodes_index.count(parent) == 1);
653 return _all_nodes[parent].child(letter);
668 return _all_nodes[parent].child(letter);
709 return new_active_node_no_checks(parent, a);
716 deactivate_node_no_checks(i);
719 template <
typename Iterator>
720 [[nodiscard]]
index_type traverse_trie(Iterator first, Iterator last)
const;
722 void clear_suffix_links()
const;
771 template <
typename Word>
795 template <
typename Word>
818 template <
typename Word>
820 return ac.
add_word(w.cbegin(), w.cend());
862 template <
typename Word>
864 return ac.
rm_word(w.cbegin(), w.cend());
911 template <
typename Iterator>
942 template <
typename Iterator>
958 template <
typename Word>
977 template <
typename Iterator>
996 template <
typename Word>
1012#include "aho-corasick.tpp"
For an implementation of the Aho-Corasick algorithm.
Definition aho-corasick.hpp:82
index_type suffix_link_no_checks(index_type current) const
Calculate the index of the suffix link of a node.
word_type signature(index_type i) const
Find the signature of a node (out-of-place).
Definition aho-corasick.hpp:514
typename word_type::const_iterator const_iterator
Used for word_type iterators.
Definition aho-corasick.hpp:88
AhoCorasick & operator=(AhoCorasick &&)=default
Default move assignment.
std::set< index_type >::const_iterator cbegin_nodes() const noexcept
Return a const iterator pointing to the first active node.
Definition aho-corasick.hpp:268
void signature(word_type &w, index_type i) const
Find the signature of a node (in-place).
Definition aho-corasick.hpp:500
AhoCorasick & operator=(AhoCorasick const &)=default
Default copy assignment.
void signature_no_checks(word_type &w, index_type i) const
Find the signature of a node (in-place).
size_t number_of_nodes() const noexcept
Returns the number of nodes in the trie.
Definition aho-corasick.hpp:234
Node const & node_no_checks(index_type i) const
Return the node given an index.
Definition aho-corasick.hpp:610
size_t height(index_type i) const
Calculate the height of a node.
Definition aho-corasick.hpp:548
index_type suffix_link(index_type current) const
Calculate the index of the suffix link of a node.
Definition aho-corasick.hpp:584
Node const & node(index_type i) const
Return the node given an index.
Definition aho-corasick.hpp:624
AhoCorasick()
Construct an empty AhoCorasick.
index_type add_word_no_checks(Iterator first, Iterator last)
Add a word to the trie.
void throw_if_node_index_out_of_range(index_type i) const
Check if an index corresponds to a node.
index_type child(index_type parent, letter_type letter) const
Return the child of parent with edge-label letter.
Definition aho-corasick.hpp:665
size_t index_type
Alias for the index of a node in the trie.
Definition aho-corasick.hpp:85
std::set< index_type >::iterator end_nodes() const noexcept
Return an iterator pointing one beyond the last active node.
Definition aho-corasick.hpp:317
size_t height_no_checks(index_type i) const
Calculate the height of a node.
void throw_if_node_index_not_active(index_type i) const
Check if an index corresponds to a node currently in the trie.
index_type traverse_no_checks(index_type current, letter_type a) const
Traverse the trie using suffix links where necessary.
index_type traverse(index_type current, letter_type a) const
Traverse the trie using suffix links where necessary.
Definition aho-corasick.hpp:439
std::set< index_type >::const_iterator cend_nodes() const noexcept
Return a const iterator pointing one beyond the last active node.
Definition aho-corasick.hpp:285
index_type rm_word_no_checks(Iterator first, Iterator last)
Remove a word from the trie.
rx::iterator_range< std::set< index_type >::const_iterator > active_nodes() const
Return the active nodes.
Definition aho-corasick.hpp:251
AhoCorasick(AhoCorasick &&)=default
Default move constructor.
static constexpr index_type root
Constant for the root of the trie.
Definition aho-corasick.hpp:91
index_type rm_word(Iterator first, Iterator last)
Check and add a word to the trie.
std::set< index_type >::iterator begin_nodes() const noexcept
Return an iterator pointing to the first active node.
Definition aho-corasick.hpp:301
AhoCorasick(AhoCorasick const &)=default
Default copy constructor.
AhoCorasick & init()
Reinitialise an existing AhoCorasick object.
word_type signature_no_checks(index_type i) const
Find the signature of a node (out-of-place).
Definition aho-corasick.hpp:485
index_type child_no_checks(index_type parent, letter_type letter) const
Return the child of parent with edge-label letter.
Definition aho-corasick.hpp:649
index_type add_word(Iterator first, Iterator last)
Check and add a word to the trie.
A representation of a graph in the DOT language of Graphviz.
Definition dot.hpp:115
std::string to_human_readable_repr(AhoCorasick const &ac)
Return a string representation.
Undefined const UNDEFINED
Value for something undefined.
std::vector< letter_type > word_type
Type for a word over the generators of a semigroup.
Definition types.hpp:99
size_t letter_type
Type for the index of a generator of a semigroup.
Definition types.hpp:96
Namespace for AhoCorasick helper functions.
Definition aho-corasick.hpp:748
AhoCorasick::index_type index_type
Alias for the index of a node in the trie.
Definition aho-corasick.hpp:750
index_type rm_word_no_checks(AhoCorasick &ac, Word const &w)
Remove a word from the trie of ac.
Definition aho-corasick.hpp:796
index_type traverse_word_no_checks(AhoCorasick const &ac, index_type start, Iterator first, Iterator last)
Traverse the trie of ac using suffix links where necessary.
index_type add_word_no_checks(AhoCorasick &ac, Word const &w)
Add a word to the trie of ac.
Definition aho-corasick.hpp:772
index_type add_word(AhoCorasick &ac, Word const &w)
Add a word.
Definition aho-corasick.hpp:819
Dot dot(AhoCorasick &ac)
Construct a dot object of ac.
index_type traverse_word(AhoCorasick const &ac, index_type start, Iterator first, Iterator last)
Traverse the trie of ac using suffix links where necessary.
Definition aho-corasick.hpp:943
index_type rm_word(AhoCorasick &ac, Word const &w)
Remove a word.
Definition aho-corasick.hpp:863
Namespace for everything in the libsemigroups library.
Definition action.hpp:44