32#ifndef LIBSEMIGROUPS_UKKONEN_HPP_
33#define LIBSEMIGROUPS_UKKONEN_HPP_
45#include "constants.hpp"
48#include "exception.hpp"
51#include "detail/fmt.hpp"
52#include "detail/string.hpp"
83 using node_index_type = size_t;
86 using edge_index_type = size_t;
159 State(node_index_type vv, edge_index_type ppos) :
v(vv),
pos(ppos) {}
176 return v == that.v &&
pos == that.pos;
202#ifndef LIBSEMIGROUPS_PARSED_BY_DOXYGEN
204 node_index_type link;
209 mutable bool is_real_suffix;
338 size_t _max_word_length;
471 return -1 - _next_unique_letter;
522 return std::accumulate(_multiplicity.cbegin(), _multiplicity.cend(), 0);
537 return _max_word_length;
554 return _word.begin();
571 return _word.cbegin();
646 "expected the parent of the parameter to not be UNDEFINED");
671 LIBSEMIGROUPS_ASSERT(i < _word.size());
672 return _word_index_lookup[i];
691 if (i >= _word.size()) {
693 "expected the parameter to be in the range [0, {}), found {}",
744 return _multiplicity[i];
762 if (i >= _multiplicity.size()) {
764 "expected the parameter to be in the range [0, {}), found {}",
765 _multiplicity.size(),
807 return l >= _next_unique_letter;
835 template <
typename Iterator>
847 template <
typename Iterator>
880 template <
typename Iterator>
889 template <
typename Iterator>
920 template <
typename Iterator>
922 Iterator last)
const {
933 template <
typename Iterator>
959 template <
typename Iterator>
975 bool is_real_suffix(Node
const& n)
const;
978 LIBSEMIGROUPS_ASSERT(
index < _word_begin.
size() - 1);
979 return (_word_begin[
index + 1] - _word_begin[
index]) - 1;
1001 node_index_type split(State
const& st);
1004 node_index_type get_link(node_index_type v);
1022 template <
typename Iterator>
1033 template <
typename Word>
1061 template <
typename Iterator>
1077 template <
typename Word>
1124 template <
typename Iterator1,
typename Iterator2>
1126 for (
auto it = first; it != last; ++it) {
1149 template <
typename Iterator1,
typename Iterator2>
1151 for (
auto it = first; it != last; ++it) {
1159 template <
typename Word>
1184 template <
typename Word>
1186 return u.
traverse(w.cbegin(), w.cend());
1217 template <
typename Word>
1252 template <
typename Word>
1254 return u.
traverse(st, w.cbegin(), w.cend());
1306 template <
typename Iterator>
1313 template <
typename Word>
1339 template <
typename Iterator>
1354 template <
typename Word>
1404 template <
typename Iterator>
1411 template <
typename Word>
1439 template <
typename Iterator>
1454 template <
typename Word>
1509 template <
typename Iterator>
1519 template <
typename Word>
1520 typename Word::const_iterator
1530 inline typename word_type::const_iterator
1555 template <
typename Iterator>
1573 template <
typename Word>
1589 inline typename word_type::const_iterator
1633 template <
typename Iterator>
1646 template <
typename Word>
1682 template <
typename Iterator>
1699 template <
typename Word>
1758 template <
typename Iterator>
1766 template <
typename Word>
1792 template <
typename Iterator>
1805 template <
typename Word>
1862 template <
typename Iterator>
1872 template <
typename Word>
1873 typename Word::const_iterator
1883 inline typename word_type::const_iterator
1908 template <
typename Iterator>
1926 template <
typename Word>
1942 inline typename word_type::const_iterator
1986 template <
typename Iterator>
1999 template <
typename Word>
2035 template <
typename Iterator>
2052 template <
typename Word>
2111 template <
typename Iterator>
2121 template <
typename Word>
2157 template <
typename Iterator>
2173 template <
typename Word>
2250 template <
typename Iterator>
2258 template <
typename Word>
2280 template <
typename Iterator>
2296 template <
typename Word>
2371 template <
typename T>
2387 return fmt::format(
"<Ukkonen with {} distinct words>",
2407 "<Ukkonen{}State with pos = {} and v = {}>", sep, st.
pos, st.
v);
2426 "<Ukkonen{}Node with {} children and parent edge label [{}, {})>",
2437#include "ukkonen.tpp"
A representation of a graph in the DOT language of Graphviz.
Definition dot.hpp:115
For an implementation of Ukkonen's algorithm.
Definition ukkonen.hpp:81
Iterator traverse_no_checks(State &st, Iterator first, Iterator last) const
Traverse the suffix tree from the root.
Ukkonen(Ukkonen &&)
Default move constructor.
typename word_type::const_iterator const_iterator
Alias for word_type iterators.
Definition ukkonen.hpp:97
word_index_type word_index(index_type i) const
Returns the index of the word corresponding to a position.
Definition ukkonen.hpp:690
Ukkonen & operator=(Ukkonen &&)
Default move assignment.
const_iterator begin() const noexcept
Returns an iterator pointing to the first letter of the first word in the suffix tree.
Definition ukkonen.hpp:553
void throw_if_contains_unique_letter(Iterator first, Iterator last) const
Throw if the word [first, last) contains a letter equal to any of the unique letters added to the end...
size_t length_of_words() const noexcept
Returns the sum of the lengths of all of the words in the suffix tree.
size_t word_index_type
Alias for order that words were added.
Definition ukkonen.hpp:94
void add_word(const_iterator first, const_iterator last)
Check and add a word to the suffix tree.
Definition ukkonen.hpp:431
std::pair< State, Iterator > traverse_no_checks(Iterator first, Iterator last) const
Traverse the suffix tree from the root.
Definition ukkonen.hpp:921
std::vector< Node > const & nodes() const noexcept
Returns the nodes in the suffix tree.
Definition ukkonen.hpp:452
size_t distance_from_root(Node const &n) const
Returns the distance of a node from the root.
word_index_type word_index_no_checks(Node const &n) const
Returns the index of the word corresponding to a node.
Definition ukkonen.hpp:625
void add_word_no_checks(const_iterator first, const_iterator last)
Add a word to the suffix tree.
Ukkonen & init()
Initialize an existing Ukkonen object.
word_index_type index_no_checks(Iterator first, Iterator last) const
Find the index of a word in the suffix tree.
const_iterator end() const noexcept
Returns an iterator pointing one past the last letter of the last word in the suffix tree.
Definition ukkonen.hpp:587
size_t index_type
Alias for an index between begin and end.
Definition ukkonen.hpp:100
size_t max_word_length() const noexcept
Returns the maximum length of word in the suffix tree.
Definition ukkonen.hpp:536
word_index_type word_index_no_checks(index_type i) const
Returns the index of the word corresponding to a position.
Definition ukkonen.hpp:670
size_t multiplicity_no_checks(word_index_type i) const
Returns the multiplicity of a word by index.
Definition ukkonen.hpp:743
Ukkonen & operator=(Ukkonen const &)
Default copy assignment.
void throw_if_contains_unique_letter(word_type const &w) const
Throw if w contains a letter equal to any of the unique letters added to the end of words in the suff...
Definition ukkonen.hpp:966
size_t length_of_distinct_words() const noexcept
Returns the sum of the lengths of the distinct words in the suffix tree.
Definition ukkonen.hpp:486
const_iterator cend() const noexcept
Returns an iterator pointing one past the last letter of the last word in the suffix tree.
Definition ukkonen.hpp:604
word_index_type is_suffix(State const &st) const
Check if a state corresponds to a suffix.
word_index_type word_index(Node const &n) const
Returns the index of the word corresponding to a node.
Definition ukkonen.hpp:643
size_t number_of_words() const noexcept
Returns the number of non-empty words in the suffix tree.
Definition ukkonen.hpp:521
word_index_type index(Iterator first, Iterator last) const
Find the index of a word in the suffix tree.
Definition ukkonen.hpp:848
size_t unique_letter_type
Alias for any letter that is added by Ukkonen (so that unique strings end in unique letters).
Definition ukkonen.hpp:91
const_iterator cbegin() const noexcept
Returns an iterator pointing to the first letter of the first word in the suffix tree.
Definition ukkonen.hpp:570
Ukkonen(Ukkonen const &)
Default copy constructor.
Ukkonen()
Default constructor.
Iterator traverse(State &st, Iterator first, Iterator last) const
Traverse the suffix tree from the root.
Definition ukkonen.hpp:890
size_t multiplicity(word_index_type i) const
Returns the multiplicity of a word by index.
Definition ukkonen.hpp:761
std::pair< State, Iterator > traverse(Iterator first, Iterator last) const
Traverse the suffix tree from the root.
Definition ukkonen.hpp:934
unique_letter_type unique_letter(word_index_type i) const noexcept
Returns the unique letter added to the end of a word in the suffix tree.
Definition ukkonen.hpp:786
bool is_unique_letter(letter_type l) const noexcept
Check if a letter is a unique letter added to the end of a word in the suffix tree.
Definition ukkonen.hpp:806
size_t number_of_distinct_words() const noexcept
Returns the number of distinct non-empty words in the suffix tree.
Definition ukkonen.hpp:470
std::string to_human_readable_repr(AhoCorasick const &ac)
Return a string representation.
Undefined const UNDEFINED
Value for something undefined.
#define LIBSEMIGROUPS_EXCEPTION(...)
Throw a LibsemigroupsException.
Definition exception.hpp:95
std::vector< letter_type > word_type
Type for a word over the generators of a semigroup.
Definition types.hpp:101
size_t letter_type
Type for the index of a generator of a semigroup.
Definition types.hpp:98
Namespace for Ukkonen helper functions.
Definition ukkonen.hpp:1013
Iterator maximal_piece_suffix(Ukkonen const &u, Iterator first, Iterator last)
Find the maximal suffix of a word occurring in two different places in a word in a suffix tree.
Definition ukkonen.hpp:1909
bool is_subword_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Check if a word is a subword of any word in a suffix tree.
auto traverse(Ukkonen const &u, Word const &w)
Traverse the suffix tree from the root.
Definition ukkonen.hpp:1185
Iterator maximal_piece_suffix_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Find the maximal suffix of a word occurring in two different places in a word in a suffix tree.
size_t length_maximal_piece_suffix_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Find the length of the maximal suffix of a word occurring in two different places in a word in a suff...
Definition ukkonen.hpp:1987
auto dfs(Ukkonen const &u, T &helper)
Perform a depth first search in a suffix tree.
size_t number_of_distinct_subwords(Ukkonen const &u)
Returns the number of distinct subwords of the words in a suffix tree.
Iterator maximal_piece_prefix(Ukkonen const &u, Iterator first, Iterator last)
Find the maximal prefix of a word occurring in two different places in a word in a suffix tree.
Definition ukkonen.hpp:1556
void add_word(Ukkonen &u, word_type const &w)
Add a word to the suffix tree.
bool is_subword(Ukkonen const &u, Iterator first, Iterator last)
Check if a word is a subword of any word in a suffix tree.
Definition ukkonen.hpp:1340
Dot dot(Ukkonen const &u)
Returns a Dot object representing a suffix tree.
size_t number_of_pieces_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Find the number of pieces in a decomposition of a word (if any).
std::vector< Iterator > pieces(Ukkonen const &u, Iterator first, Iterator last)
Find the pieces in a decomposition of a word (if any).
Definition ukkonen.hpp:2281
size_t length_maximal_piece_prefix(Ukkonen const &u, Iterator first, Iterator last)
Find the length of the maximal prefix of a word occurring in two different places in a word in a suff...
Definition ukkonen.hpp:1683
size_t length_maximal_piece_suffix(Ukkonen const &u, Iterator first, Iterator last)
Find the length of the maximal suffix of a word occurring in two different places in a word in a suff...
Definition ukkonen.hpp:2036
void add_words(Ukkonen &u, std::vector< word_type > const &words)
Add all words in a range to a Ukkonen object.
auto traverse_no_checks(Ukkonen const &u, Word const &w)
Traverse the suffix tree from the root.
Definition ukkonen.hpp:1160
bool is_suffix_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Check if a word is a suffix of any word in a suffix tree.
std::vector< Iterator > pieces_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Find the pieces in a decomposition of a word (if any).
Iterator maximal_piece_prefix_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Find the maximal prefix of a word occurring in two different places in a word in a suffix tree.
bool is_piece(Ukkonen const &u, Iterator first, Iterator last)
Check if a word is a piece (occurs in two distinct places in the words of the suffix tree).
Definition ukkonen.hpp:1793
bool is_piece_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Check if a word is a piece (occurs in two distinct places in the words of the suffix tree).
Definition ukkonen.hpp:1759
size_t length_maximal_piece_prefix_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Find the length of the maximal prefix of a word occurring in two different places in a word in a suff...
Definition ukkonen.hpp:1634
size_t number_of_pieces(Ukkonen const &u, Iterator first, Iterator last)
Find the number of pieces in a decomposition of a word (if any).
Definition ukkonen.hpp:2158
void add_word_no_checks(Ukkonen &u, word_type const &w)
Add a word to the suffix tree.
bool is_suffix(Ukkonen const &u, Iterator first, Iterator last)
Check if a word is a suffix of any word in a suffix tree.
Definition ukkonen.hpp:1440
void add_words_no_checks(Ukkonen &u, std::vector< word_type > const &words)
Add all words in a std::vector to a Ukkonen object.
Namespace containing some operators for creating words.
Definition word-range.hpp:2072
Namespace for everything in the libsemigroups library.
Definition action.hpp:44
The type of the nodes in the tree.
Definition ukkonen.hpp:185
bool is_root() const noexcept
Returns true if the node is the root and false if not.
Definition ukkonen.hpp:328
Node & operator=(Node &&)=default
Default move assignment.
Node(Node &&)=default
Default move constructor.
size_t length() const noexcept
The length of the edge leading into the current node.
Definition ukkonen.hpp:266
node_index_type & child(letter_type c)
The index of the child node corresponding to a letter (if any).
index_type l
The index of the first letter in the edge leading to the node.
Definition ukkonen.hpp:189
index_type r
The index of one past the last letter in the edge leading to the node.
Definition ukkonen.hpp:195
std::map< letter_type, node_index_type > children
The children of the current node.
Definition ukkonen.hpp:215
Node(Node const &)=default
Default copy constructor.
node_index_type parent
The index of the parent node.
Definition ukkonen.hpp:200
Node(index_type l=0, index_type r=0, node_index_type parent=UNDEFINED)
Construct a node from left most index, right most index, and parent.
Node & operator=(Node const &)=default
Default copy assignment.
bool is_leaf() const noexcept
Returns true if the node is a leaf and false if not.
Definition ukkonen.hpp:313
node_index_type child(letter_type c) const
The index of the child node corresponding to a letter (if any).
The return type of traverse.
Definition ukkonen.hpp:112
State(node_index_type vv, edge_index_type ppos)
Construct from index and position.
Definition ukkonen.hpp:159
State & operator=(State const &)=default
Default copy assignment.
node_index_type v
The index in Ukkonen::nodes of the node at the end of the position reached.
Definition ukkonen.hpp:118
bool operator==(State const &that) const noexcept
Compare states.
Definition ukkonen.hpp:175
edge_index_type pos
The position in the edge leading to the node v reached.
Definition ukkonen.hpp:123
State()=default
Default constructor.
State(State &&)=default
Default move constructor.
State & operator=(State &&)=default
Default move assignment.
State(State const &)=default
Default copy constructor.