32#ifndef LIBSEMIGROUPS_UKKONEN_HPP_
33#define LIBSEMIGROUPS_UKKONEN_HPP_
46#include "constants.hpp"
49#include "exception.hpp"
52#include "detail/fmt.hpp"
53#include "detail/string.hpp"
58 template <
typename Iterator>
59 [[nodiscard]]
letter_type deref_as_unsigned(Iterator it) {
61 return static_cast<std::make_unsigned_t<decltype(val)
>>(*it);
92 using node_index_type = size_t;
95 using edge_index_type = size_t;
168 State(node_index_type vv, edge_index_type ppos) :
v(vv),
pos(ppos) {}
185 return v == that.v &&
pos == that.pos;
211#ifndef LIBSEMIGROUPS_PARSED_BY_DOXYGEN
213 node_index_type link;
218 mutable bool is_real_suffix;
347 size_t _max_word_length;
480 return -1 - _next_unique_letter;
531 return std::accumulate(_multiplicity.cbegin(), _multiplicity.cend(), 0);
546 return _max_word_length;
563 return _word.begin();
580 return _word.cbegin();
655 "expected the parent of the parameter to not be UNDEFINED");
680 LIBSEMIGROUPS_ASSERT(i < _word.size());
681 return _word_index_lookup[i];
700 if (i >= _word.size()) {
702 "expected the parameter to be in the range [0, {}), found {}",
753 return _multiplicity[i];
771 if (i >= _multiplicity.size()) {
773 "expected the parameter to be in the range [0, {}), found {}",
774 _multiplicity.size(),
816 return l >= _next_unique_letter;
847 template <
typename Iterator>
859 template <
typename Iterator>
895 template <
typename Iterator>
904 template <
typename Iterator>
938 template <
typename Iterator>
940 Iterator last)
const {
951 template <
typename Iterator>
980 template <
typename Iterator>
996 bool is_real_suffix(Node
const& n)
const;
999 LIBSEMIGROUPS_ASSERT(
index < _word_begin.
size() - 1);
1000 return (_word_begin[
index + 1] - _word_begin[
index]) - 1;
1022 node_index_type split(State
const& st);
1025 node_index_type get_link(node_index_type v);
1046 template <
typename Iterator>
1052 for (
auto it = first; it != last; ++it) {
1053 w.
push_back(detail::deref_as_unsigned(it));
1064 template <
typename Word>
1095 template <
typename Iterator>
1109 template <
typename Word>
1159 template <
typename Iterator1,
typename Iterator2>
1161 for (
auto it = first; it != last; ++it) {
1187 template <
typename Iterator1,
typename Iterator2>
1189 for (
auto it = first; it != last; ++it) {
1200 template <
typename Word>
1228 template <
typename Word>
1230 return u.
traverse(w.cbegin(), w.cend());
1264 template <
typename Word>
1302 template <
typename Word>
1304 return u.
traverse(st, w.cbegin(), w.cend());
1359 template <
typename Iterator>
1366 template <
typename Word>
1392 template <
typename Iterator>
1407 template <
typename Word>
1457 template <
typename Iterator>
1464 template <
typename Word>
1492 template <
typename Iterator>
1507 template <
typename Word>
1562 template <
typename Iterator>
1572 template <
typename Word>
1573 typename Word::const_iterator
1583 inline typename word_type::const_iterator
1608 template <
typename Iterator>
1626 template <
typename Word>
1642 inline typename word_type::const_iterator
1686 template <
typename Iterator>
1699 template <
typename Word>
1735 template <
typename Iterator>
1752 template <
typename Word>
1811 template <
typename Iterator>
1819 template <
typename Word>
1845 template <
typename Iterator>
1858 template <
typename Word>
1915 template <
typename Iterator>
1925 template <
typename Word>
1926 typename Word::const_iterator
1936 inline typename word_type::const_iterator
1961 template <
typename Iterator>
1979 template <
typename Word>
1995 inline typename word_type::const_iterator
2039 template <
typename Iterator>
2052 template <
typename Word>
2088 template <
typename Iterator>
2105 template <
typename Word>
2164 template <
typename Iterator>
2174 template <
typename Word>
2210 template <
typename Iterator>
2226 template <
typename Word>
2303 template <
typename Iterator>
2311 template <
typename Word>
2333 template <
typename Iterator>
2349 template <
typename Word>
2424 template <
typename T>
2440 using detail::group_digits;
2441 return fmt::format(
"<Ukkonen with {} distinct words>",
2460 using detail::group_digits;
2461 return fmt::format(
"<Ukkonen{}State with pos = {} and v = {}>",
2463 group_digits(st.
pos),
2464 group_digits(st.
v));
2482 using detail::group_digits;
2484 "<Ukkonen{}Node with {} children and parent edge label [{}, {})>",
2487 group_digits(node.
l),
2488 group_digits(node.
r));
2495#include "ukkonen.tpp"
A representation of a graph in the DOT language of Graphviz.
Definition dot.hpp:115
For an implementation of Ukkonen's algorithm.
Definition ukkonen.hpp:90
Iterator traverse_no_checks(State &st, Iterator first, Iterator last) const
Traverse the suffix tree from the root.
Ukkonen(Ukkonen &&)
Default move constructor.
typename word_type::const_iterator const_iterator
Alias for word_type iterators.
Definition ukkonen.hpp:106
word_index_type word_index(index_type i) const
Returns the index of the word corresponding to a position.
Definition ukkonen.hpp:699
Ukkonen & operator=(Ukkonen &&)
Default move assignment.
const_iterator begin() const noexcept
Returns an iterator pointing to the first letter of the first word in the suffix tree.
Definition ukkonen.hpp:562
void throw_if_contains_unique_letter(Iterator first, Iterator last) const
Throw if the word [first, last) contains a letter equal to any of the unique letters added to the end...
size_t length_of_words() const noexcept
Returns the sum of the lengths of all of the words in the suffix tree.
size_t word_index_type
Alias for order that words were added.
Definition ukkonen.hpp:103
void add_word(const_iterator first, const_iterator last)
Check and add a word to the suffix tree.
Definition ukkonen.hpp:440
std::pair< State, Iterator > traverse_no_checks(Iterator first, Iterator last) const
Traverse the suffix tree from the root.
Definition ukkonen.hpp:939
std::vector< Node > const & nodes() const noexcept
Returns the nodes in the suffix tree.
Definition ukkonen.hpp:461
size_t distance_from_root(Node const &n) const
Returns the distance of a node from the root.
word_index_type word_index_no_checks(Node const &n) const
Returns the index of the word corresponding to a node.
Definition ukkonen.hpp:634
void add_word_no_checks(const_iterator first, const_iterator last)
Add a word to the suffix tree.
Ukkonen & init()
Initialize an existing Ukkonen object.
word_index_type index_no_checks(Iterator first, Iterator last) const
Find the index of a word in the suffix tree.
const_iterator end() const noexcept
Returns an iterator pointing one past the last letter of the last word in the suffix tree.
Definition ukkonen.hpp:596
size_t index_type
Alias for an index between begin and end.
Definition ukkonen.hpp:109
size_t max_word_length() const noexcept
Returns the maximum length of word in the suffix tree.
Definition ukkonen.hpp:545
word_index_type word_index_no_checks(index_type i) const
Returns the index of the word corresponding to a position.
Definition ukkonen.hpp:679
size_t multiplicity_no_checks(word_index_type i) const
Returns the multiplicity of a word by index.
Definition ukkonen.hpp:752
Ukkonen & operator=(Ukkonen const &)
Default copy assignment.
void throw_if_contains_unique_letter(word_type const &w) const
Throw if w contains a letter equal to any of the unique letters added to the end of words in the suff...
Definition ukkonen.hpp:987
size_t length_of_distinct_words() const noexcept
Returns the sum of the lengths of the distinct words in the suffix tree.
Definition ukkonen.hpp:495
const_iterator cend() const noexcept
Returns an iterator pointing one past the last letter of the last word in the suffix tree.
Definition ukkonen.hpp:613
word_index_type is_suffix(State const &st) const
Check if a state corresponds to a suffix.
word_index_type word_index(Node const &n) const
Returns the index of the word corresponding to a node.
Definition ukkonen.hpp:652
size_t number_of_words() const noexcept
Returns the number of non-empty words in the suffix tree.
Definition ukkonen.hpp:530
word_index_type index(Iterator first, Iterator last) const
Find the index of a word in the suffix tree.
Definition ukkonen.hpp:860
size_t unique_letter_type
Alias for any letter that is added by Ukkonen (so that unique strings end in unique letters).
Definition ukkonen.hpp:100
const_iterator cbegin() const noexcept
Returns an iterator pointing to the first letter of the first word in the suffix tree.
Definition ukkonen.hpp:579
Ukkonen(Ukkonen const &)
Default copy constructor.
Ukkonen()
Default constructor.
Iterator traverse(State &st, Iterator first, Iterator last) const
Traverse the suffix tree from the root.
Definition ukkonen.hpp:905
size_t multiplicity(word_index_type i) const
Returns the multiplicity of a word by index.
Definition ukkonen.hpp:770
std::pair< State, Iterator > traverse(Iterator first, Iterator last) const
Traverse the suffix tree from the root.
Definition ukkonen.hpp:952
unique_letter_type unique_letter(word_index_type i) const noexcept
Returns the unique letter added to the end of a word in the suffix tree.
Definition ukkonen.hpp:795
bool is_unique_letter(letter_type l) const noexcept
Check if a letter is a unique letter added to the end of a word in the suffix tree.
Definition ukkonen.hpp:815
size_t number_of_distinct_words() const noexcept
Returns the number of distinct non-empty words in the suffix tree.
Definition ukkonen.hpp:479
std::string to_human_readable_repr(AhoCorasick const &ac)
Return a string representation.
Undefined const UNDEFINED
Value for something undefined.
#define LIBSEMIGROUPS_EXCEPTION(...)
Throw a LibsemigroupsException.
Definition exception.hpp:99
std::vector< letter_type > word_type
Type for a word over the generators of a semigroup.
Definition types.hpp:99
size_t letter_type
Type for the index of a generator of a semigroup.
Definition types.hpp:96
Namespace for Ukkonen helper functions.
Definition ukkonen.hpp:1034
Iterator maximal_piece_suffix(Ukkonen const &u, Iterator first, Iterator last)
Find the maximal suffix of a word occurring in two different places in a word in a suffix tree.
Definition ukkonen.hpp:1962
bool is_subword_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Check if a word is a subword of any word in a suffix tree.
auto traverse(Ukkonen const &u, Word const &w)
Traverse the suffix tree from the root.
Definition ukkonen.hpp:1229
Iterator maximal_piece_suffix_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Find the maximal suffix of a word occurring in two different places in a word in a suffix tree.
size_t length_maximal_piece_suffix_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Find the length of the maximal suffix of a word occurring in two different places in a word in a suff...
Definition ukkonen.hpp:2040
auto dfs(Ukkonen const &u, T &helper)
Perform a depth first search in a suffix tree.
size_t number_of_distinct_subwords(Ukkonen const &u)
Returns the number of distinct subwords of the words in a suffix tree.
Iterator maximal_piece_prefix(Ukkonen const &u, Iterator first, Iterator last)
Find the maximal prefix of a word occurring in two different places in a word in a suffix tree.
Definition ukkonen.hpp:1609
void add_word(Ukkonen &u, word_type const &w)
Add a word to the suffix tree.
bool is_subword(Ukkonen const &u, Iterator first, Iterator last)
Check if a word is a subword of any word in a suffix tree.
Definition ukkonen.hpp:1393
Dot dot(Ukkonen const &u)
Returns a Dot object representing a suffix tree.
size_t number_of_pieces_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Find the number of pieces in a decomposition of a word (if any).
std::vector< Iterator > pieces(Ukkonen const &u, Iterator first, Iterator last)
Find the pieces in a decomposition of a word (if any).
Definition ukkonen.hpp:2334
size_t length_maximal_piece_prefix(Ukkonen const &u, Iterator first, Iterator last)
Find the length of the maximal prefix of a word occurring in two different places in a word in a suff...
Definition ukkonen.hpp:1736
size_t length_maximal_piece_suffix(Ukkonen const &u, Iterator first, Iterator last)
Find the length of the maximal suffix of a word occurring in two different places in a word in a suff...
Definition ukkonen.hpp:2089
void add_words(Ukkonen &u, std::vector< word_type > const &words)
Add all words in a range to a Ukkonen object.
auto traverse_no_checks(Ukkonen const &u, Word const &w)
Traverse the suffix tree from the root.
Definition ukkonen.hpp:1201
bool is_suffix_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Check if a word is a suffix of any word in a suffix tree.
std::vector< Iterator > pieces_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Find the pieces in a decomposition of a word (if any).
Iterator maximal_piece_prefix_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Find the maximal prefix of a word occurring in two different places in a word in a suffix tree.
bool is_piece(Ukkonen const &u, Iterator first, Iterator last)
Check if a word is a piece (occurs in two distinct places in the words of the suffix tree).
Definition ukkonen.hpp:1846
bool is_piece_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Check if a word is a piece (occurs in two distinct places in the words of the suffix tree).
Definition ukkonen.hpp:1812
size_t length_maximal_piece_prefix_no_checks(Ukkonen const &u, Iterator first, Iterator last)
Find the length of the maximal prefix of a word occurring in two different places in a word in a suff...
Definition ukkonen.hpp:1687
size_t number_of_pieces(Ukkonen const &u, Iterator first, Iterator last)
Find the number of pieces in a decomposition of a word (if any).
Definition ukkonen.hpp:2211
void add_word_no_checks(Ukkonen &u, word_type const &w)
Add a word to the suffix tree.
bool is_suffix(Ukkonen const &u, Iterator first, Iterator last)
Check if a word is a suffix of any word in a suffix tree.
Definition ukkonen.hpp:1493
void add_words_no_checks(Ukkonen &u, std::vector< word_type > const &words)
Add all words in a std::vector to a Ukkonen object.
Namespace containing some operators for creating words.
Definition word-range.hpp:2095
Namespace for everything in the libsemigroups library.
Definition action.hpp:44
The type of the nodes in the tree.
Definition ukkonen.hpp:194
bool is_root() const noexcept
Returns true if the node is the root and false if not.
Definition ukkonen.hpp:337
Node & operator=(Node &&)=default
Default move assignment.
Node(Node &&)=default
Default move constructor.
size_t length() const noexcept
The length of the edge leading into the current node.
Definition ukkonen.hpp:275
node_index_type & child(letter_type c)
The index of the child node corresponding to a letter (if any).
index_type l
The index of the first letter in the edge leading to the node.
Definition ukkonen.hpp:198
index_type r
The index of one past the last letter in the edge leading to the node.
Definition ukkonen.hpp:204
std::map< letter_type, node_index_type > children
The children of the current node.
Definition ukkonen.hpp:224
Node(Node const &)=default
Default copy constructor.
node_index_type parent
The index of the parent node.
Definition ukkonen.hpp:209
Node(index_type l=0, index_type r=0, node_index_type parent=UNDEFINED)
Construct a node from left most index, right most index, and parent.
Node & operator=(Node const &)=default
Default copy assignment.
bool is_leaf() const noexcept
Returns true if the node is a leaf and false if not.
Definition ukkonen.hpp:322
node_index_type child(letter_type c) const
The index of the child node corresponding to a letter (if any).
The return type of traverse.
Definition ukkonen.hpp:121
State(node_index_type vv, edge_index_type ppos)
Construct from index and position.
Definition ukkonen.hpp:168
State & operator=(State const &)=default
Default copy assignment.
node_index_type v
The index in Ukkonen::nodes of the node at the end of the position reached.
Definition ukkonen.hpp:127
bool operator==(State const &that) const noexcept
Compare states.
Definition ukkonen.hpp:184
edge_index_type pos
The position in the edge leading to the node v reached.
Definition ukkonen.hpp:132
State()=default
Default constructor.
State(State &&)=default
Default move constructor.
State & operator=(State &&)=default
Default move assignment.
State(State const &)=default
Default copy constructor.