libsemigroups  v3.1.2
C++ library for semigroups and monoids
Loading...
Searching...
No Matches
rewriters.hpp
1//
2// libsemigroups - C++ library for semigroups and monoids
3// Copyright (C) 2023-2025 Joseph Edwards + James D. Mitchell
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU General Public License for more details.
14//
15// You should have received a copy of the GNU General Public License
16// along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18// This file contains the implementation of a Rule object containers for Rule
19// objects. It also includes rewriter classes that can be used to rewrite
20// strings relative to a collection of rules.
21
22#ifndef LIBSEMIGROUPS_DETAIL_REWRITERS_HPP_
23#define LIBSEMIGROUPS_DETAIL_REWRITERS_HPP_
24
25#include <atomic> // for atomic
26#include <chrono> // for time_point
27#include <list> // for list
28#include <set> // for set
29#include <string> // for basic_string, operator==
30#include <unordered_map> // for unordered_map
31
32#include "libsemigroups/debug.hpp" // for LIBSEMIGROUPS_ASSERT
33#include "libsemigroups/order.hpp" // for shortlex_compare
34#include "libsemigroups/types.hpp" // for u8string
35
36#include "aho-corasick-impl.hpp" // for AhoCorasickImpl
37#include "multi-view.hpp" // for MultiView
38
39// TODO(2) Add a KnuthBendixImpl pointer to the rewriter class so that overlap
40// detection can be handled by the rewriter (and therefore depend on the
41// implementation) rather than on the KB object.
42
43namespace libsemigroups {
44 namespace detail {
45
47 // Rule
49
50 class Rule {
51 public:
52 using native_word_type = std::string;
53
54 private:
55 native_word_type _lhs;
56 native_word_type _rhs;
57 int64_t _id;
58
59 public:
60 explicit Rule(int64_t id);
61
62 Rule() = delete;
63 Rule& operator=(Rule const& copy) = delete;
64 Rule(Rule const& copy) = delete;
65 Rule(Rule&& copy) = delete;
66 Rule& operator=(Rule&& copy) = delete;
67
68 ~Rule() = default;
69
70 [[nodiscard]] native_word_type const& lhs() const noexcept {
71 return _lhs;
72 }
73
74 [[nodiscard]] native_word_type const& rhs() const noexcept {
75 return _rhs;
76 }
77
78 [[nodiscard]] native_word_type& lhs() noexcept {
79 return _lhs;
80 }
81
82 [[nodiscard]] native_word_type& rhs() noexcept {
83 return _rhs;
84 }
85
86 [[nodiscard]] bool empty() const noexcept {
87 return _lhs.empty() && _rhs.empty();
88 }
89
90 [[nodiscard]] inline bool active() const noexcept {
91 LIBSEMIGROUPS_ASSERT(_id != 0);
92 return _id > 0;
93 }
94
95 void activate_no_checks() noexcept;
96 void deactivate_no_checks() noexcept;
97
98 void set_id_no_checks(int64_t id) noexcept {
99 LIBSEMIGROUPS_ASSERT(id > 0);
100 LIBSEMIGROUPS_ASSERT(!active());
101 _id = -1 * id;
102 }
103
104 [[nodiscard]] int64_t id() const noexcept {
105 LIBSEMIGROUPS_ASSERT(_id != 0);
106 return _id;
107 }
108
109 void reorder() {
110 if (shortlex_compare(_lhs, _rhs)) {
111 std::swap(_lhs, _rhs);
112 }
113 }
114 }; // class Rule
115
117 // RuleLookup
119
120 class RuleLookup {
121 public:
122 using native_word_type = Rule::native_word_type;
123
124 RuleLookup() : _rule(nullptr) {}
125
126 explicit RuleLookup(Rule* rule)
127 : _first(rule->lhs().cbegin()),
128 _last(rule->lhs().cend()),
129 _rule(rule) {}
130
131 RuleLookup& operator()(native_word_type::iterator first,
132 native_word_type::iterator last) {
133 _first = first;
134 _last = last;
135 return *this;
136 }
137
138 Rule const* rule() const {
139 return _rule;
140 }
141
142 // This implements reverse lex comparison of this and that, which
143 // satisfies the requirement of std::set that equivalent items be
144 // incomparable, so, for example bcbc and abcbc are considered
145 // equivalent, but abcba and bcbc are not.
146 bool operator<(RuleLookup const& that) const;
147
148 private:
149 native_word_type::const_iterator _first;
150 native_word_type::const_iterator _last;
151 Rule const* _rule;
152 }; // class RuleLookup
153
155 // Rules
157
158 class Rules {
159 public:
160 using iterator = std::list<Rule*>::iterator;
161 using const_iterator = std::list<Rule*>::const_iterator;
162 using const_reverse_iterator = std::list<Rule*>::const_reverse_iterator;
163
164 private:
165 struct Stats {
166 Stats() noexcept;
167 Stats& init() noexcept;
168
169 Stats(Stats const&) noexcept = default;
170 Stats(Stats&&) noexcept = default;
171 Stats& operator=(Stats const&) noexcept = default;
172 Stats& operator=(Stats&&) noexcept = default;
173
174 size_t max_word_length;
175 size_t max_active_word_length;
176 size_t max_active_rules;
177 size_t min_length_lhs_rule;
178 uint64_t total_rules;
179 };
180
181 std::list<Rule*> _active_rules;
182 std::array<iterator, 2> _cursors;
183 std::list<Rule*> _inactive_rules;
184 mutable Stats _stats;
185
186 public:
187 Rules() = default;
188
189 Rules(Rules const& that) : Rules() {
190 *this = that;
191 }
192 Rules(Rules&& that) = default;
193
194 Rules& operator=(Rules const&);
195 Rules& operator=(Rules&& that);
196
197 ~Rules();
198
199 Rules& init();
200
201 const_iterator begin() const noexcept {
202 return _active_rules.cbegin();
203 }
204
205 const_iterator end() const noexcept {
206 return _active_rules.cend();
207 }
208
209 iterator begin() noexcept {
210 return _active_rules.begin();
211 }
212
213 iterator end() noexcept {
214 return _active_rules.end();
215 }
216
217 const_reverse_iterator rbegin() const noexcept {
218 return _active_rules.crbegin();
219 }
220
221 const_reverse_iterator rend() const noexcept {
222 return _active_rules.crend();
223 }
224
225 [[nodiscard]] size_t number_of_active_rules() const noexcept {
226 return _active_rules.size();
227 }
228
229 [[nodiscard]] size_t number_of_inactive_rules() const noexcept {
230 return _inactive_rules.size();
231 }
232
233 [[nodiscard]] size_t max_active_word_length() const;
234
235 iterator& cursor(size_t index) {
236 LIBSEMIGROUPS_ASSERT(index < _cursors.size());
237 return _cursors[index];
238 }
239
240 Stats const& stats() const {
241 return _stats;
242 }
243
244 void add_rule(Rule* rule);
245
246 protected:
247 template <typename Iterator>
248 [[nodiscard]] Rule* new_rule(Iterator begin_lhs,
249 Iterator end_lhs,
250 Iterator begin_rhs,
251 Iterator end_rhs) {
252 Rule* rule = new_rule();
253 rule->lhs().assign(begin_lhs, end_lhs);
254 rule->rhs().assign(begin_rhs, end_rhs);
255 rule->reorder();
256 return rule;
257 }
258
259 [[nodiscard]] Rule* copy_rule(Rule const* rule);
260 [[nodiscard]] iterator erase_from_active_rules(iterator it);
261 void add_inactive_rule(Rule* rule) {
262 _inactive_rules.push_back(rule);
263 }
264
265 private:
266 [[nodiscard]] Rule* new_rule();
267 }; // class Rules
268
270 // RewriteBase
272
273 class RewriteBase : public Rules {
274 mutable std::atomic<bool> _cached_confluent;
275 mutable std::atomic<bool> _confluence_known;
276 size_t _max_pending_rules;
277
278 protected:
279 enum class State : uint8_t {
280 none,
281 adding_pending_rules,
282 reducing_pending_rules,
283 checking_confluence
284 };
285
286 std::vector<Rule*> _pending_rules;
287 State _state;
288 bool _ticker_running;
289
290 public:
291 using native_word_type = Rule::native_word_type;
292
294 // Constructors + inits
296
297 RewriteBase();
298 RewriteBase& init();
299 RewriteBase(RewriteBase const& that) : RewriteBase() {
300 *this = that;
301 }
302 RewriteBase(RewriteBase&& that);
303 RewriteBase& operator=(RewriteBase const& that);
304 RewriteBase& operator=(RewriteBase&& that);
305
306 virtual ~RewriteBase();
307
309 // Public mem fns
311
312 // Some rewriters require knowledge of the alphabet size, and some do not.
313 // For those that do not we provide a default implementation that does
314 // nothing.
315 RewriteBase& increase_alphabet_size_by(size_t) {
316 return *this;
317 }
318
319 [[nodiscard]] bool confluent();
320
321 bool cached_confluent() const noexcept {
322 return _cached_confluent;
323 }
324
325 void set_cached_confluent(tril val) const;
326
327 [[nodiscard]] bool confluence_known() const {
328 return _confluence_known;
329 }
330
331 [[nodiscard]] size_t max_pending_rules() const {
332 return _max_pending_rules;
333 }
334
335 size_t number_of_pending_rules() const noexcept {
336 return _pending_rules.size();
337 }
338
339 Rule* next_pending_rule();
340
341 template <typename StringLike>
342 void add_rule(StringLike const& lhs, StringLike const& rhs);
343
344 protected:
346 // Member functions - protected
348
349 void report_progress_from_thread(
350 std::atomic_uint64_t const& seen,
351 std::chrono::high_resolution_clock::time_point const& start_time);
352
353 void report_progress_from_thread(
354 std::chrono::high_resolution_clock::time_point const& start_time) {
355 report_progress_from_thread(0, start_time);
356 }
357
358 bool add_pending_rule(Rule* rule);
359
360 private:
361 virtual bool confluent_impl(std::atomic_uint64_t& seen) = 0;
362
363 virtual void report_checking_confluence(
364 std::atomic_uint64_t const& seen,
365 std::chrono::high_resolution_clock::time_point const& start_time)
366 const
367 = 0;
368
369 virtual void report_reducing_rules(
370 std::atomic_uint64_t const&,
371 std::chrono::high_resolution_clock::time_point const&) const {}
372 }; // class RewriteBase
373
374 // RewriteBase out-of-lined mem fn template
375 template <typename StringLike>
376 void RewriteBase::add_rule(StringLike const& lhs, StringLike const& rhs) {
377 if (lhs != rhs) {
378 add_pending_rule(
379 new_rule(lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend()));
380 }
381 }
382
384 // RewriteFromLeft
386
387 class RewriteFromLeft : public RewriteBase {
388 std::set<RuleLookup> _set_rules;
389
390 public:
391 using native_word_type = Rule::native_word_type;
392
393 using RewriteBase::add_rule;
394
395 RewriteFromLeft() = default;
396
397 RewriteFromLeft(RewriteFromLeft const& that) : RewriteFromLeft() {
398 *this = that;
399 }
400 RewriteFromLeft(RewriteFromLeft&&) = default;
401
402 RewriteFromLeft& operator=(RewriteFromLeft const&);
403 RewriteFromLeft& operator=(RewriteFromLeft&&) = default;
404
405 ~RewriteFromLeft();
406
407 RewriteFromLeft& init();
408
409 bool process_pending_rules();
410
411 void rewrite(native_word_type& u);
412
413 void rewrite(native_word_type& u) const {
414 const_cast<RewriteFromLeft*>(this)->rewrite(u);
415 }
416
417 private:
418 void rewrite(Rule* rule) const {
419 rewrite(rule->lhs());
420 rewrite(rule->rhs());
421 rule->reorder();
422 }
423
424 void add_rule(Rule* rule);
425
426 iterator make_active_rule_pending(iterator);
427
428 void report_checking_confluence(
429 std::atomic_uint64_t const&,
430 std::chrono::high_resolution_clock::time_point const&) const override;
431
432 bool confluent_impl(std::atomic_uint64_t&) override;
433 };
434
436 // RewriteTrie
438
439 class RewriteTrie : public RewriteBase {
440 public:
441 using index_type = AhoCorasickImpl::index_type;
442 using iterator = native_word_type::iterator;
443 using rule_iterator = std::unordered_map<index_type, Rule*>::iterator;
444 using native_word_type = Rule::native_word_type;
445
446 private:
447 std::unordered_map<index_type, Rule*> _new_rule_map;
448 AhoCorasickImpl _new_rule_trie;
449 std::vector<index_type> _rewrite_tmp_buf;
450 std::unordered_map<index_type, Rule*> _rule_map;
451 AhoCorasickImpl _rule_trie;
452 bool _ticker_running;
453
454 public:
455 using Rules::stats;
456
457 using RewriteBase::add_rule;
458 using RewriteBase::cached_confluent;
459
460 RewriteTrie();
461
462 RewriteTrie& init();
463
464 RewriteTrie(RewriteTrie const& that) : RewriteTrie() {
465 *this = that;
466 }
467 RewriteTrie(RewriteTrie&& that) = default;
468 RewriteTrie& operator=(RewriteTrie const& that);
469 RewriteTrie& operator=(RewriteTrie&& that) = default;
470
471 ~RewriteTrie();
472
473 RewriteTrie& increase_alphabet_size_by(size_t val) {
474 _rule_trie.increase_alphabet_size_by(val);
475 return *this;
476 }
477
478 bool process_pending_rules();
479
480 // TODO(1) iterators
481 void rewrite(native_word_type& u);
482
483 void rewrite(Rule* rule) const {
484 rewrite(rule->lhs());
485 rewrite(rule->rhs());
486 rule->reorder();
487 }
488
489 void rewrite(native_word_type& u) const {
490 const_cast<RewriteTrie*>(this)->rewrite(u);
491 }
492
493 private:
494 void add_rule(Rule* rule) {
495 Rules::add_rule(rule);
496 index_type node = _rule_trie.add_word_no_checks(rule->lhs().cbegin(),
497 rule->lhs().cend());
498 _rule_map.emplace(node, rule);
499 set_cached_confluent(tril::unknown);
500 }
501
502 [[nodiscard]] bool descendants_confluent(Rule const* rule1,
503 index_type current_node,
504 size_t backtrack_depth) const;
505
506 Rules::iterator make_active_rule_pending(Rules::iterator it);
507
508 bool confluent_impl(std::atomic_uint64_t&) override;
509
510 void report_checking_confluence(
511 std::atomic_uint64_t const&,
512 std::chrono::high_resolution_clock::time_point const&) const override;
513
514 void report_reducing_rules(
515 std::atomic_uint64_t const&,
516 std::chrono::high_resolution_clock::time_point const&) const override;
517 };
518 } // namespace detail
519} // namespace libsemigroups
520#endif // LIBSEMIGROUPS_DETAIL_REWRITERS_HPP_
T begin(T... args)
T emplace(T... args)
T end(T... args)
bool shortlex_compare(T const &first1, T const &last1, T const &first2, T const &last2)
Compare two objects of the same type using the short-lex reduction ordering.
Definition order.hpp:273
@ none
No ordering.
Definition order.hpp:47
AhoCorasick::index_type index_type
Alias for the index of a node in the trie.
Definition aho-corasick.hpp:750
FroidurePin< typename Container::value_type > & init(FroidurePin< typename Container::value_type > &fp, Container const &gens)
Re-initialize a FroidurePin object from a container of generators.
Definition froidure-pin.hpp:1636
void add_rule(Presentation< Word > &p, Word const &lhop, Word const &rhop)
Add a rule to the presentation by reference and check.
Definition presentation.hpp:841
Namespace for everything in the libsemigroups library.
Definition action.hpp:44
T size(T... args)
T swap(T... args)