libsemigroups  v3.0.0
C++ library for semigroups and monoids
Loading...
Searching...
No Matches
rewriters.hpp
1//
2// libsemigroups - C++ library for semigroups and monoids
3// Copyright (C) 2023-2025 Joseph Edwards + James D. Mitchell
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU General Public License for more details.
14//
15// You should have received a copy of the GNU General Public License
16// along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18// This file contains the implementation of a Rule object containers for Rule
19// objects. It also includes rewriter classes that can be used to rewrite
20// strings relative to a collection of rules.
21
22#ifndef LIBSEMIGROUPS_DETAIL_REWRITERS_HPP_
23#define LIBSEMIGROUPS_DETAIL_REWRITERS_HPP_
24
25#include <atomic> // for atomic
26#include <chrono> // for time_point
27#include <set> // for set
28#include <string> // for basic_string, operator==
29#include <unordered_map> // for unordered map
30#include <unordered_set> // for unordered set
31
32#include "../aho-corasick.hpp"
33#include "../debug.hpp" // for LIBSEMIGROUPS_ASSERT
34#include "../order.hpp" // for shortlex_compare
35
36#include "multi-string-view.hpp" // for MultiStringView
37
38// TODO(2) Add a KnuthBendixImpl pointer to the rewriter class so that overlap
39// detection can be handled by the rewriter (and therefore depend on the
40// implementation) rather than on the KB object.
41
46namespace libsemigroups {
47 namespace detail {
48 // TODO(2) remove from libsemigroups namespace and put into relevant class
49
54
59
63 using external_char_type = char;
64
68 using internal_char_type = char;
69
83 class Rule {
86 int64_t _id;
87
88 public:
97 explicit Rule(int64_t id);
98
99 Rule& operator=(Rule const& copy) = delete;
100 Rule(Rule const& copy) = delete;
101 Rule(Rule&& copy) = delete;
102 Rule& operator=(Rule&& copy) = delete;
103
108 ~Rule() {
109 delete _lhs;
110 delete _rhs;
111 }
112
130 [[nodiscard]] internal_string_type* lhs() const noexcept {
131 return _lhs;
132 }
133
150 [[nodiscard]] internal_string_type* rhs() const noexcept {
151 return _rhs;
152 }
153
166 [[nodiscard]] bool empty() const noexcept {
167 return _lhs->empty() && _rhs->empty();
168 }
169
181 [[nodiscard]] inline bool active() const noexcept {
182 LIBSEMIGROUPS_ASSERT(_id != 0);
183 return (_id > 0);
184 }
185
198 void deactivate() noexcept;
199
212 void activate() noexcept;
213
230 void set_id_no_checks(int64_t id) noexcept {
231 LIBSEMIGROUPS_ASSERT(id > 0);
232 LIBSEMIGROUPS_ASSERT(!active());
233 _id = -1 * id;
234 }
235
243 void set_id(int64_t id) {
244 if (id <= 0) {
246 "invalid id, expected a value greater than 0, found {}", id);
247 }
248 if (active()) {
249 LIBSEMIGROUPS_EXCEPTION("cannot set the id of an active rule");
250 }
252 }
253
265 [[nodiscard]] int64_t id() const noexcept {
266 LIBSEMIGROUPS_ASSERT(_id != 0);
267 return _id;
268 }
269
283 void reorder() {
284 if (shortlex_compare(_lhs, _rhs)) {
285 std::swap(_lhs, _rhs);
286 }
287 }
288 }; // class Rule
289
290 class RuleLookup {
291 public:
292 RuleLookup() : _rule(nullptr) {}
293
294 explicit RuleLookup(Rule* rule)
295 : _first(rule->lhs()->cbegin()),
296 _last(rule->lhs()->cend()),
297 _rule(rule) {}
298
299 RuleLookup& operator()(internal_string_type::iterator const& first,
300 internal_string_type::iterator const& last) {
301 _first = first;
302 _last = last;
303 return *this;
304 }
305
306 Rule const* rule() const {
307 return _rule;
308 }
309
310 // This implements reverse lex comparison of this and that, which
311 // satisfies the requirement of std::set that equivalent items be
312 // incomparable, so, for example bcbc and abcbc are considered
313 // equivalent, but abcba and bcbc are not.
314 bool operator<(RuleLookup const& that) const;
315
316 private:
317 internal_string_type::const_iterator _first;
318 internal_string_type::const_iterator _last;
319 Rule const* _rule;
320 }; // class RuleLookup
321
322 class Rules {
323 public:
324 using iterator = std::list<Rule const*>::iterator;
325 using const_iterator = std::list<Rule const*>::const_iterator;
326 using const_reverse_iterator
327 = std::list<Rule const*>::const_reverse_iterator;
328
329 private:
330 struct Stats {
331 Stats() noexcept;
332 Stats& init() noexcept;
333
334 Stats(Stats const&) noexcept = default;
335 Stats(Stats&&) noexcept = default;
336 Stats& operator=(Stats const&) noexcept = default;
337 Stats& operator=(Stats&&) noexcept = default;
338
339 size_t max_word_length;
340 size_t max_active_word_length;
341 size_t max_active_rules;
342 size_t min_length_lhs_rule;
343 uint64_t total_rules;
344 // std::unordered_set<internal_string_type> unique_lhs_rules;
345 };
346
347 // TODO(2) remove const?
348 std::list<Rule const*> _active_rules;
349 std::array<iterator, 2> _cursors;
350 std::list<Rule*> _inactive_rules;
351 mutable Stats _stats;
352
353 public:
354 Rules() = default;
355
356 // Rules(Rules const& that);
357 // Rules(Rules&& that);
358 Rules& operator=(Rules const&);
359
360 // TODO(1) the other constructors
361
362 ~Rules();
363
364 Rules& init();
365
366 const_iterator begin() const noexcept {
367 return _active_rules.cbegin();
368 }
369
370 const_iterator end() const noexcept {
371 return _active_rules.cend();
372 }
373
374 iterator begin() noexcept {
375 return _active_rules.begin();
376 }
377
378 iterator end() noexcept {
379 return _active_rules.end();
380 }
381
382 const_reverse_iterator rbegin() const noexcept {
383 return _active_rules.crbegin();
384 }
385
386 const_reverse_iterator rend() const noexcept {
387 return _active_rules.crend();
388 }
389
390 [[nodiscard]] size_t number_of_active_rules() const noexcept {
391 return _active_rules.size();
392 }
393
394 [[nodiscard]] size_t number_of_inactive_rules() const noexcept {
395 return _inactive_rules.size();
396 }
397
398 [[nodiscard]] size_t max_active_word_length() const;
399
400 iterator& cursor(size_t index) {
401 LIBSEMIGROUPS_ASSERT(index < _cursors.size());
402 return _cursors[index];
403 }
404
405 // TODO(1) is this ever called?
406 void add_active_rule(Rule* rule) {
407 _active_rules.push_back(rule);
408 }
409
410 void add_inactive_rule(Rule* rule) {
411 _inactive_rules.push_back(rule);
412 }
413
414 Stats const& stats() const {
415 return _stats;
416 }
417
418 [[nodiscard]] iterator erase_from_active_rules(iterator it);
419
420 // TODO(1) this feels like it should be add_active rule. The above
421 // add_active_rule seems a bit dangerous
422 void add_rule(Rule* rule);
423
424 [[nodiscard]] Rule* copy_rule(Rule const* rule);
425
426 // private:
427 [[nodiscard]] Rule* new_rule();
428
429 protected:
430 template <typename Iterator>
431 [[nodiscard]] Rule* new_rule(Iterator begin_lhs,
432 Iterator end_lhs,
433 Iterator begin_rhs,
434 Iterator end_rhs) {
435 Rule* rule = new_rule();
436 rule->lhs()->assign(begin_lhs, end_lhs);
437 rule->rhs()->assign(begin_rhs, end_rhs);
438 rule->reorder();
439 return rule;
440 }
441 };
442
443 class RewriterBase : public Rules {
444 std::unordered_set<internal_char_type> _alphabet;
445 mutable std::atomic<bool> _cached_confluent;
446 mutable std::atomic<bool> _confluence_known;
447 size_t _max_stack_depth;
448 std::stack<Rule*> _pending_rules;
449 std::atomic<bool> _requires_alphabet;
450
451 using alphabet_citerator
452 = std::unordered_set<internal_char_type>::const_iterator;
453
454 public:
455 // TODO(1) to cpp
456 RewriterBase()
457 : _alphabet(),
458 _cached_confluent(false),
459 _confluence_known(false),
460 _max_stack_depth(0),
461 _pending_rules(),
462 _requires_alphabet() {}
463
464 RewriterBase& init();
465
466 explicit RewriterBase(bool requires_alphabet) : RewriterBase() {
467 _requires_alphabet = requires_alphabet;
468 }
469
470 ~RewriterBase();
471
472 RewriterBase& operator=(RewriterBase const& that) {
473 Rules::operator=(that);
474 _cached_confluent = that._cached_confluent.load();
475 _confluence_known = that._confluence_known.load();
476 _requires_alphabet = that._requires_alphabet.load();
477 while (!_pending_rules.empty()) {
478 _pending_rules.pop();
479 }
480 decltype(_pending_rules) tmp = that._pending_rules;
481 while (!tmp.empty()) {
482 auto const* rule = tmp.top();
483 _pending_rules.push(copy_rule(rule));
484 tmp.pop();
485 }
486
487 if (_requires_alphabet) {
488 _alphabet = that._alphabet;
489 }
490 return *this;
491 }
492
493 bool requires_alphabet() const {
494 return _requires_alphabet;
495 }
496
497 decltype(_alphabet) alphabet() const {
498 return _alphabet;
499 }
500
501 alphabet_citerator alphabet_cbegin() const {
502 return _alphabet.cbegin();
503 }
504
505 alphabet_citerator alphabet_cend() const {
506 return _alphabet.cend();
507 }
508
509 void set_cached_confluent(tril val) const;
510
511 bool cached_confluent() const noexcept {
512 return _cached_confluent;
513 }
514
515 [[nodiscard]] bool consistent() const noexcept {
516 return _pending_rules.empty();
517 }
518
519 [[nodiscard]] bool confluence_known() const {
520 return _confluence_known;
521 }
522
523 [[nodiscard]] size_t max_stack_depth() const {
524 return _max_stack_depth;
525 }
526
527 bool add_pending_rule(Rule* rule);
528
529 bool process_pending_rules();
530
531 void reduce();
532
533 void reduce_rhs();
534
535 void rewrite(Rule* rule) const {
536 rewrite(*rule->lhs());
537 rewrite(*rule->rhs());
538 rule->reorder();
539 }
540
541 // TODO(2) remove virtual functions
542 virtual void rewrite(internal_string_type& u) const = 0;
543
544 virtual void add_rule(Rule* rule) = 0;
545
546 virtual Rules::iterator make_active_rule_pending(Rules::iterator it) = 0;
547
548 size_t number_of_pending_rules() const noexcept {
549 return _pending_rules.size();
550 }
551
552 Rule* next_pending_rule() {
553 LIBSEMIGROUPS_ASSERT(_pending_rules.size() != 0);
554 Rule* rule = _pending_rules.top();
555 _pending_rules.pop();
556 return rule;
557 }
558
559 template <typename StringLike>
560 void add_rule(StringLike const& lhs, StringLike const& rhs) {
561 if (lhs != rhs) {
562 if (add_pending_rule(new_rule(
563 lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend()))) {
564 // TODO(1) only process_pending_rules when ready to run
565 process_pending_rules();
566 }
567 }
568 }
569
570 template <typename StringLike>
571 void add_pending_rule(StringLike const& lhs, StringLike const& rhs) {
572 if (lhs != rhs) {
573 add_pending_rule(
574 new_rule(lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend()));
575 }
576 }
577
578 void add_to_alphabet(internal_char_type letter) {
579 _alphabet.emplace(letter);
580 }
581 };
582
583 class RewriteFromLeft : public RewriterBase {
584 std::set<RuleLookup> _set_rules;
585
586 public:
587 using RewriterBase::cached_confluent;
588 using Rules::stats;
589
590 RewriteFromLeft() = default;
591
592 RewriteFromLeft& operator=(RewriteFromLeft const&);
593
594 // TODO(2) the other constructors
595
596 ~RewriteFromLeft();
597
598 RewriteFromLeft& init();
599
600 void rewrite(internal_string_type& u) const;
601
602 [[nodiscard]] bool confluent() const;
603
604 // TODO(1) private?
605 void add_rule(Rule* rule);
606
607 using RewriterBase::add_rule;
608
609 private:
610 void rewrite(Rule* rule) const;
611
612 iterator make_active_rule_pending(iterator);
613
614 void report_from_confluent(
615 std::atomic_uint64_t const&,
616 std::chrono::high_resolution_clock::time_point const&) const;
617
618 bool confluent_impl(std::atomic_uint64_t&) const;
619 };
620
621 class RewriteTrie : public RewriterBase {
622 using index_type = AhoCorasick::index_type;
623
624 std::map<index_type, Rule*> _rules;
625 AhoCorasick _trie;
626
627 public:
628 using RewriterBase::cached_confluent;
629 using Rules::stats;
630 using iterator = internal_string_type::iterator;
631 using rule_iterator = std::map<index_type, Rule*>::iterator;
632
633 RewriteTrie() : RewriterBase(true), _rules(), _trie() {}
634
635 RewriteTrie(RewriteTrie const& that);
636
637 RewriteTrie& operator=(RewriteTrie const& that);
638 // TODO(1) move constructor, and move assignment operator
639
640 ~RewriteTrie();
641
642 RewriteTrie& init();
643
644 rule_iterator rules_begin() {
645 return _rules.begin();
646 }
647
648 rule_iterator rules_end() {
649 return _rules.end();
650 }
651
652 void all_overlaps();
653
654 void rule_overlaps(index_type node);
655
656 void add_overlaps(Rule* rule, index_type node, size_t overlap_length);
657
658 void rewrite(internal_string_type& u) const;
659
660 [[nodiscard]] bool confluent() const;
661
662 void add_rule(Rule* rule) {
663 Rules::add_rule(rule);
664 add_rule_to_trie(rule);
665 set_cached_confluent(tril::unknown);
666 }
667
668 using RewriterBase::add_rule;
669
670 private:
671 [[nodiscard]] bool descendants_confluent(Rule const* rule1,
672 index_type current_node,
673 size_t backtrack_depth) const;
674
675 // TODO(2) (After removing virtual functions) Put in base
676 void rewrite(Rule* rule) const {
677 rewrite(*rule->lhs());
678 rewrite(*rule->rhs());
679 rule->reorder();
680 }
681
682 void add_rule_to_trie(Rule* rule) {
683 index_type node = _trie.add_word_no_checks(rule->lhs()->cbegin(),
684 rule->lhs()->cend());
685 _rules.emplace(node, rule);
686 }
687
688 Rules::iterator make_active_rule_pending(Rules::iterator it);
689
690 void report_from_confluent(
691 std::atomic_uint64_t const&,
692 std::chrono::high_resolution_clock::time_point const&) const;
693
694 bool confluent_impl(std::atomic_uint64_t&) const;
695 };
696 } // namespace detail
697} // namespace libsemigroups
698#endif // LIBSEMIGROUPS_DETAIL_REWRITERS_HPP_
T begin(T... args)
internal_string_type * lhs() const noexcept
Return the left-hand side of the rule.
Definition rewriters.hpp:130
void activate() noexcept
Activate a rule.
bool empty() const noexcept
Check if the left-hand and right-hand sides are empty.
Definition rewriters.hpp:166
void reorder()
Reorder the left-hand and right-hand sides.
Definition rewriters.hpp:283
bool active() const noexcept
Check if the Rule is active.
Definition rewriters.hpp:181
void deactivate() noexcept
Deactivate a rule.
void set_id(int64_t id)
Set the id of a rule.
Definition rewriters.hpp:243
int64_t id() const noexcept
Return the id of a rule.
Definition rewriters.hpp:265
void set_id_no_checks(int64_t id) noexcept
Set the id of a rule.
Definition rewriters.hpp:230
Rule(int64_t id)
Construct with new empty left-hand and right-hand sides.
internal_string_type * rhs() const noexcept
Return the right-hand side of the rule.
Definition rewriters.hpp:150
T emplace(T... args)
T empty(T... args)
T end(T... args)
Thing::native_word_type reduce(Thing const &thing, typename Thing::native_word_type const &w)
Reduce a word.
#define LIBSEMIGROUPS_EXCEPTION(...)
Throw a LibsemigroupsException.
Definition exception.hpp:95
bool shortlex_compare(T const &first1, T const &last1, T const &first2, T const &last2)
Compare two objects of the same type using the short-lex reduction ordering.
Definition order.hpp:267
std::string internal_string_type
Definition rewriters.hpp:58
char external_char_type
Definition rewriters.hpp:63
std::string external_string_type
Definition rewriters.hpp:53
char internal_char_type
Definition rewriters.hpp:68
AhoCorasick::index_type index_type
Alias for the index of a node in the trie.
Definition aho-corasick.hpp:748
FroidurePin< typename Container::value_type > & init(FroidurePin< typename Container::value_type > &fp, Container const &gens)
Re-initialize a FroidurePin object from a container of generators.
Definition froidure-pin.hpp:1634
void add_rule(Presentation< Word > &p, Word const &lhop, Word const &rhop)
Add a rule to the presentation by reference and check.
Definition presentation.hpp:806
Namespace for everything in the libsemigroups library.
Definition action.hpp:44
T pop(T... args)
T push(T... args)
T size(T... args)
T swap(T... args)
T top(T... args)