HPCombi
High Performance Combinatorics in C++ using vector instructions v1.0.0
Loading...
Searching...
No Matches
epu8.hpp
Go to the documentation of this file.
1//****************************************************************************//
2// Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
3// //
4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
5// //
6// HP-Combi is free software: you can redistribute it and/or modify it //
7// under the terms of the GNU General Public License as published by the //
8// Free Software Foundation, either version 3 of the License, or //
9// (at your option) any later version. //
10// //
11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
14// for more details. //
15// //
16// You should have received a copy of the GNU General Public License along //
17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
18//****************************************************************************//
19
20#ifndef HPCOMBI_EPU8_HPP_
21#define HPCOMBI_EPU8_HPP_
22
23#include <array> // for array
24#include <cstddef> // for size_t
25#include <cstdint> // for uint8_t, uint64_t, int8_t
26#include <ostream> // for ostream
27#include <string> // for string
28
29#include "builder.hpp" // for TPUBuild
30#include "debug.hpp" // for HPCOMBI_ASSERT
31#include "vect_generic.hpp" // for VectGeneric
32
33#include "simde/x86/sse4.1.h" // for simde_mm_max_epu8, simde...
34#include "simde/x86/sse4.2.h" // for ???
35
36namespace HPCombi {
37
39inline constexpr uint8_t
40operator"" _u8(unsigned long long arg) noexcept { // NOLINT
41 return static_cast<uint8_t>(arg);
42}
43
45using epu8 = uint8_t __attribute__((vector_size(16)));
46
47static_assert(alignof(epu8) == 16,
48 "epu8 type is not properly aligned by the compiler !");
49
53constexpr TPUBuild<epu8> Epu8{};
54
56inline bool is_all_zero(epu8 a) noexcept { return simde_mm_testz_si128(a, a); }
58inline bool is_all_one(epu8 a) noexcept {
59 return simde_mm_testc_si128(a, Epu8(0xFF));
60}
61
63inline bool equal(epu8 a, epu8 b) noexcept {
64 return is_all_zero(simde_mm_xor_si128(a, b));
65}
67inline bool not_equal(epu8 a, epu8 b) noexcept { return !equal(a, b); }
68
70inline epu8 permuted_ref(epu8 a, epu8 b) noexcept;
72inline epu8 permuted(epu8 a, epu8 b) noexcept {
73 return simde_mm_shuffle_epi8(a, b);
74}
78inline epu8 shifted_right(epu8 a) noexcept {
79 return simde_mm_bslli_si128(a, 1);
80}
84inline epu8 shifted_left(epu8 a) noexcept { return simde_mm_bsrli_si128(a, 1); }
86inline epu8 reverted(epu8 a) noexcept { return permuted(a, Epu8.rev()); }
87
89inline epu8 min(epu8 a, epu8 b) noexcept { return simde_mm_min_epu8(a, b); }
91inline epu8 max(epu8 a, epu8 b) noexcept { return simde_mm_max_epu8(a, b); }
92
94inline bool is_sorted(epu8 a) noexcept;
100inline epu8 sorted(epu8 a) noexcept;
105inline epu8 sorted8(epu8 a) noexcept;
111inline epu8 revsorted(epu8 a) noexcept;
116inline epu8 revsorted8(epu8 a) noexcept;
117
122inline epu8 sort_perm(epu8 &a) noexcept;
127inline epu8 sort8_perm(epu8 &a) noexcept;
128
138inline void merge(epu8 &a, epu8 &b) noexcept;
139
148#ifdef SIMDE_X86_SSE4_2_NATIVE
152inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept;
153#endif
157inline epu8 permutation_of_ref(epu8 a, epu8 b) noexcept;
161inline epu8 permutation_of(epu8 a, epu8 b) noexcept;
162
164constexpr uint64_t prime = 0x9e3779b97f4a7bb9;
165
173inline epu8 random_epu8(uint16_t bnd);
174
182inline epu8 remove_dups(epu8 a, uint8_t repl = 0) noexcept;
183
199inline uint8_t horiz_sum_ref(epu8) noexcept;
205inline uint8_t horiz_sum_gen(epu8) noexcept;
210inline uint8_t horiz_sum4(epu8) noexcept;
215inline uint8_t horiz_sum3(epu8) noexcept;
217inline uint8_t horiz_sum(epu8 v) noexcept { return horiz_sum3(v); }
218
233inline epu8 partial_sums_ref(epu8) noexcept;
239inline epu8 partial_sums_gen(epu8) noexcept;
244inline epu8 partial_sums_round(epu8) noexcept;
246inline epu8 partial_sums(epu8 v) noexcept { return partial_sums_round(v); }
247
262inline uint8_t horiz_max_ref(epu8) noexcept;
268inline uint8_t horiz_max_gen(epu8) noexcept;
273inline uint8_t horiz_max4(epu8) noexcept;
278inline uint8_t horiz_max3(epu8) noexcept;
280inline uint8_t horiz_max(epu8 v) noexcept { return horiz_max4(v); }
281
296inline epu8 partial_max_ref(epu8) noexcept;
302inline epu8 partial_max_gen(epu8) noexcept;
307inline epu8 partial_max_round(epu8) noexcept;
309inline epu8 partial_max(epu8 v) noexcept { return partial_max_round(v); }
310
325inline uint8_t horiz_min_ref(epu8) noexcept;
331inline uint8_t horiz_min_gen(epu8) noexcept;
336inline uint8_t horiz_min4(epu8) noexcept;
341inline uint8_t horiz_min3(epu8) noexcept;
343inline uint8_t horiz_min(epu8 v) noexcept { return horiz_min4(v); }
344
359inline epu8 partial_min_ref(epu8) noexcept;
365inline epu8 partial_min_gen(epu8) noexcept;
370inline epu8 partial_min_round(epu8) noexcept;
372inline epu8 partial_min(epu8 v) noexcept { return partial_min_round(v); }
373
391inline epu8 eval16_ref(epu8 v) noexcept;
396inline epu8 eval16_arr(epu8 v) noexcept;
401inline epu8 eval16_cycle(epu8 v) noexcept;
406inline epu8 eval16_popcount(epu8 v) noexcept;
408inline epu8 eval16(epu8 v) noexcept { return eval16_cycle(v); }
409
432inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept;
433#ifdef SIMDE_X86_SSE4_2_NATIVE
438inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept;
439#endif
444inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept;
446inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) noexcept {
447 return first_diff_mask(a, b, bound);
448}
449
472inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept;
473#ifdef SIMDE_X86_SSE4_2_NATIVE
478inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept;
479#endif
484inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept;
486inline uint64_t last_diff(epu8 a, epu8 b, size_t bound = 16) noexcept {
487 return last_diff_mask(a, b, bound);
488}
489
491inline bool less(epu8 a, epu8 b) noexcept;
497inline int8_t less_partial(epu8 a, epu8 b, int k) noexcept;
498
502inline uint64_t first_zero(epu8 v, int bnd) noexcept;
506inline uint64_t last_zero(epu8 v, int bnd) noexcept;
510inline uint64_t first_non_zero(epu8 v, int bnd) noexcept;
514inline uint64_t last_non_zero(epu8 v, int bnd) noexcept;
515
518inline epu8 popcount16(epu8 v) noexcept;
519
535inline bool is_partial_transformation(epu8 v, const size_t k = 16) noexcept;
536
552inline bool is_transformation(epu8 v, const size_t k = 16) noexcept;
553
570inline bool is_partial_permutation(epu8 v, const size_t k = 16) noexcept;
571
587#ifdef SIMDE_X86_SSE4_2_NATIVE
591inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16) noexcept;
592#endif
596inline bool is_permutation_sort(epu8 v, const size_t k = 16) noexcept;
600inline bool is_permutation_eval(epu8 v, const size_t k = 16) noexcept;
604inline bool is_permutation(epu8 v, const size_t k = 16) noexcept;
605
606} // namespace HPCombi
607
608namespace std {
609
610inline std::ostream &operator<<(std::ostream &stream, HPCombi::epu8 const &a);
611
612inline std::string to_string(HPCombi::epu8 const &a);
613
620} // namespace std
621
622#include "epu8_impl.hpp"
623
624#endif // HPCOMBI_EPU8_HPP_
std::ostream & operator<<(std::ostream &out, const std::vector< T > &v)
Definition image.cpp:35
Definition bmat8.hpp:41
uint8_t horiz_min4(epu8) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8_impl.hpp:418
epu8 max(epu8 a, epu8 b) noexcept
Vector max between two HPCombi::epu8 0.
Definition epu8.hpp:91
uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
The last difference between two HPCombi::epu8.
Definition epu8_impl.hpp:92
uint64_t first_non_zero(epu8 v, int bnd) noexcept
return the index of the first non zero entry or 16 if there are none Only index smaller than bound ar...
Definition epu8_impl.hpp:126
uint8_t horiz_min_ref(epu8) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8_impl.hpp:409
epu8 eval16_arr(epu8 v) noexcept
Evaluation of a HPCombi::epu8.
Definition epu8_impl.hpp:452
epu8 permuted(epu8 a, epu8 b) noexcept
Permuting a HPCombi::epu8.
Definition epu8.hpp:72
epu8 sort8_perm(epu8 &a) noexcept
Sort this and return the sorting permutation.
Definition epu8_impl.hpp:219
epu8 shifted_right(epu8 a) noexcept
Left shifted of a HPCombi::epu8 inserting a 0.
Definition epu8.hpp:78
uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
The first difference between two HPCombi::epu8.
Definition epu8_impl.hpp:77
uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
The first difference between two HPCombi::epu8.
Definition epu8_impl.hpp:88
epu8 partial_sums_ref(epu8) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8_impl.hpp:357
epu8 partial_sums(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:246
uint8_t horiz_min(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:343
epu8 remove_dups(epu8 a, uint8_t repl=0) noexcept
Remove duplicates in a sorted HPCombi::epu8.
Definition epu8_impl.hpp:260
epu8 revsorted8(epu8 a) noexcept
Return a HPCombi::epu8 with the two half reverse sorted.
Definition epu8_impl.hpp:212
epu8 permutation_of(epu8 a, epu8 b) noexcept
Find if a vector is a permutation of one other.
Definition epu8_impl.hpp:303
bool is_permutation(epu8 v, const size_t k=16) noexcept
Definition epu8_impl.hpp:530
int8_t less_partial(epu8 a, epu8 b, int k) noexcept
Partial lexicographic comparison between two HPCombi::epu8.
Definition epu8_impl.hpp:113
uint8_t horiz_max4(epu8) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8_impl.hpp:383
uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
The last difference between two HPCombi::epu8.
Definition epu8_impl.hpp:105
uint8_t horiz_max3(epu8) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8_impl.hpp:384
bool is_all_one(epu8 a) noexcept
Test whether all the entries of a HPCombi::epu8 are one.
Definition epu8.hpp:58
constexpr uint64_t prime
A prime number good for hashing.
Definition epu8.hpp:164
uint8_t horiz_min_gen(epu8) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8_impl.hpp:415
bool is_partial_permutation(epu8 v, const size_t k=16) noexcept
Test for partial permutations.
Definition epu8_impl.hpp:499
bool is_permutation_sort(epu8 v, const size_t k=16) noexcept
Definition epu8_impl.hpp:521
uint64_t last_zero(epu8 v, int bnd) noexcept
return the index of the last zero entry or 16 if there are none Only index smaller than bound are tak...
Definition epu8_impl.hpp:123
void merge(epu8 &a, epu8 &b) noexcept
Merge two sorted epu8.
Definition epu8_impl.hpp:240
uint8_t horiz_sum4(epu8) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8_impl.hpp:348
epu8 popcount16(epu8 v) noexcept
a vector popcount function
Definition epu8_impl.hpp:480
epu8 partial_sums_round(epu8) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8_impl.hpp:368
uint8_t horiz_sum3(epu8) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8_impl.hpp:349
uint8_t horiz_sum_ref(epu8) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8_impl.hpp:339
epu8 partial_sums_gen(epu8) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8_impl.hpp:364
bool equal(epu8 a, epu8 b) noexcept
Equality of HPCombi::epu8.
Definition epu8.hpp:63
epu8 permutation_of_ref(epu8 a, epu8 b) noexcept
Find if a vector is a permutation of one other.
Definition epu8_impl.hpp:294
epu8 min(epu8 a, epu8 b) noexcept
Vector min between two HPCombi::epu8 0.
Definition epu8.hpp:89
epu8 partial_max_ref(epu8) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8_impl.hpp:392
epu8 sorted8(epu8 a) noexcept
Return a HPCombi::epu8 with the two half sorted.
Definition epu8_impl.hpp:206
epu8 partial_max_round(epu8) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8_impl.hpp:403
epu8 eval16(epu8 v) noexcept
Evaluation of a HPCombi::epu8.
Definition epu8.hpp:408
uint8_t horiz_sum(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:217
uint8_t horiz_max(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:280
epu8 reverted(epu8 a) noexcept
Reverting a HPCombi::epu8.
Definition epu8.hpp:86
epu8 eval16_cycle(epu8 v) noexcept
Evaluation of a HPCombi::epu8.
Definition epu8_impl.hpp:463
epu8 eval16_ref(epu8 v) noexcept
Evaluation of a HPCombi::epu8.
Definition epu8_impl.hpp:444
epu8 partial_max_gen(epu8) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8_impl.hpp:399
bool less(epu8 a, epu8 b) noexcept
Lexicographic comparison between two HPCombi::epu8.
Definition epu8_impl.hpp:109
uint8_t horiz_max_ref(epu8) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8_impl.hpp:374
epu8 sorted(epu8 a) noexcept
Return a sorted HPCombi::epu8.
Definition epu8_impl.hpp:203
constexpr TPUBuild< epu8 > Epu8
Factory object acting as a class constructor for type HPCombi::epu8.
Definition epu8.hpp:53
bool is_permutation_eval(epu8 v, const size_t k=16) noexcept
Definition epu8_impl.hpp:525
uint64_t first_zero(epu8 v, int bnd) noexcept
return the index of the first zero entry or 16 if there are none Only index smaller than bound are ta...
Definition epu8_impl.hpp:120
bool is_all_zero(epu8 a) noexcept
Test whether all the entries of a HPCombi::epu8 are zero.
Definition epu8.hpp:56
epu8 random_epu8(uint16_t bnd)
A random HPCombi::epu8.
Definition epu8_impl.hpp:248
epu8 partial_min(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:372
epu8 revsorted(epu8 a) noexcept
Return a reverse sorted HPCombi::epu8.
Definition epu8_impl.hpp:209
epu8 partial_min_round(epu8) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8_impl.hpp:438
bool is_sorted(epu8 a) noexcept
Testing if a HPCombi::epu8 is sorted.
Definition epu8_impl.hpp:200
uint8_t __attribute__((vector_size(16))) epu8
SIMD vector of 16 unsigned bytes.
Definition epu8.hpp:45
uint64_t last_non_zero(epu8 v, int bnd) noexcept
return the index of the last non zero entry or 16 if there are none Only index smaller than bound are...
Definition epu8_impl.hpp:129
uint64_t last_diff(epu8 a, epu8 b, size_t bound=16) noexcept
The last difference between two HPCombi::epu8.
Definition epu8.hpp:486
epu8 eval16_popcount(epu8 v) noexcept
Evaluation of a HPCombi::epu8.
Definition epu8_impl.hpp:471
epu8 sort_perm(epu8 &a) noexcept
Sort this and return the sorting permutation.
Definition epu8_impl.hpp:216
epu8 partial_min_gen(epu8) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8_impl.hpp:434
epu8 partial_min_ref(epu8) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8_impl.hpp:427
uint8_t horiz_max_gen(epu8) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8_impl.hpp:380
epu8 shifted_left(epu8 a) noexcept
Right shifted of a HPCombi::epu8 inserting a 0.
Definition epu8.hpp:84
bool is_transformation(epu8 v, const size_t k=16) noexcept
Test for transformation.
Definition epu8_impl.hpp:493
uint64_t first_diff(epu8 a, epu8 b, size_t bound=16) noexcept
The first difference between two HPCombi::epu8.
Definition epu8.hpp:446
bool is_partial_transformation(epu8 v, const size_t k=16) noexcept
Test for partial transformation.
Definition epu8_impl.hpp:485
epu8 permuted_ref(epu8 a, epu8 b) noexcept
Permuting a HPCombi::epu8.
Definition epu8_impl.hpp:59
epu8 partial_max(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:309
uint8_t horiz_sum_gen(epu8) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8_impl.hpp:345
uint8_t horiz_min3(epu8) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8_impl.hpp:419
bool not_equal(epu8 a, epu8 b) noexcept
Non equality of HPCombi::epu8.
Definition epu8.hpp:67
Definition bmat8.hpp:364
Class for factory object associated to a SIMD packed unsigned integers.
Definition builder.hpp:43