HPCombi
High Performance Combinatorics in C++ using vector instructions v1.0.3
Loading...
Searching...
No Matches
epu8.hpp
Go to the documentation of this file.
1//****************************************************************************//
2// Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
3// //
4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
5// //
6// HP-Combi is free software: you can redistribute it and/or modify it //
7// under the terms of the GNU General Public License as published by the //
8// Free Software Foundation, either version 3 of the License, or //
9// (at your option) any later version. //
10// //
11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
14// for more details. //
15// //
16// You should have received a copy of the GNU General Public License along //
17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
18//****************************************************************************//
19
25
26#ifndef HPCOMBI_EPU8_HPP_
27#define HPCOMBI_EPU8_HPP_
28
29#include <array> // for array
30#include <cstddef> // for size_t
31#include <cstdint> // for uint8_t, uint64_t, int8_t
32#include <ostream> // for ostream
33#include <string> // for string
34
35#include "builder.hpp" // for TPUBuild
36#include "debug.hpp" // for HPCOMBI_ASSERT
37#include "vect_generic.hpp" // for VectGeneric
38
39#if defined(__GNUC__) && !defined(__clang__)
40#pragma GCC diagnostic push
41#pragma GCC diagnostic ignored "-Wswitch-default"
42#pragma GCC diagnostic ignored "-Wpacked"
43#endif
44#if defined(__clang__)
45#pragma clang diagnostic push
46#pragma clang diagnostic ignored "-Wbitwise-instead-of-logical"
47#endif
48#include "simde/x86/sse4.1.h" // for simde_mm_max_epu8, simde...
49#include "simde/x86/sse4.2.h" // for ???
50#if defined(__clang__)
51#pragma clang diagnostic pop
52#endif
53#if defined(__GNUC__) && !defined(__clang__)
54#pragma GCC diagnostic pop
55#endif
56
57namespace HPCombi {
58
60inline constexpr uint8_t
61operator"" _u8(unsigned long long arg) noexcept { // NOLINT
62 return static_cast<uint8_t>(arg);
63}
64
73using epu8 = uint8_t __attribute__((vector_size(16)));
74
75static_assert(alignof(epu8) == 16,
76 "epu8 type is not properly aligned by the compiler !");
77
81constexpr TPUBuild<epu8> Epu8{};
82
84inline bool is_all_zero(epu8 a) noexcept { return simde_mm_testz_si128(a, a); }
86inline bool is_all_one(epu8 a) noexcept {
87 return simde_mm_testc_si128(a, Epu8(0xFF));
88}
89
91inline bool equal(epu8 a, epu8 b) noexcept {
92 return is_all_zero(simde_mm_xor_si128(a, b));
93}
94
95inline bool not_equal(epu8 a, epu8 b) noexcept { return !equal(a, b); }
96
99inline epu8 permuted_ref(epu8 a, epu8 b) noexcept;
100
103inline epu8 permuted(epu8 a, epu8 b) noexcept {
104 return simde_mm_shuffle_epi8(a, b);
105}
106
110inline epu8 shifted_right(epu8 a) noexcept {
111 return simde_mm_bslli_si128(a, 1);
112}
113
117inline epu8 shifted_left(epu8 a) noexcept { return simde_mm_bsrli_si128(a, 1); }
118
120inline epu8 reverted(epu8 a) noexcept { return permuted(a, Epu8.rev()); }
121
123inline epu8 min(epu8 a, epu8 b) noexcept { return simde_mm_min_epu8(a, b); }
125inline epu8 max(epu8 a, epu8 b) noexcept { return simde_mm_max_epu8(a, b); }
126
128inline bool is_sorted(epu8 a) noexcept;
134inline epu8 sorted(epu8 a) noexcept;
139inline epu8 sorted8(epu8 a) noexcept;
145inline epu8 revsorted(epu8 a) noexcept;
150inline epu8 revsorted8(epu8 a) noexcept;
151
156inline epu8 sort_perm(epu8 &a) noexcept;
161inline epu8 sort8_perm(epu8 &a) noexcept;
162
170inline void merge(epu8 &a, epu8 &b) noexcept;
171
172#ifdef SIMDE_X86_SSE4_2_NATIVE
177inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept;
178#endif
179
184inline epu8 permutation_of_ref(epu8 a, epu8 b) noexcept;
185
195inline epu8 permutation_of(epu8 a, epu8 b) noexcept;
196
198constexpr uint64_t prime = 0x9e3779b97f4a7bb9;
199
207inline epu8 random_epu8(uint16_t bnd);
208
216inline epu8 remove_dups(epu8 a, uint8_t repl = 0) noexcept;
217
223
224inline uint8_t horiz_sum_ref(epu8) noexcept;
225
232
233inline uint8_t horiz_sum_gen(epu8) noexcept;
234
240inline uint8_t horiz_sum4(epu8) noexcept;
241
247inline uint8_t horiz_sum3(epu8) noexcept;
248
260inline uint8_t horiz_sum(epu8 v) noexcept { return horiz_sum3(v); }
261
267inline epu8 partial_sums_ref(epu8) noexcept;
268
275inline epu8 partial_sums_gen(epu8) noexcept;
276
282inline epu8 partial_sums_round(epu8) noexcept;
283
294inline epu8 partial_sums(epu8 v) noexcept { return partial_sums_round(v); }
295
301inline uint8_t horiz_max_ref(epu8) noexcept;
302
309inline uint8_t horiz_max_gen(epu8) noexcept;
310
316inline uint8_t horiz_max4(epu8) noexcept;
317
323inline uint8_t horiz_max3(epu8) noexcept;
324
335inline uint8_t horiz_max(epu8 v) noexcept { return horiz_max4(v); }
336
342inline epu8 partial_max_ref(epu8) noexcept;
343
350inline epu8 partial_max_gen(epu8) noexcept;
351
357inline epu8 partial_max_round(epu8) noexcept;
358
369inline epu8 partial_max(epu8 v) noexcept { return partial_max_round(v); }
370
376inline uint8_t horiz_min_ref(epu8) noexcept;
377
384inline uint8_t horiz_min_gen(epu8) noexcept;
385
391inline uint8_t horiz_min4(epu8) noexcept;
392
398inline uint8_t horiz_min3(epu8) noexcept;
399
410inline uint8_t horiz_min(epu8 v) noexcept { return horiz_min4(v); }
411
417inline epu8 partial_min_ref(epu8) noexcept;
418
425inline epu8 partial_min_gen(epu8) noexcept;
426
432inline epu8 partial_min_round(epu8) noexcept;
433
444inline epu8 partial_min(epu8 v) noexcept { return partial_min_round(v); }
445
451inline epu8 eval16_ref(epu8 v) noexcept;
452
458inline epu8 eval16_arr(epu8 v) noexcept;
459
465inline epu8 eval16_cycle(epu8 v) noexcept;
466
472inline epu8 eval16_popcount(epu8 v) noexcept;
473
488inline epu8 eval16(epu8 v) noexcept { return eval16_cycle(v); }
489
495inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept;
496
497#ifdef SIMDE_X86_SSE4_2_NATIVE
503inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept;
504#endif
505
511inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept;
512
531inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) noexcept {
532 return first_diff_mask(a, b, bound);
533}
534
540inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept;
541
542#ifdef SIMDE_X86_SSE4_2_NATIVE
548inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept;
549#endif
550
556inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept;
557
576inline uint64_t last_diff(epu8 a, epu8 b, size_t bound = 16) noexcept {
577 return last_diff_mask(a, b, bound);
578}
579
581inline bool less(epu8 a, epu8 b) noexcept;
587inline int8_t less_partial(epu8 a, epu8 b, int k) noexcept;
588
592inline uint64_t first_zero(epu8 v, int bnd) noexcept;
596inline uint64_t last_zero(epu8 v, int bnd) noexcept;
600inline uint64_t first_non_zero(epu8 v, int bnd) noexcept;
604inline uint64_t last_non_zero(epu8 v, int bnd) noexcept;
605
608inline epu8 popcount16(epu8 v) noexcept;
609
625inline bool is_partial_transformation(epu8 v, const size_t k = 16) noexcept;
626
642inline bool is_transformation(epu8 v, const size_t k = 16) noexcept;
643
660inline bool is_partial_permutation(epu8 v, const size_t k = 16) noexcept;
661
662#ifdef SIMDE_X86_SSE4_2_NATIVE
667inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16) noexcept;
668#endif
673inline bool is_permutation_sort(epu8 v, const size_t k = 16) noexcept;
674
679inline bool is_permutation_eval(epu8 v, const size_t k = 16) noexcept;
680
697inline bool is_permutation(epu8 v, const size_t k = 16) noexcept;
698
699} // namespace HPCombi
700
701namespace std {
702
703inline std::ostream &operator<<(std::ostream &stream, HPCombi::epu8 const &a);
704
705inline std::string to_string(HPCombi::epu8 const &a);
706
713} // namespace std
714
715#include "epu8_impl.hpp"
716
717#endif // HPCOMBI_EPU8_HPP_
uint8_t __attribute__((vector_size(16))) epu8
epu8 stands for Extended Packed Unsigned, grouped by 8 bits; this is the low level type chosen by Int...
Definition epu8.hpp:73
HPCombi::TPUBuild and casts from HPCombi::TPU.
defines the macro HPCOMBI_ASSERT
implementation of epu8.hpp ; this file should not be included directly.
std::ostream & operator<<(std::ostream &out, const std::vector< T > &v)
Definition image.cpp:35
Definition bmat16.hpp:39
uint8_t horiz_min4(epu8) noexcept
Same interface as horiz_min but with a different implementation.
Definition epu8_impl.hpp:419
epu8 max(epu8 a, epu8 b) noexcept
Vector max between two HPCombi::epu8 0.
Definition epu8.hpp:125
uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
Same interface as last_diff but with a different implementation.
Definition epu8_impl.hpp:93
uint64_t first_non_zero(epu8 v, int bnd) noexcept
return the index of the first non zero entry or 16 if there are none Only index smaller than bound ar...
Definition epu8_impl.hpp:127
uint8_t horiz_min_ref(epu8) noexcept
Same interface as horiz_min but with a different implementation.
Definition epu8_impl.hpp:410
epu8 eval16_arr(epu8 v) noexcept
Same interface as eval16 but with a different implementation.
Definition epu8_impl.hpp:453
epu8 permuted(epu8 a, epu8 b) noexcept
Same as permuted_ref but with an optimized implementation using intrinsics.
Definition epu8.hpp:103
epu8 sort8_perm(epu8 &a) noexcept
Sort this and return the sorting permutation.
Definition epu8_impl.hpp:220
epu8 shifted_right(epu8 a) noexcept
Left shifted of a HPCombi::epu8 inserting a 0.
Definition epu8.hpp:110
uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
Same interface as first_diff but with a different implementation.
Definition epu8_impl.hpp:78
uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
Same interface as first_diff but with a different implementation.
Definition epu8_impl.hpp:89
epu8 partial_sums_ref(epu8) noexcept
Same interface as partial_sums but with a different implementation.
Definition epu8_impl.hpp:358
epu8 partial_sums(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:294
uint8_t horiz_min(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:410
epu8 remove_dups(epu8 a, uint8_t repl=0) noexcept
Remove duplicates in a sorted HPCombi::epu8.
Definition epu8_impl.hpp:261
epu8 revsorted8(epu8 a) noexcept
Return a HPCombi::epu8 with both halves reverse sorted.
Definition epu8_impl.hpp:213
epu8 permutation_of(epu8 a, epu8 b) noexcept
Find if a vector is a permutation of another one.
Definition epu8_impl.hpp:304
bool is_permutation(epu8 v, const size_t k=16) noexcept
Definition epu8_impl.hpp:531
int8_t less_partial(epu8 a, epu8 b, int k) noexcept
Partial lexicographic comparison between two HPCombi::epu8.
Definition epu8_impl.hpp:114
uint8_t horiz_max4(epu8) noexcept
Same interface as horiz_max but with a different implementation.
Definition epu8_impl.hpp:384
uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
Same interface as last_diff but with a different implementation.
Definition epu8_impl.hpp:106
uint8_t horiz_max3(epu8) noexcept
Same interface as horiz_max but with a different implementation.
Definition epu8_impl.hpp:385
bool is_all_one(epu8 a) noexcept
Test whether all the entries of a HPCombi::epu8 are one.
Definition epu8.hpp:86
constexpr uint64_t prime
A prime number good for hashing.
Definition epu8.hpp:198
uint8_t horiz_min_gen(epu8) noexcept
Same interface as horiz_min but with a different implementation.
Definition epu8_impl.hpp:416
bool is_partial_permutation(epu8 v, const size_t k=16) noexcept
Test for partial permutations.
Definition epu8_impl.hpp:500
bool is_permutation_sort(epu8 v, const size_t k=16) noexcept
Same interface as is_permutation but with a different implementation.
Definition epu8_impl.hpp:522
uint64_t last_zero(epu8 v, int bnd) noexcept
return the index of the last zero entry or 16 if there are none Only index smaller than bound are tak...
Definition epu8_impl.hpp:124
void merge(epu8 &a, epu8 &b) noexcept
Merge two sorted epu8.
Definition epu8_impl.hpp:241
uint8_t horiz_sum4(epu8) noexcept
Same interface as horiz_sum but with a different implementation.
Definition epu8_impl.hpp:349
epu8 popcount16(epu8 v) noexcept
a vector popcount function
Definition epu8_impl.hpp:481
epu8 partial_sums_round(epu8) noexcept
Same interface as partial_sums but with a different implementation.
Definition epu8_impl.hpp:369
uint8_t horiz_sum3(epu8) noexcept
Same interface as horiz_sum but with a different implementation.
Definition epu8_impl.hpp:350
uint8_t horiz_sum_ref(epu8) noexcept
Same interface as horiz_sum but with a different implementation.
Definition epu8_impl.hpp:340
epu8 partial_sums_gen(epu8) noexcept
Same interface as partial_sums but with a different implementation.
Definition epu8_impl.hpp:365
bool equal(epu8 a, epu8 b) noexcept
Equality of HPCombi::epu8.
Definition epu8.hpp:91
epu8 permutation_of_ref(epu8 a, epu8 b) noexcept
Same interface as permutation_of but with a different implementation.
Definition epu8_impl.hpp:295
epu8 min(epu8 a, epu8 b) noexcept
Vector min between two HPCombi::epu8 0.
Definition epu8.hpp:123
epu8 partial_max_ref(epu8) noexcept
Same interface as partial_max but with a different implementation.
Definition epu8_impl.hpp:393
epu8 sorted8(epu8 a) noexcept
Return a HPCombi::epu8 with both halves sorted.
Definition epu8_impl.hpp:207
epu8 partial_max_round(epu8) noexcept
Same interface as partial_max but with a different implementation.
Definition epu8_impl.hpp:404
epu8 eval16(epu8 v) noexcept
Evaluation of a HPCombi::epu8: count how many times each int of 0..15 appears in the input.
Definition epu8.hpp:488
uint8_t horiz_sum(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:260
uint8_t horiz_max(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:335
epu8 reverted(epu8 a) noexcept
Reverting a HPCombi::epu8.
Definition epu8.hpp:120
epu8 eval16_cycle(epu8 v) noexcept
Same interface as eval16 but with a different implementation.
Definition epu8_impl.hpp:464
epu8 eval16_ref(epu8 v) noexcept
Same interface as eval16 but with a different implementation.
Definition epu8_impl.hpp:445
epu8 partial_max_gen(epu8) noexcept
Same interface as partial_max but with a different implementation.
Definition epu8_impl.hpp:400
bool less(epu8 a, epu8 b) noexcept
Lexicographic comparison between two HPCombi::epu8.
Definition epu8_impl.hpp:110
uint8_t horiz_max_ref(epu8) noexcept
Same interface as horiz_max but with a different implementation.
Definition epu8_impl.hpp:375
epu8 sorted(epu8 a) noexcept
Return a sorted HPCombi::epu8.
Definition epu8_impl.hpp:204
constexpr TPUBuild< epu8 > Epu8
Factory object acting as a class constructor for type HPCombi::epu8.
Definition epu8.hpp:81
bool is_permutation_eval(epu8 v, const size_t k=16) noexcept
Same interface as is_permutation but with a different implementation.
Definition epu8_impl.hpp:526
uint64_t first_zero(epu8 v, int bnd) noexcept
return the index of the first zero entry or 16 if there are none Only index smaller than bound are ta...
Definition epu8_impl.hpp:121
bool is_all_zero(epu8 a) noexcept
Test whether all the entries of a HPCombi::epu8 are zero.
Definition epu8.hpp:84
epu8 random_epu8(uint16_t bnd)
A random HPCombi::epu8.
Definition epu8_impl.hpp:249
epu8 partial_min(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:444
epu8 revsorted(epu8 a) noexcept
Return a reverse sorted HPCombi::epu8.
Definition epu8_impl.hpp:210
epu8 partial_min_round(epu8) noexcept
Same interface as partial_min but with a different implementation.
Definition epu8_impl.hpp:439
bool is_sorted(epu8 a) noexcept
Testing if a HPCombi::epu8 is sorted.
Definition epu8_impl.hpp:201
uint64_t last_non_zero(epu8 v, int bnd) noexcept
return the index of the last non zero entry or 16 if there are none Only index smaller than bound are...
Definition epu8_impl.hpp:130
uint64_t last_diff(epu8 a, epu8 b, size_t bound=16) noexcept
The last difference between two HPCombi::epu8.
Definition epu8.hpp:576
epu8 eval16_popcount(epu8 v) noexcept
Same interface as eval16 but with a different implementation.
Definition epu8_impl.hpp:472
epu8 sort_perm(epu8 &a) noexcept
Sort this and return the sorting permutation.
Definition epu8_impl.hpp:217
epu8 partial_min_gen(epu8) noexcept
Same interface as partial_min but with a different implementation.
Definition epu8_impl.hpp:435
epu8 partial_min_ref(epu8) noexcept
Same interface as partial_min but with a different implementation.
Definition epu8_impl.hpp:428
uint8_t horiz_max_gen(epu8) noexcept
Same interface as horiz_max but with a different implementation.
Definition epu8_impl.hpp:381
uint8_t __attribute__((vector_size(16))) epu8
epu8 stands for Extended Packed Unsigned, grouped by 8 bits; this is the low level type chosen by Int...
Definition epu8.hpp:73
epu8 shifted_left(epu8 a) noexcept
Right shifted of a HPCombi::epu8 inserting a 0.
Definition epu8.hpp:117
bool is_transformation(epu8 v, const size_t k=16) noexcept
Test for transformation.
Definition epu8_impl.hpp:494
uint64_t first_diff(epu8 a, epu8 b, size_t bound=16) noexcept
The first difference between two HPCombi::epu8.
Definition epu8.hpp:531
bool is_partial_transformation(epu8 v, const size_t k=16) noexcept
Test for partial transformation.
Definition epu8_impl.hpp:486
epu8 permuted_ref(epu8 a, epu8 b) noexcept
Apply a permutation b on the vector a: for i=0..16 {result[i] = a[b[i]}.
Definition epu8_impl.hpp:60
epu8 partial_max(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:369
uint8_t horiz_sum_gen(epu8) noexcept
Same interface as horiz_sum but with a different implementation.
Definition epu8_impl.hpp:346
uint8_t horiz_min3(epu8) noexcept
Same interface as horiz_min but with a different implementation.
Definition epu8_impl.hpp:420
bool not_equal(epu8 a, epu8 b) noexcept
Non equality of HPCombi::epu8.
Definition epu8.hpp:95
Definition bmat16_impl.hpp:362
Given a transformation from 0..15 → 0..15, build at compile-time the array representing the transform...
Definition builder.hpp:49
HPCombi::VectGeneric.