26#ifndef HPCOMBI_EPU8_HPP_
27#define HPCOMBI_EPU8_HPP_
39#if defined(__GNUC__) && !defined(__clang__)
40#pragma GCC diagnostic push
41#pragma GCC diagnostic ignored "-Wswitch-default"
42#pragma GCC diagnostic ignored "-Wpacked"
45#pragma clang diagnostic push
46#pragma clang diagnostic ignored "-Wbitwise-instead-of-logical"
48#include "simde/x86/sse4.1.h"
49#include "simde/x86/sse4.2.h"
51#pragma clang diagnostic pop
53#if defined(__GNUC__) && !defined(__clang__)
54#pragma GCC diagnostic pop
60inline constexpr uint8_t
61operator"" _u8(
unsigned long long arg)
noexcept {
62 return static_cast<uint8_t
>(arg);
73using epu8 = uint8_t __attribute__((vector_size(16)));
75static_assert(
alignof(
epu8) == 16,
76 "epu8 type is not properly aligned by the compiler !");
87 return simde_mm_testc_si128(a,
Epu8(0xFF));
104 return simde_mm_shuffle_epi8(a, b);
111 return simde_mm_bslli_si128(a, 1);
172#ifdef SIMDE_X86_SSE4_2_NATIVE
177inline epu8 permutation_of_cmpestrm(
epu8 a,
epu8 b)
noexcept;
198constexpr uint64_t
prime = 0x9e3779b97f4a7bb9;
497#ifdef SIMDE_X86_SSE4_2_NATIVE
503inline uint64_t first_diff_cmpstr(
epu8 a,
epu8 b,
size_t bound = 16) noexcept;
542#ifdef SIMDE_X86_SSE4_2_NATIVE
548inline uint64_t last_diff_cmpstr(
epu8 a,
epu8 b,
size_t bound = 16) noexcept;
662#ifdef SIMDE_X86_SSE4_2_NATIVE
667inline bool is_permutation_cpmestri(
epu8 v,
const size_t k = 16) noexcept;
uint8_t __attribute__((vector_size(16))) epu8
epu8 stands for Extended Packed Unsigned, grouped by 8 bits; this is the low level type chosen by Int...
Definition epu8.hpp:73
HPCombi::TPUBuild and casts from HPCombi::TPU.
defines the macro HPCOMBI_ASSERT
implementation of epu8.hpp ; this file should not be included directly.
std::ostream & operator<<(std::ostream &out, const std::vector< T > &v)
Definition image.cpp:35
uint8_t horiz_min4(epu8) noexcept
Same interface as horiz_min but with a different implementation.
Definition epu8_impl.hpp:419
epu8 max(epu8 a, epu8 b) noexcept
Vector max between two HPCombi::epu8 0.
Definition epu8.hpp:125
uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
Same interface as last_diff but with a different implementation.
Definition epu8_impl.hpp:93
uint64_t first_non_zero(epu8 v, int bnd) noexcept
return the index of the first non zero entry or 16 if there are none Only index smaller than bound ar...
Definition epu8_impl.hpp:127
uint8_t horiz_min_ref(epu8) noexcept
Same interface as horiz_min but with a different implementation.
Definition epu8_impl.hpp:410
epu8 eval16_arr(epu8 v) noexcept
Same interface as eval16 but with a different implementation.
Definition epu8_impl.hpp:453
epu8 permuted(epu8 a, epu8 b) noexcept
Same as permuted_ref but with an optimized implementation using intrinsics.
Definition epu8.hpp:103
epu8 sort8_perm(epu8 &a) noexcept
Sort this and return the sorting permutation.
Definition epu8_impl.hpp:220
epu8 shifted_right(epu8 a) noexcept
Left shifted of a HPCombi::epu8 inserting a 0.
Definition epu8.hpp:110
uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
Same interface as first_diff but with a different implementation.
Definition epu8_impl.hpp:78
uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
Same interface as first_diff but with a different implementation.
Definition epu8_impl.hpp:89
epu8 partial_sums_ref(epu8) noexcept
Same interface as partial_sums but with a different implementation.
Definition epu8_impl.hpp:358
epu8 partial_sums(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:294
uint8_t horiz_min(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:410
epu8 remove_dups(epu8 a, uint8_t repl=0) noexcept
Remove duplicates in a sorted HPCombi::epu8.
Definition epu8_impl.hpp:261
epu8 revsorted8(epu8 a) noexcept
Return a HPCombi::epu8 with both halves reverse sorted.
Definition epu8_impl.hpp:213
epu8 permutation_of(epu8 a, epu8 b) noexcept
Find if a vector is a permutation of another one.
Definition epu8_impl.hpp:304
bool is_permutation(epu8 v, const size_t k=16) noexcept
Definition epu8_impl.hpp:531
int8_t less_partial(epu8 a, epu8 b, int k) noexcept
Partial lexicographic comparison between two HPCombi::epu8.
Definition epu8_impl.hpp:114
uint8_t horiz_max4(epu8) noexcept
Same interface as horiz_max but with a different implementation.
Definition epu8_impl.hpp:384
uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
Same interface as last_diff but with a different implementation.
Definition epu8_impl.hpp:106
uint8_t horiz_max3(epu8) noexcept
Same interface as horiz_max but with a different implementation.
Definition epu8_impl.hpp:385
bool is_all_one(epu8 a) noexcept
Test whether all the entries of a HPCombi::epu8 are one.
Definition epu8.hpp:86
constexpr uint64_t prime
A prime number good for hashing.
Definition epu8.hpp:198
uint8_t horiz_min_gen(epu8) noexcept
Same interface as horiz_min but with a different implementation.
Definition epu8_impl.hpp:416
bool is_partial_permutation(epu8 v, const size_t k=16) noexcept
Test for partial permutations.
Definition epu8_impl.hpp:500
bool is_permutation_sort(epu8 v, const size_t k=16) noexcept
Same interface as is_permutation but with a different implementation.
Definition epu8_impl.hpp:522
uint64_t last_zero(epu8 v, int bnd) noexcept
return the index of the last zero entry or 16 if there are none Only index smaller than bound are tak...
Definition epu8_impl.hpp:124
void merge(epu8 &a, epu8 &b) noexcept
Merge two sorted epu8.
Definition epu8_impl.hpp:241
uint8_t horiz_sum4(epu8) noexcept
Same interface as horiz_sum but with a different implementation.
Definition epu8_impl.hpp:349
epu8 popcount16(epu8 v) noexcept
a vector popcount function
Definition epu8_impl.hpp:481
epu8 partial_sums_round(epu8) noexcept
Same interface as partial_sums but with a different implementation.
Definition epu8_impl.hpp:369
uint8_t horiz_sum3(epu8) noexcept
Same interface as horiz_sum but with a different implementation.
Definition epu8_impl.hpp:350
uint8_t horiz_sum_ref(epu8) noexcept
Same interface as horiz_sum but with a different implementation.
Definition epu8_impl.hpp:340
epu8 partial_sums_gen(epu8) noexcept
Same interface as partial_sums but with a different implementation.
Definition epu8_impl.hpp:365
bool equal(epu8 a, epu8 b) noexcept
Equality of HPCombi::epu8.
Definition epu8.hpp:91
epu8 permutation_of_ref(epu8 a, epu8 b) noexcept
Same interface as permutation_of but with a different implementation.
Definition epu8_impl.hpp:295
epu8 min(epu8 a, epu8 b) noexcept
Vector min between two HPCombi::epu8 0.
Definition epu8.hpp:123
epu8 partial_max_ref(epu8) noexcept
Same interface as partial_max but with a different implementation.
Definition epu8_impl.hpp:393
epu8 sorted8(epu8 a) noexcept
Return a HPCombi::epu8 with both halves sorted.
Definition epu8_impl.hpp:207
epu8 partial_max_round(epu8) noexcept
Same interface as partial_max but with a different implementation.
Definition epu8_impl.hpp:404
epu8 eval16(epu8 v) noexcept
Evaluation of a HPCombi::epu8: count how many times each int of 0..15 appears in the input.
Definition epu8.hpp:488
uint8_t horiz_sum(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:260
uint8_t horiz_max(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:335
epu8 reverted(epu8 a) noexcept
Reverting a HPCombi::epu8.
Definition epu8.hpp:120
epu8 eval16_cycle(epu8 v) noexcept
Same interface as eval16 but with a different implementation.
Definition epu8_impl.hpp:464
epu8 eval16_ref(epu8 v) noexcept
Same interface as eval16 but with a different implementation.
Definition epu8_impl.hpp:445
epu8 partial_max_gen(epu8) noexcept
Same interface as partial_max but with a different implementation.
Definition epu8_impl.hpp:400
bool less(epu8 a, epu8 b) noexcept
Lexicographic comparison between two HPCombi::epu8.
Definition epu8_impl.hpp:110
uint8_t horiz_max_ref(epu8) noexcept
Same interface as horiz_max but with a different implementation.
Definition epu8_impl.hpp:375
epu8 sorted(epu8 a) noexcept
Return a sorted HPCombi::epu8.
Definition epu8_impl.hpp:204
constexpr TPUBuild< epu8 > Epu8
Factory object acting as a class constructor for type HPCombi::epu8.
Definition epu8.hpp:81
bool is_permutation_eval(epu8 v, const size_t k=16) noexcept
Same interface as is_permutation but with a different implementation.
Definition epu8_impl.hpp:526
uint64_t first_zero(epu8 v, int bnd) noexcept
return the index of the first zero entry or 16 if there are none Only index smaller than bound are ta...
Definition epu8_impl.hpp:121
bool is_all_zero(epu8 a) noexcept
Test whether all the entries of a HPCombi::epu8 are zero.
Definition epu8.hpp:84
epu8 random_epu8(uint16_t bnd)
A random HPCombi::epu8.
Definition epu8_impl.hpp:249
epu8 partial_min(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:444
epu8 revsorted(epu8 a) noexcept
Return a reverse sorted HPCombi::epu8.
Definition epu8_impl.hpp:210
epu8 partial_min_round(epu8) noexcept
Same interface as partial_min but with a different implementation.
Definition epu8_impl.hpp:439
bool is_sorted(epu8 a) noexcept
Testing if a HPCombi::epu8 is sorted.
Definition epu8_impl.hpp:201
uint64_t last_non_zero(epu8 v, int bnd) noexcept
return the index of the last non zero entry or 16 if there are none Only index smaller than bound are...
Definition epu8_impl.hpp:130
uint64_t last_diff(epu8 a, epu8 b, size_t bound=16) noexcept
The last difference between two HPCombi::epu8.
Definition epu8.hpp:576
epu8 eval16_popcount(epu8 v) noexcept
Same interface as eval16 but with a different implementation.
Definition epu8_impl.hpp:472
epu8 sort_perm(epu8 &a) noexcept
Sort this and return the sorting permutation.
Definition epu8_impl.hpp:217
epu8 partial_min_gen(epu8) noexcept
Same interface as partial_min but with a different implementation.
Definition epu8_impl.hpp:435
epu8 partial_min_ref(epu8) noexcept
Same interface as partial_min but with a different implementation.
Definition epu8_impl.hpp:428
uint8_t horiz_max_gen(epu8) noexcept
Same interface as horiz_max but with a different implementation.
Definition epu8_impl.hpp:381
uint8_t __attribute__((vector_size(16))) epu8
epu8 stands for Extended Packed Unsigned, grouped by 8 bits; this is the low level type chosen by Int...
Definition epu8.hpp:73
epu8 shifted_left(epu8 a) noexcept
Right shifted of a HPCombi::epu8 inserting a 0.
Definition epu8.hpp:117
bool is_transformation(epu8 v, const size_t k=16) noexcept
Test for transformation.
Definition epu8_impl.hpp:494
uint64_t first_diff(epu8 a, epu8 b, size_t bound=16) noexcept
The first difference between two HPCombi::epu8.
Definition epu8.hpp:531
bool is_partial_transformation(epu8 v, const size_t k=16) noexcept
Test for partial transformation.
Definition epu8_impl.hpp:486
epu8 permuted_ref(epu8 a, epu8 b) noexcept
Apply a permutation b on the vector a: for i=0..16 {result[i] = a[b[i]}.
Definition epu8_impl.hpp:60
epu8 partial_max(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:369
uint8_t horiz_sum_gen(epu8) noexcept
Same interface as horiz_sum but with a different implementation.
Definition epu8_impl.hpp:346
uint8_t horiz_min3(epu8) noexcept
Same interface as horiz_min but with a different implementation.
Definition epu8_impl.hpp:420
bool not_equal(epu8 a, epu8 b) noexcept
Non equality of HPCombi::epu8.
Definition epu8.hpp:95
Definition bmat16_impl.hpp:362
Given a transformation from 0..15 → 0..15, build at compile-time the array representing the transform...
Definition builder.hpp:49