Namespaces
namespace	detail

namespace	power_helper

Classes
class	BMat16
	Class for fast boolean matrices of dimension up to 16 x 16. More...

class	BMat8
	Boolean matrices of dimension up to 8×8, stored as a single uint64; isomorph to binary relations with methods for composition. More...

struct	Perm16
	Permutations of \(\{0\dots 15\}\): A permutation is a bijective mapping of a set of n elements onto itself. More...

struct	PermGeneric
	Vanilla (ie NOT optimized) implementation of a permutation, used to check for test correctness and as baseline to measure speedup. More...

struct	PPerm16
	Partial permutation of \(\{0\dots 15\}\); see also HPCombi::Perm16; partial means it might not be defined everywhere (but where it's defined, it's injective). More...

struct	PTransf16
	Partial transformation of \(\{0\dots 15\}\); see HPCombi::Transf16; partial means it might not be defined everywhere. More...

struct	TPUBuild
	Given a transformation from 0..15 → 0..15, build at compile-time the array representing the transformation. More...

struct	Transf16
	Full transformation of \(\{0\dots 15\}\): a transformation is a mapping of a set of n elements into itself; ie as opposed to a permutation, it is not necessarily injective. More...

struct	Vect16
	Vector of 16 bytes, with some optimized methods, superclass of HPCombi::Transf16. More...

struct	VectGeneric
	VectGeneric is to Vect16 what PermGeneric is to Perm16; see PermGeneric. More...

Typedefs
using	xpu16 = uint16_t __attribute__((vector_size(32)))

using	xpu64 = uint64_t __attribute__((vector_size(32)))

using	epu64 = uint64_t __attribute__((__vector_size__(16), __may_alias__))

using	epu8 = uint8_t __attribute__((vector_size(16)))
	epu8 stands for Extended Packed Unsigned, grouped by 8 bits; this is the low level type chosen by Intel for their API to intrinsics, ie a SIMD vector of 16 unsigned bytes (16×8 = 128bits).

Functions
xpu64	to_line (xpu64 vect)
	Converting storage type from blocks to rows of a xpu64 representing a 16x16 matrix (used in BMat16).

xpu64	to_block (xpu64 vect)
	Converting storage type from rows to blocks of a xpu64 representing a 16x16 matrix (used in BMat16).

template<class TPU>
TPUBuild< TPU >::array &	as_array (TPU &v) noexcept
	Cast a TPU to a c++ `std::array`.

template<class TPU>
const TPUBuild< TPU >::array &	as_array (const TPU &v) noexcept
	Cast a constant TPU to a constant c++ `std::array`.

template<class TPU>
VectGeneric< TPUBuild< TPU >::size > &	as_VectGeneric (TPU &v)
	Cast a HPCombi::epu8 to a c++ HPCombi::VectGeneric.

template<class TPU>
const VectGeneric< TPUBuild< TPU >::size > &	as_VectGeneric (const TPU &v)
	Cast a HPCombi::epu8 to a c++ HPCombi::VectGeneric.

constexpr uint8_t	operator""_u8 (unsigned long long arg) noexcept
	Unsigned 8 bits int constant.

bool	is_all_zero (epu8 a) noexcept
	Test whether all the entries of a HPCombi::epu8 are zero.

bool	is_all_one (epu8 a) noexcept
	Test whether all the entries of a HPCombi::epu8 are one.

bool	equal (epu8 a, epu8 b) noexcept
	Equality of HPCombi::epu8.

bool	not_equal (epu8 a, epu8 b) noexcept
	Non equality of HPCombi::epu8.

epu8	permuted_ref (epu8 a, epu8 b) noexcept
	Apply a permutation `b` on the vector `a:` for i=0..16 {result[i] = a[b[i]}.

epu8	permuted (epu8 a, epu8 b) noexcept
	Same as permuted_ref but with an optimized implementation using intrinsics.

epu8	shifted_right (epu8 a) noexcept
	Left shifted of a HPCombi::epu8 inserting a 0.

epu8	shifted_left (epu8 a) noexcept
	Right shifted of a HPCombi::epu8 inserting a 0.

epu8	reverted (epu8 a) noexcept
	Reverting a HPCombi::epu8.

epu8	min (epu8 a, epu8 b) noexcept
	Vector min between two HPCombi::epu8 0.

epu8	max (epu8 a, epu8 b) noexcept
	Vector max between two HPCombi::epu8 0.

bool	is_sorted (epu8 a) noexcept
	Testing if a HPCombi::epu8 is sorted.

epu8	sorted (epu8 a) noexcept
	Return a sorted HPCombi::epu8.

epu8	sorted8 (epu8 a) noexcept
	Return a HPCombi::epu8 with both halves sorted.

epu8	revsorted (epu8 a) noexcept
	Return a reverse sorted HPCombi::epu8.

epu8	revsorted8 (epu8 a) noexcept
	Return a HPCombi::epu8 with both halves reverse sorted.

epu8	sort_perm (epu8 &a) noexcept
	Sort `this` and return the sorting permutation.

epu8	sort8_perm (epu8 &a) noexcept
	Sort `this` and return the sorting permutation.

void	merge (epu8 &a, epu8 &b) noexcept
	Merge two sorted epu8.

epu8	permutation_of_ref (epu8 a, epu8 b) noexcept
	Same interface as permutation_of but with a different implementation.

epu8	permutation_of (epu8 a, epu8 b) noexcept
	Find if a vector is a permutation of another one.

epu8	random_epu8 (uint16_t bnd)
	A random HPCombi::epu8.

epu8	remove_dups (epu8 a, uint8_t repl=0) noexcept
	Remove duplicates in a sorted HPCombi::epu8.

uint8_t	horiz_sum_ref (epu8) noexcept
	Same interface as horiz_sum but with a different implementation.

uint8_t	horiz_sum_gen (epu8) noexcept
	Same interface as horiz_sum but with a different implementation.

uint8_t	horiz_sum4 (epu8) noexcept
	Same interface as horiz_sum but with a different implementation.

uint8_t	horiz_sum3 (epu8) noexcept
	Same interface as horiz_sum but with a different implementation.

uint8_t	horiz_sum (epu8 v) noexcept
	Horizontal sum of a HPCombi::epu8.

epu8	partial_sums_ref (epu8) noexcept
	Same interface as partial_sums but with a different implementation.

epu8	partial_sums_gen (epu8) noexcept
	Same interface as partial_sums but with a different implementation.

epu8	partial_sums_round (epu8) noexcept
	Same interface as partial_sums but with a different implementation.

epu8	partial_sums (epu8 v) noexcept
	Horizontal partial sum of a HPCombi::epu8.

uint8_t	horiz_max_ref (epu8) noexcept
	Same interface as horiz_max but with a different implementation.

uint8_t	horiz_max_gen (epu8) noexcept
	Same interface as horiz_max but with a different implementation.

uint8_t	horiz_max4 (epu8) noexcept
	Same interface as horiz_max but with a different implementation.

uint8_t	horiz_max3 (epu8) noexcept
	Same interface as horiz_max but with a different implementation.

uint8_t	horiz_max (epu8 v) noexcept
	Horizontal sum of a HPCombi::epu8.

epu8	partial_max_ref (epu8) noexcept
	Same interface as partial_max but with a different implementation.

epu8	partial_max_gen (epu8) noexcept
	Same interface as partial_max but with a different implementation.

epu8	partial_max_round (epu8) noexcept
	Same interface as partial_max but with a different implementation.

epu8	partial_max (epu8 v) noexcept
	Horizontal partial sum of a HPCombi::epu8.

uint8_t	horiz_min_ref (epu8) noexcept
	Same interface as horiz_min but with a different implementation.

uint8_t	horiz_min_gen (epu8) noexcept
	Same interface as horiz_min but with a different implementation.

uint8_t	horiz_min4 (epu8) noexcept
	Same interface as horiz_min but with a different implementation.

uint8_t	horiz_min3 (epu8) noexcept
	Same interface as horiz_min but with a different implementation.

uint8_t	horiz_min (epu8 v) noexcept
	Horizontal sum of a HPCombi::epu8.

epu8	partial_min_ref (epu8) noexcept
	Same interface as partial_min but with a different implementation.

epu8	partial_min_gen (epu8) noexcept
	Same interface as partial_min but with a different implementation.

epu8	partial_min_round (epu8) noexcept
	Same interface as partial_min but with a different implementation.

epu8	partial_min (epu8 v) noexcept
	Horizontal partial sum of a HPCombi::epu8.

epu8	eval16_ref (epu8 v) noexcept
	Same interface as eval16 but with a different implementation.

epu8	eval16_arr (epu8 v) noexcept
	Same interface as eval16 but with a different implementation.

epu8	eval16_cycle (epu8 v) noexcept
	Same interface as eval16 but with a different implementation.

epu8	eval16_popcount (epu8 v) noexcept
	Same interface as eval16 but with a different implementation.

epu8	eval16 (epu8 v) noexcept
	Evaluation of a HPCombi::epu8: count how many times each int of 0..15 appears in the input.

uint64_t	first_diff_ref (epu8 a, epu8 b, size_t bound=16) noexcept
	Same interface as first_diff but with a different implementation.

uint64_t	first_diff_mask (epu8 a, epu8 b, size_t bound=16) noexcept
	Same interface as first_diff but with a different implementation.

uint64_t	first_diff (epu8 a, epu8 b, size_t bound=16) noexcept
	The first difference between two HPCombi::epu8.

uint64_t	last_diff_ref (epu8 a, epu8 b, size_t bound=16) noexcept
	Same interface as last_diff but with a different implementation.

uint64_t	last_diff_mask (epu8 a, epu8 b, size_t bound=16) noexcept
	Same interface as last_diff but with a different implementation.

uint64_t	last_diff (epu8 a, epu8 b, size_t bound=16) noexcept
	The last difference between two HPCombi::epu8.

bool	less (epu8 a, epu8 b) noexcept
	Lexicographic comparison between two HPCombi::epu8.

int8_t	less_partial (epu8 a, epu8 b, int k) noexcept
	Partial lexicographic comparison between two HPCombi::epu8.

uint64_t	first_zero (epu8 v, int bnd) noexcept
	return the index of the first zero entry or 16 if there are none Only index smaller than bound are taken into account.

uint64_t	last_zero (epu8 v, int bnd) noexcept
	return the index of the last zero entry or 16 if there are none Only index smaller than bound are taken into account.

uint64_t	first_non_zero (epu8 v, int bnd) noexcept
	return the index of the first non zero entry or 16 if there are none Only index smaller than bound are taken into account.

uint64_t	last_non_zero (epu8 v, int bnd) noexcept
	return the index of the last non zero entry or 16 if there are none Only index smaller than bound are taken into account.

epu8	popcount16 (epu8 v) noexcept
	a vector popcount function

bool	is_partial_transformation (epu8 v, const size_t k=16) noexcept
	Test for partial transformation.

bool	is_transformation (epu8 v, const size_t k=16) noexcept
	Test for transformation.

bool	is_partial_permutation (epu8 v, const size_t k=16) noexcept
	Test for partial permutations.

bool	is_permutation_sort (epu8 v, const size_t k=16) noexcept
	Same interface as is_permutation but with a different implementation.

bool	is_permutation_eval (epu8 v, const size_t k=16) noexcept
	Same interface as is_permutation but with a different implementation.

bool	is_permutation (epu8 v, const size_t k=16) noexcept

uint64_t	first_mask (epu8 msk, size_t bound)

uint64_t	last_mask (epu8 msk, size_t bound)

template<bool Increasing = true, size_t sz>
epu8	network_sort (epu8 res, std::array< epu8, sz > rounds)
	Apply a sorting network.

template<bool Increasing = true, size_t sz>
epu8	network_sort_perm (epu8 &v, std::array< epu8, sz > rounds)
	Apply a sorting network in place and return the permutation.

void	merge_rev (epu8 &a, epu8 &b) noexcept

epu8	eval16_gen (epu8 v) noexcept

template<typename T, typename M = power_helper::Monoid<T>>
const T	square (const T x)
	A generic compile time squaring function.

template<unsigned exp, typename T, typename M = power_helper::Monoid<T>>
const T	pow (const T x)
	A generic compile time exponentiation function.

template<size_t Size, typename Expo = uint8_t>
std::array< Expo, Size >	sorted_vect (std::array< Expo, Size > v)

Variables
constexpr std::array< epu8, 4 >	masks

constexpr TPUBuild< epu8 >	Epu8 {}
	Factory object acting as a class constructor for type HPCombi::epu8.

constexpr uint64_t	prime = 0x9e3779b97f4a7bb9
	A prime number good for hashing.

constexpr std::array< epu8, 9 >	sorting_rounds
	A 16-way sorting network.

constexpr std::array< epu8, 6 >	sorting_rounds8
	A duplicated 8-way sorting network.

constexpr std::array< epu8, 6 >	merge_rounds

constexpr std::array< epu8, 3 >	inverting_rounds

constexpr std::array< epu8, 4 >	summing_rounds
	Permutation Round for partial and horizontal sums.

constexpr std::array< epu8, 4 >	mining_rounds

Typedef Documentation

◆ epu64

using HPCombi::epu64 = uint64_t __attribute__((__vector_size__(16), __may_alias__))

◆ epu8

using HPCombi::epu8 = uint8_t __attribute__((vector_size(16)))

epu8 stands for Extended Packed Unsigned, grouped by 8 bits; this is the low level type chosen by Intel for their API to intrinsics, ie a SIMD vector of 16 unsigned bytes (16×8 = 128bits).

Functions using this type use semantically equivalent types, eg a _m128 which is a vector containing 2 signed 64 bits integers. A flag tells the compiler to silently consider those types equivalent.

◆ xpu16

using HPCombi::xpu16 = uint16_t __attribute__((vector_size(32)))

◆ xpu64

using HPCombi::xpu64 = uint64_t __attribute__((vector_size(32)))

Function Documentation

◆ as_array() [1/2]

template<class TPU>

const TPUBuild< TPU >::array & HPCombi::as_array ( const TPU & v )

inlinenoexcept

Cast a constant TPU to a constant c++ std::array.

This is usually faster for algorithm using a lot of indexed access.

◆ as_array() [2/2]

template<class TPU>

TPUBuild< TPU >::array & HPCombi::as_array ( TPU & v )

inlinenoexcept

Cast a TPU to a c++ std::array.

This is usually faster for algorithm using a lot of indexed access.

◆ as_VectGeneric() [1/2]

template<class TPU>

const VectGeneric< TPUBuild< TPU >::size > & HPCombi::as_VectGeneric ( const TPU & v )

inline

Cast a HPCombi::epu8 to a c++ HPCombi::VectGeneric.

This is usually faster for algorithm using a lot of indexed access.

◆ as_VectGeneric() [2/2]

template<class TPU>

VectGeneric< TPUBuild< TPU >::size > & HPCombi::as_VectGeneric ( TPU & v )

inline

Cast a HPCombi::epu8 to a c++ HPCombi::VectGeneric.

This is usually faster for algorithm using a lot of indexed access.

◆ equal()

bool HPCombi::equal	(	epu8	a,
		epu8	b )

inlinenoexcept

Equality of HPCombi::epu8.

◆ eval16()

epu8 HPCombi::eval16 ( epu8 v )

inlinenoexcept

Evaluation of a HPCombi::epu8: count how many times each int of 0..15 appears in the input.

Parameters

v	: a HPCombi::epu8

Returns: the evaluation, that is the HPCombi::epu8 r such that r[i] is the number of occurrence of i in the input v

Example:: eval16(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15});

HPCombi::eval16
epu8 eval16(epu8 v) noexcept
Evaluation of a HPCombi::epu8: count how many times each int of 0..15 appears in the input.
Definition epu8.hpp:488

HPCombi::epu8
uint8_t __attribute__((vector_size(16))) epu8
epu8 stands for Extended Packed Unsigned, grouped by 8 bits; this is the low level type chosen by Int...
Definition epu8.hpp:73

Returns { 1, 1, 2, 1, 1, 3, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1}

Warning: The entries larger than 15 are ignored

◆ eval16_arr()

epu8 HPCombi::eval16_arr ( epu8 v )

inlinenoexcept

Same interface as eval16 but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and cast to array

◆ eval16_cycle()

epu8 HPCombi::eval16_cycle ( epu8 v )

inlinenoexcept

Same interface as eval16 but with a different implementation.

Algorithm:: Vector \(O(n)\) using cyclic shifting

◆ eval16_gen()

epu8 HPCombi::eval16_gen ( epu8 v )

inlinenoexcept

◆ eval16_popcount()

epu8 HPCombi::eval16_popcount ( epu8 v )

inlinenoexcept

Same interface as eval16 but with a different implementation.

Algorithm:: Vector \(O(n)\) using popcount

◆ eval16_ref()

epu8 HPCombi::eval16_ref ( epu8 v )

inlinenoexcept

Same interface as eval16 but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access

◆ first_diff()

uint64_t HPCombi::first_diff	(	epu8	a,
		epu8	b,
		size_t	bound = 16 )

inlinenoexcept

The first difference between two HPCombi::epu8.

Parameters

a,b	: two HPCombi::epu8
bound	: a `size_t`

Returns: the smallest index \(i<bound\) such that a[i] and b[i] differ, 16 if there is no differences before bound.

Example:: epu8 a { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15};

epu8 b { 5, 5, 2, 9, 1, 6,12, 4, 0, 4, 4, 4,12,13,14,15};

epu8
uint8_t __attribute__((vector_size(16))) epu8
epu8 stands for Extended Packed Unsigned, grouped by 8 bits; this is the low level type chosen by Int...
Definition epu8.hpp:73

then first_diff(a, b) returns 3, first_diff(a, b, 3) returns 16, first_diff(a, b, 4) returns 3, first_diff(a, b, 7) returns 3.

Warning: bound is assumed to be smaller or equal than 16

◆ first_diff_mask()

uint64_t HPCombi::first_diff_mask	(	epu8	a,
		epu8	b,
		size_t	bound = 16 )

inlinenoexcept

Same interface as first_diff but with a different implementation.

Algorithm:: Using vector comparison and mask

◆ first_diff_ref()

uint64_t HPCombi::first_diff_ref	(	epu8	a,
		epu8	b,
		size_t	bound = 16 )

inlinenoexcept

Same interface as first_diff but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access

◆ first_mask()

uint64_t HPCombi::first_mask	(	epu8	msk,
		size_t	bound )

inline

◆ first_non_zero()

uint64_t HPCombi::first_non_zero	(	epu8	v,
		int	bnd )

inlinenoexcept

return the index of the first non zero entry or 16 if there are none Only index smaller than bound are taken into account.

◆ first_zero()

uint64_t HPCombi::first_zero	(	epu8	v,
		int	bnd )

inlinenoexcept

return the index of the first zero entry or 16 if there are none Only index smaller than bound are taken into account.

◆ horiz_max()

uint8_t HPCombi::horiz_max ( epu8 v )

inlinenoexcept

Horizontal sum of a HPCombi::epu8.

Returns: the horizontal sum of the input

Example:: horiz_max(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2, 0,12, 0, 0, 0});

HPCombi::horiz_max
uint8_t horiz_max(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:335

Returns 12

◆ horiz_max3()

uint8_t HPCombi::horiz_max3 ( epu8 v )

inlinenoexcept

Same interface as horiz_max but with a different implementation.

Algorithm:: 3-stages parallel algorithm + indexed access

◆ horiz_max4()

uint8_t HPCombi::horiz_max4 ( epu8 v )

inlinenoexcept

Same interface as horiz_max but with a different implementation.

Algorithm:: 4-stages parallel algorithm

◆ horiz_max_gen()

uint8_t HPCombi::horiz_max_gen ( epu8 v )

inlinenoexcept

Same interface as horiz_max but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access through HPCombi::VectGeneric

◆ horiz_max_ref()

uint8_t HPCombi::horiz_max_ref ( epu8 v )

inlinenoexcept

Same interface as horiz_max but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access

◆ horiz_min()

uint8_t HPCombi::horiz_min ( epu8 v )

inlinenoexcept

Horizontal sum of a HPCombi::epu8.

Returns: the horizontal sum of the input

Example:: horiz_min(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 1, 3, 2, 2,12, 3, 4, 4});

HPCombi::horiz_min
uint8_t horiz_min(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:410

Returns 1

◆ horiz_min3()

uint8_t HPCombi::horiz_min3 ( epu8 v )

inlinenoexcept

Same interface as horiz_min but with a different implementation.

Algorithm:: 3-stages parallel algorithm + indexed access

◆ horiz_min4()

uint8_t HPCombi::horiz_min4 ( epu8 v )

inlinenoexcept

Same interface as horiz_min but with a different implementation.

Algorithm:: 4-stages parallel algorithm

◆ horiz_min_gen()

uint8_t HPCombi::horiz_min_gen ( epu8 v )

inlinenoexcept

Same interface as horiz_min but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access through HPCombi::VectGeneric

◆ horiz_min_ref()

uint8_t HPCombi::horiz_min_ref ( epu8 v )

inlinenoexcept

Same interface as horiz_min but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access

◆ horiz_sum()

uint8_t HPCombi::horiz_sum ( epu8 v )

inlinenoexcept

Horizontal sum of a HPCombi::epu8.

Returns: the horizontal sum of the input

Example:: horiz_sum(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15});

HPCombi::horiz_sum
uint8_t horiz_sum(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:260

Returns 110

Warning: The result is supposed to fit in a uint8_t

◆ horiz_sum3()

uint8_t HPCombi::horiz_sum3 ( epu8 v )

inlinenoexcept

Same interface as horiz_sum but with a different implementation.

Algorithm:: 3-stages parallel algorithm + indexed access

◆ horiz_sum4()

uint8_t HPCombi::horiz_sum4 ( epu8 v )

inlinenoexcept

Same interface as horiz_sum but with a different implementation.

Algorithm:: 4-stages parallel algorithm

◆ horiz_sum_gen()

uint8_t HPCombi::horiz_sum_gen ( epu8 v )

inlinenoexcept

Same interface as horiz_sum but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access through HPCombi::VectGeneric

◆ horiz_sum_ref()

uint8_t HPCombi::horiz_sum_ref ( epu8 v )

inlinenoexcept

Same interface as horiz_sum but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access

◆ is_all_one()

bool HPCombi::is_all_one ( epu8 a )

inlinenoexcept

Test whether all the entries of a HPCombi::epu8 are one.

◆ is_all_zero()

bool HPCombi::is_all_zero ( epu8 a )

inlinenoexcept

Test whether all the entries of a HPCombi::epu8 are zero.

◆ is_partial_permutation()

bool HPCombi::is_partial_permutation	(	epu8	v,
		const size_t	k = 16 )

inlinenoexcept

Test for partial permutations.

Returns: whether *this is a partial permutation.

Parameters

v	the vector to test
k	the size of `*this` (default 16)

Points where the function is undefined are mapped to 0xff. If *this is a partial permutation of \(0\dots n-1\) for \(n<16\), it should be completed to a partial permutation of \(0\dots 15\) by adding fixed points. That is the values \(i\geq n\) should be mapped to themself.

Example:: The permutation \(\begin{matrix}0 1 2 3 4 5\\ 2 0 5 . . 4 \end{matrix}\) is encoded by the array {2,0,5,0xFF,0xFF,4,6,7,8,9,10,11,12,13,14,15}

◆ is_partial_transformation()

bool HPCombi::is_partial_transformation	(	epu8	v,
		const size_t	k = 16 )

inlinenoexcept

Test for partial transformation.

Returns: whether v is a partial transformation.

Parameters

v	the vector to test
k	the size of `*this` (default 16)

Points where the function is undefined are mapped to 0xff. If *this is a transformation of \(0\dots n-1\) for \(n<16\), it should be completed to a transformation of \(0\dots 15\) by adding fixed points. That is the values \(i\geq n\) should be mapped to themself.

Example:: The partial transformation \(\begin{matrix}0 1 2 3 4 5\\ 2 0 5 . . 4 \end{matrix}\) is encoded by the array {2,0,5,0xff,0xff,4,6,7,8,9,10,11,12,13,14,15}

◆ is_permutation()

bool HPCombi::is_permutation	(	epu8	v,
		const size_t	k = 16 )

inlinenoexcept

Returns: whether *this is a permutation.

Parameters

v	the vector to test
k	the size of `*this` (default 16)

If *this is a permutation of \(0\dots n-1\) for \(n<16\), it should be completed to a permutation of \(0\dots 15\) by adding fixed points. That is the values \(i\geq n\) should be mapped to themselves.

Example:: The permutation \(\begin{matrix}0 1 2 3 4 5\\ 2 0 5 3 1 4 \end{matrix}\) is encoded by the array {2,0,5,3,1,4,6,7,8,9,10,11,12,13,14,15}

Algorithm: architecture dependent

◆ is_permutation_eval()

bool HPCombi::is_permutation_eval	(	epu8	v,
		const size_t	k = 16 )

inlinenoexcept

Same interface as is_permutation but with a different implementation.

Algorithm: uses evaluation

◆ is_permutation_sort()

bool HPCombi::is_permutation_sort	(	epu8	v,
		const size_t	k = 16 )

inlinenoexcept

Same interface as is_permutation but with a different implementation.

Algorithm: sort the vector and compare to identity

◆ is_sorted()

bool HPCombi::is_sorted ( epu8 a )

inlinenoexcept

Testing if a HPCombi::epu8 is sorted.

◆ is_transformation()

bool HPCombi::is_transformation	(	epu8	v,
		const size_t	k = 16 )

inlinenoexcept

Test for transformation.

Returns: whether *this is a transformation.

Parameters

v	the vector to test
k	the size of `*this` (default 16)

If *this is a transformation of \(0\dots n-1\) for \(n<16\), it should be completed to a transformation of \(0\dots 15\) by adding fixed points. That is the values \(i\geq n\) should be mapped to themself.

Example:: The transformation \(\begin{matrix}0 1 2 3 4 5\\ 2 0 5 2 1 4 \end{matrix}\) is encoded by the array {2,0,5,2,1,4,6,7,8,9,10,11,12,13,14,15}

◆ last_diff()

uint64_t HPCombi::last_diff	(	epu8	a,
		epu8	b,
		size_t	bound = 16 )

inlinenoexcept

The last difference between two HPCombi::epu8.

Parameters

a,b	: HPCombi::epu8
bound	: a `size_t`

Returns: the largest index \(i<bound\) such that a[i] and b[i] differ, 16 if there is no differences before bound.

Example:: epu8 a { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15};

epu8 b { 5, 5, 2, 9, 1, 6,12, 4, 0, 4, 4, 4,12,13,14,15};

then last_diff(a, b) returns 11, last_diff(a, b, 3) returns 16, last_diff(a, b, 4) returns 3, last_diff(a, b, 7) returns 3.

Warning: bound is assumed to be smaller or equal than 16

◆ last_diff_mask()

uint64_t HPCombi::last_diff_mask	(	epu8	a,
		epu8	b,
		size_t	bound = 16 )

inlinenoexcept

Same interface as last_diff but with a different implementation.

Algorithm:: Using vector comparison and mask

◆ last_diff_ref()

uint64_t HPCombi::last_diff_ref	(	epu8	a,
		epu8	b,
		size_t	bound = 16 )

inlinenoexcept

Same interface as last_diff but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access

◆ last_mask()

uint64_t HPCombi::last_mask	(	epu8	msk,
		size_t	bound )

inline

◆ last_non_zero()

uint64_t HPCombi::last_non_zero	(	epu8	v,
		int	bnd )

inlinenoexcept

return the index of the last non zero entry or 16 if there are none Only index smaller than bound are taken into account.

◆ last_zero()

uint64_t HPCombi::last_zero	(	epu8	v,
		int	bnd )

inlinenoexcept

return the index of the last zero entry or 16 if there are none Only index smaller than bound are taken into account.

◆ less()

bool HPCombi::less	(	epu8	a,
		epu8	b )

inlinenoexcept

Lexicographic comparison between two HPCombi::epu8.

◆ less_partial()

int8_t HPCombi::less_partial	(	epu8	a,
		epu8	b,
		int	k )

inlinenoexcept

Partial lexicographic comparison between two HPCombi::epu8.

Parameters

a,b	: the vectors to compare
k	: the bound for the lexicographic comparison

Returns: a positive, negative or zero int8_t depending on the result

◆ max()

epu8 HPCombi::max	(	epu8	a,
		epu8	b )

inlinenoexcept

Vector max between two HPCombi::epu8 0.

◆ merge()

void HPCombi::merge	(	epu8 &	a,
		epu8 &	b )

inlinenoexcept

Merge two sorted epu8.

Parameters

a,b	two HPCombi::epu8 after executing merge, `a` and `b` are sorted and `a`[15] <= `b`[0]

Algorithm: bitonic merge sorting network

◆ merge_rev()

void HPCombi::merge_rev	(	epu8 &	a,
		epu8 &	b )

inlinenoexcept

◆ min()

epu8 HPCombi::min	(	epu8	a,
		epu8	b )

inlinenoexcept

Vector min between two HPCombi::epu8 0.

◆ network_sort()

template<bool Increasing = true, size_t sz>

epu8 HPCombi::network_sort	(	epu8	res,
		std::array< epu8, sz >	rounds )

inline

Apply a sorting network.

◆ network_sort_perm()

template<bool Increasing = true, size_t sz>

epu8 HPCombi::network_sort_perm	(	epu8 &	v,
		std::array< epu8, sz >	rounds )

inline

Apply a sorting network in place and return the permutation.

◆ not_equal()

bool HPCombi::not_equal	(	epu8	a,
		epu8	b )

inlinenoexcept

Non equality of HPCombi::epu8.

◆ operator""_u8()

uint8_t HPCombi::operator""_u8 ( unsigned long long arg )

inlineconstexprnoexcept

Unsigned 8 bits int constant.

◆ partial_max()

epu8 HPCombi::partial_max ( epu8 v )

inlinenoexcept

Horizontal partial sum of a HPCombi::epu8.

Returns: the partials max of the input

Example:: partial_max(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15});

HPCombi::partial_max
epu8 partial_max(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:369

Returns { 5, 5, 5, 5, 5, 6,12,12,12,12,12,12,12,13,14,15}

◆ partial_max_gen()

epu8 HPCombi::partial_max_gen ( epu8 v )

inlinenoexcept

Same interface as partial_max but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access through HPCombi::VectGeneric

◆ partial_max_ref()

epu8 HPCombi::partial_max_ref ( epu8 v )

inlinenoexcept

Same interface as partial_max but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access

◆ partial_max_round()

epu8 HPCombi::partial_max_round ( epu8 v )

inlinenoexcept

Same interface as partial_max but with a different implementation.

Algorithm:: 4-stages parallel algorithm

◆ partial_min()

epu8 HPCombi::partial_min ( epu8 v )

inlinenoexcept

Horizontal partial sum of a HPCombi::epu8.

Returns: the partials min of the input

Example:: partial_min(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15});

HPCombi::partial_min
epu8 partial_min(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:444

Returns { 5, 5, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}

◆ partial_min_gen()

epu8 HPCombi::partial_min_gen ( epu8 v )

inlinenoexcept

Same interface as partial_min but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access through HPCombi::VectGeneric

◆ partial_min_ref()

epu8 HPCombi::partial_min_ref ( epu8 v )

inlinenoexcept

Same interface as partial_min but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access

◆ partial_min_round()

epu8 HPCombi::partial_min_round ( epu8 v )

inlinenoexcept

Same interface as partial_min but with a different implementation.

Algorithm:: 4-stages parallel algorithm

◆ partial_sums()

epu8 HPCombi::partial_sums ( epu8 v )

inlinenoexcept

Horizontal partial sum of a HPCombi::epu8.

Returns: the partials sums of the input

Example:: partial_sums(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15});

HPCombi::partial_sums
epu8 partial_sums(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:294

Returns { 5,10,12,17,18,24,36,40,40,43,45,56,68,81,95,110}

◆ partial_sums_gen()

epu8 HPCombi::partial_sums_gen ( epu8 v )

inlinenoexcept

Same interface as partial_sums but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access through HPCombi::VectGeneric

◆ partial_sums_ref()

epu8 HPCombi::partial_sums_ref ( epu8 v )

inlinenoexcept

Same interface as partial_sums but with a different implementation.

Algorithm:: Reference \(O(n)\) algorithm using loop and indexed access

◆ partial_sums_round()

epu8 HPCombi::partial_sums_round ( epu8 v )

inlinenoexcept

Same interface as partial_sums but with a different implementation.

Algorithm:: 4-stages parallel algorithm

◆ permutation_of()

epu8 HPCombi::permutation_of	(	epu8	a,
		epu8	b )

inlinenoexcept

Find if a vector is a permutation of another one.

Parameters

a,b	two HPCombi::epu8

Returns: a HPCombi::epu8 For each \(0 \leq i < 16\), res[i] is the position in a of b[i] if b[i] appears exactly once in a, or undefined if not.

Algorithm: architecture dependent

◆ permutation_of_ref()

epu8 HPCombi::permutation_of_ref	(	epu8	a,
		epu8	b )

inlinenoexcept

Same interface as permutation_of but with a different implementation.

Algorithm: reference implementation

◆ permuted()

epu8 HPCombi::permuted	(	epu8	a,
		epu8	b )

inlinenoexcept

Same as permuted_ref but with an optimized implementation using intrinsics.

◆ permuted_ref()

epu8 HPCombi::permuted_ref	(	epu8	a,
		epu8	b )

inlinenoexcept

Apply a permutation b on the vector a: for i=0..16 {result[i] = a[b[i]}.

Permuting a HPCombi::epu8.

◆ popcount16()

epu8 HPCombi::popcount16 ( epu8 v )

inlinenoexcept

a vector popcount function

◆ pow()

template<unsigned exp, typename T, typename M = power_helper::Monoid<T>>

const T HPCombi::pow ( const T x )

A generic compile time exponentiation function.

Template Parameters

exp the power

Parameters

x	the number to exponentiate

Returns: x to the power exp

Raise x to the exponent exp where exp is known at compile time. We use the classical recursive binary algorithm, but the recursion is unfolded and optimized at compile time giving an assembly code which is just a sequence of multiplication.

To use for a specific type the user should pass a Monoid structure (see below) as third parameter to the template. Alternatively a default monoid structure can be defined for a given type by specializing the template struct HPCombi::power_helper::Monoid

◆ random_epu8()

epu8 HPCombi::random_epu8 ( uint16_t bnd )

inline

A random HPCombi::epu8.

Parameters

bnd	: the upper bound for the value of the entries. `bnd` must verify \( 0 < bnd \leq 256 \). This is not checked.

Returns: a random HPCombi::epu8 with value in the interval \([0, 1, 2, ..., bnd-1]\).

◆ remove_dups()

epu8 HPCombi::remove_dups	(	epu8	a,
		uint8_t	repl = 0 )

inlinenoexcept

Remove duplicates in a sorted HPCombi::epu8.

Parameters

a	supposed to be sorted
repl	the value replacing the duplicate entries (default to 0)

Returns: the vector a where repeated occurrences of entries are replaced by repl

◆ reverted()

epu8 HPCombi::reverted ( epu8 a )

inlinenoexcept

Reverting a HPCombi::epu8.

◆ revsorted()

epu8 HPCombi::revsorted ( epu8 a )

inlinenoexcept

Return a reverse sorted HPCombi::epu8.

Algorithm:: Uses the 9 stages sorting network sorting_rounds

◆ revsorted8()

epu8 HPCombi::revsorted8 ( epu8 a )

inlinenoexcept

Return a HPCombi::epu8 with both halves reverse sorted.

Algorithm: Uses a 6 stages sorting network #sorting_rounds8

◆ shifted_left()

epu8 HPCombi::shifted_left ( epu8 a )

inlinenoexcept

Right shifted of a HPCombi::epu8 inserting a 0.

Warning: we use the convention that the 0 entry is on the left !

◆ shifted_right()

epu8 HPCombi::shifted_right ( epu8 a )

inlinenoexcept

Left shifted of a HPCombi::epu8 inserting a 0.

Warning: we use the convention that the 0 entry is on the left !

◆ sort8_perm()

epu8 HPCombi::sort8_perm ( epu8 & a )

inlinenoexcept

Sort this and return the sorting permutation.

Algorithm: Uses a 6 stages sorting network #sorting_rounds8

◆ sort_perm()

epu8 HPCombi::sort_perm ( epu8 & a )

inlinenoexcept

Sort this and return the sorting permutation.

Algorithm: Uses a 9 stages sorting network #sorting_rounds8

◆ sorted()

epu8 HPCombi::sorted ( epu8 a )

inlinenoexcept

Return a sorted HPCombi::epu8.

Algorithm:: Uses the 9 stages sorting network sorting_rounds

◆ sorted8()

epu8 HPCombi::sorted8 ( epu8 a )

inlinenoexcept

Return a HPCombi::epu8 with both halves sorted.

Algorithm: Uses a 6 stages sorting network #sorting_rounds8

◆ sorted_vect()

template<size_t Size, typename Expo = uint8_t>

std::array< Expo, Size > HPCombi::sorted_vect ( std::array< Expo, Size > v )

◆ square()

template<typename T, typename M = power_helper::Monoid<T>>

const T HPCombi::square ( const T x )

A generic compile time squaring function.

Parameters

x	the number to square

Returns: x squared

To use for a specific type the user should pass a monoid structure as second parameter to the template. Alternatively a default monoid structure can be defined for a given type by specializing the template struct HPCombi::power_helper::Monoid

◆ to_block()

xpu64 HPCombi::to_block ( xpu64 vect )

inline

Converting storage type from rows to blocks of a xpu64 representing a 16x16 matrix (used in BMat16).

Each 64 bit unsigned int represents one of the four 8x8 matrix that make up a 16x16 when quartered.

◆ to_line()

xpu64 HPCombi::to_line ( xpu64 vect )

inline

Converting storage type from blocks to rows of a xpu64 representing a 16x16 matrix (used in BMat16).

Each 64 bit unsigned int represents 4 lines of the matrix.

Variable Documentation

◆ Epu8

TPUBuild<epu8> HPCombi::Epu8 {}

constexpr

Factory object acting as a class constructor for type HPCombi::epu8.

see HPCombi::TPUBuild for usage and capability

◆ inverting_rounds

std::array<epu8, 3> HPCombi::inverting_rounds

constexpr

Initial value:

{{
    
    
    epu8 { 0,  1,  2,  3,  8,  9, 10, 11,  4,  5,  6,  7, 12, 13, 14, 15},
    epu8 { 0,  1,  4,  5,  8,  9, 12, 13,  2,  3,  6,  7, 10, 11, 14, 15},
    epu8 { 0,  2,  4,  6,  8, 10, 12, 14,  1,  3,  5,  7,  9, 11, 13, 15}
    
}}

◆ masks

std::array<epu8, 4> HPCombi::masks

constexpr

Initial value:

{{
    
      {FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0}, 
      {FF,FF, 1, 1,FF,FF, 1, 1,FF,FF, 1, 1,FF,FF, 1, 1}, 
      {FF,FF,FF,FF, 2, 2, 2, 2,FF,FF,FF,FF, 2, 2, 2, 2}, 
      {FF,FF,FF,FF,FF,FF,FF,FF, 3, 3, 3, 3, 3, 3, 3, 3}  
    }}

◆ merge_rounds

std::array<epu8, 6> HPCombi::merge_rounds

constexpr

Initial value:

{{
    epu8 { 8,  9, 10, 11, 12, 13, 14, 15,  0,  1,  2,  3,  4,  5,  6,  7},
    epu8 { 4,  5,  6,  7,  0,  1,  2,  3, 12, 13, 14, 15,  8,  9, 10, 11},
    epu8 { 2,  3,  0,  1,  6,  7,  4,  5, 10, 11,  8,  9, 14, 15, 12, 13},
    epu8 { 1,  0,  3,  2,  5,  4,  7,  6,  9,  8, 11, 10, 13, 12, 15, 14},
}}

◆ mining_rounds

std::array<epu8, 4> HPCombi::mining_rounds

constexpr

Initial value:

{{
    
    
    epu8 {  0,  0,  2,  2,  4,  4,  6,  6,  8,  8, 10, 10, 12, 12, 14, 14},
    epu8 {  0,  1,  1,  1,  4,  5,  5,  5,  8,  9,  9,  9, 12, 13, 13, 13},
    epu8 {  0,  1,  2,  3,  3,  3,  3,  3,  8,  9, 10, 11, 11, 11, 11, 11},
    epu8 {  0,  1,  2,  3,  4,  5,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7}
    
}}

◆ prime

uint64_t HPCombi::prime = 0x9e3779b97f4a7bb9

constexpr

A prime number good for hashing.

◆ sorting_rounds

std::array<epu8, 9> HPCombi::sorting_rounds

constexpr

Initial value:

{{epu8{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
      epu8{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13},
      epu8{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11},
      epu8{8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7},
      epu8{0, 2, 1, 12, 8, 10, 9, 11, 4, 6, 5, 7, 3, 14, 13, 15},
      epu8{0, 4, 8, 10, 1, 9, 12, 13, 2, 5, 3, 14, 6, 7, 11, 15},
      epu8{0, 1, 4, 5, 2, 3, 8, 9, 6, 7, 12, 13, 10, 11, 14, 15},
      epu8{0, 1, 2, 6, 4, 8, 3, 10, 5, 12, 7, 11, 9, 13, 14, 15},
      epu8{0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 13, 14, 15}}}

A 16-way sorting network.

Sorting network from Knuth [AoCP3] Fig. 51 p 229. used by the sorted function

[AoCP3]: "D. Knuth, The art of computer programming vol. 3"

◆ sorting_rounds8

std::array<epu8, 6> HPCombi::sorting_rounds8

constexpr

Initial value:

{{
    epu8 { 1,  0,  3,  2,  5,  4,  7,  6,  9,  8, 11, 10, 13, 12, 15, 14},
    epu8 { 2,  3,  0,  1,  6,  7,  4,  5, 10, 11,  8,  9, 14, 15, 12, 13},
    epu8 { 0,  2,  1,  3,  4,  6,  5,  7,  8, 10,  9, 11, 12, 14, 13, 15},
    epu8 { 4,  5,  6,  7,  0,  1,  2,  3, 12, 13, 14, 15,  8,  9, 10, 11},
    epu8 { 0,  1,  4,  5,  2,  3,  6,  7,  8,  9, 12, 13, 10, 11, 14, 15},
    epu8 { 0,  2,  1,  4,  3,  6,  5,  7,  8, 10,  9, 12, 11, 14, 13, 15}
}}

A duplicated 8-way sorting network.

Batcher odd-Even mergesort sorting network used by the sorted function

◆ summing_rounds

std::array<epu8, 4> HPCombi::summing_rounds

constexpr

Initial value:

{{
    
    
    epu8 { FF,  0, FF,  2, FF,  4, FF,  6, FF,  8, FF, 10, FF, 12, FF, 14},
    epu8 { FF, FF,  1,  1, FF, FF,  5,  5, FF, FF,  9,  9, FF, FF, 13, 13},
    epu8 { FF, FF, FF, FF,  3,  3,  3,  3, FF, FF, FF, FF, 11, 11, 11, 11},
    epu8 { FF, FF, FF, FF, FF, FF, FF, FF,  7,  7,  7,  7,  7,  7,  7,  7}
    
}}

Permutation Round for partial and horizontal sums.

Namespaces

Classes

Typedefs

Functions

Variables

Typedef Documentation

◆ epu64

◆ epu8

◆ xpu16

◆ xpu64

Function Documentation

◆ as_array() [1/2]

◆ as_array() [2/2]

◆ as_VectGeneric() [1/2]

◆ as_VectGeneric() [2/2]

◆ equal()

◆ eval16()

◆ eval16_arr()

◆ eval16_cycle()

◆ eval16_gen()

◆ eval16_popcount()

◆ eval16_ref()

◆ first_diff()

◆ first_diff_mask()

◆ first_diff_ref()

◆ first_mask()

◆ first_non_zero()

◆ first_zero()

◆ horiz_max()

◆ horiz_max3()

◆ horiz_max4()

◆ horiz_max_gen()

◆ horiz_max_ref()

◆ horiz_min()

◆ horiz_min3()

◆ horiz_min4()

◆ horiz_min_gen()

◆ horiz_min_ref()

◆ horiz_sum()

◆ horiz_sum3()

◆ horiz_sum4()

◆ horiz_sum_gen()

◆ horiz_sum_ref()

◆ is_all_one()

◆ is_all_zero()

◆ is_partial_permutation()

◆ is_partial_transformation()

◆ is_permutation()

◆ is_permutation_eval()

◆ is_permutation_sort()

◆ is_sorted()

◆ is_transformation()

◆ last_diff()

◆ last_diff_mask()

◆ last_diff_ref()

◆ last_mask()

◆ last_non_zero()

◆ last_zero()

◆ less()

◆ less_partial()

◆ max()

◆ merge()

◆ merge_rev()

◆ min()

◆ network_sort()

◆ network_sort_perm()

◆ not_equal()

◆ operator""_u8()

◆ partial_max()

◆ partial_max_gen()

◆ partial_max_ref()

◆ partial_max_round()

◆ partial_min()

◆ partial_min_gen()

◆ partial_min_ref()

◆ partial_min_round()

◆ partial_sums()

◆ partial_sums_gen()

◆ partial_sums_ref()

◆ partial_sums_round()