HPCombi
High Performance Combinatorics in C++ using vector instructions v1.0.0
Loading...
Searching...
No Matches
perm16_impl.hpp
Go to the documentation of this file.
1//****************************************************************************//
2// Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
3// //
4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
5// //
6// HP-Combi is free software: you can redistribute it and/or modify it //
7// under the terms of the GNU General Public License as published by the //
8// Free Software Foundation, either version 3 of the License, or //
9// (at your option) any later version. //
10// //
11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
14// for more details. //
15// //
16// You should have received a copy of the GNU General Public License along //
17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
18//****************************************************************************//
19
20// NOLINT(build/header_guard)
21
22namespace HPCombi {
23
25// Implementation part for inline functions //////////////////////////////////
27
28inline PTransf16::PTransf16(std::initializer_list<uint8_t> il)
29 : Vect16(Epu8.id()) {
30 HPCOMBI_ASSERT(il.size() <= 16);
31 std::copy(il.begin(), il.end(), HPCombi::as_array(v).begin());
32}
33
34inline PTransf16::PTransf16(std::vector<uint8_t> dom, std::vector<uint8_t> rng,
35 size_t /*unused */)
36 : Vect16(Epu8(0xFF)) {
37 HPCOMBI_ASSERT(dom.size() == rng.size());
38 HPCOMBI_ASSERT(dom.size() <= 16);
39 for (size_t i = 0; i < dom.size(); ++i) {
40 HPCOMBI_ASSERT(dom[i] < 16);
41 v[dom[i]] = rng[i];
42 }
43}
44
45inline epu8 PTransf16::domain_mask(bool complement) const {
46 return complement ? v == Epu8(0xFF) : v != Epu8(0xFF);
47}
48inline uint32_t PTransf16::domain_bitset(bool complement) const {
49 return simde_mm_movemask_epi8(domain_mask(complement));
50}
52 return domain_mask(true) | Epu8.id();
53}
54
55#ifdef SIMDE_X86_SSE4_2_NATIVE
56inline epu8 PTransf16::image_mask_cmpestrm(bool complement) const {
57 return complement ? _mm_cmpestrm(v, 16, one().v, 16, FIND_IN_VECT)
58 : _mm_cmpestrm(v, 16, one().v, 16, FIND_IN_VECT_COMPL);
59}
60#endif
61inline epu8 PTransf16::image_mask_ref(bool complement) const {
62 epu8 res{};
63 for (auto x : *this)
64 if (x != 0xFF)
65 res[x] = 0xFF;
66 return complement ? static_cast<epu8>(!res) : res;
67}
68
69inline uint32_t PTransf16::image_bitset(bool complement) const {
70 return simde_mm_movemask_epi8(image_mask(complement));
71}
73 return image_mask(true) | Epu8.id();
74}
75inline uint32_t PTransf16::rank_ref() const {
76 decltype(Epu8)::array tmp{};
77 static_assert(decltype(Epu8)::size == 16, "Wrong size of EPU8 array");
78 for (auto x : *this)
79 if (x != 0xFF)
80 tmp[x] = 1;
81 return std::accumulate(tmp.begin(), tmp.end(), uint8_t(0));
82}
83
84inline uint32_t PTransf16::rank_cmpestrm() const {
85 return __builtin_popcountl(image_bitset());
86}
87
88inline uint32_t PTransf16::rank() const {
89#ifdef SIMDE_X86_SSE4_2_NATIVE
90 return rank_cmpestrm();
91#else
92 return rank_ref();
93#endif
94}
95
96inline epu8 PTransf16::fix_points_mask(bool complement) const {
97 return complement ? v != one().v : v == one().v;
98}
99inline uint32_t PTransf16::fix_points_bitset(bool complement) const {
100 return simde_mm_movemask_epi8(fix_points_mask(complement));
101}
102
103inline uint8_t PTransf16::smallest_fix_point() const {
104 return __builtin_ffs(fix_points_bitset(false)) - 1;
105}
107inline uint8_t PTransf16::smallest_moved_point() const {
108 return __builtin_ffs(fix_points_bitset(true)) - 1;
109}
111inline uint8_t PTransf16::largest_fix_point() const {
112 uint32_t res = fix_points_bitset(false);
113 return res == 0 ? 0xFF : 31 - __builtin_clz(res);
114}
116inline uint8_t PTransf16::largest_moved_point() const {
117 uint32_t res = fix_points_bitset(true);
118 return res == 0 ? 0xFF : 31 - __builtin_clz(res);
119}
121inline uint8_t PTransf16::nb_fix_points() const {
122 return __builtin_popcountl(fix_points_bitset());
123}
124
125inline static constexpr uint8_t hilo_exchng_fun(uint8_t i) {
126 return i < 8 ? i + 8 : i - 8;
127}
128static constexpr epu8 hilo_exchng = Epu8(hilo_exchng_fun);
129inline static constexpr uint8_t hilo_mask_fun(uint8_t i) {
130 return i < 8 ? 0x0 : 0xFF;
131}
132static constexpr epu8 hilo_mask = Epu8(hilo_mask_fun);
133
134inline Transf16::Transf16(uint64_t compressed) {
135 epu8 res = simde_mm_set_epi64x(compressed, compressed);
136 v = simde_mm_blendv_epi8(res & Epu8(0x0F), res >> 4, hilo_mask);
137}
138
139inline Transf16::operator uint64_t() const {
140 epu8 res =
141 static_cast<epu8>(simde_mm_slli_epi32(static_cast<simde__m128i>(v), 4));
142 res = HPCombi::permuted(res, hilo_exchng) + v;
143 return simde_mm_extract_epi64(res, 0);
144}
145
147 epu8 res = Epu8(0xFF);
148 for (size_t i = 0; i < 16; ++i)
149 if (v[i] < 16)
150 res[v[i]] = i;
151 return res;
152}
153
154#ifdef SIMDE_X86_SSE4_2_NATIVE
155inline PPerm16 PPerm16::inverse_find() const {
156 epu8 mask = _mm_cmpestrm(v, 16, one(), 16, FIND_IN_VECT);
157 return permutation_of(v, one()) | mask;
158}
159#endif
160
161inline Perm16 Perm16::random(uint64_t n) {
162 static std::random_device rd;
163 static std::mt19937 g(rd());
164
165 Perm16 res = one();
166 auto ar = res.as_array();
167
168 std::shuffle(ar.begin(), ar.begin() + n, g);
169 return res;
170}
171
172// From Ruskey : Combinatorial Generation page 138
173inline Perm16 Perm16::unrankSJT(int n, int r) {
174 int j;
175 std::array<int, 16> dir;
176 epu8 res{};
177 for (j = 0; j < n; ++j)
178 res[j] = 0xFF;
179 for (j = n - 1; j >= 0; --j) {
180 int k, rem, c;
181 rem = r % (j + 1);
182 r = r / (j + 1);
183 if ((r & 1) != 0) {
184 k = -1;
185 dir[j] = +1;
186 } else {
187 k = n;
188 dir[j] = -1;
189 }
190 c = -1;
191 do {
192 k = k + dir[j];
193 if (res[k] == 0xFF)
194 ++c;
195 } while (c < rem);
196 res[k] = j;
197 }
198 return res;
199}
200
202 HPCOMBI_ASSERT(i < 16);
203 epu8 res = one();
204 res[i] = i + 1;
205 res[i + 1] = i;
206 return res;
207}
208
210 epu8 res;
211 for (size_t i = 0; i < 16; ++i)
212 res[v[i]] = i;
213 return res;
214}
215
217 epu8 res;
218 auto &arres = HPCombi::as_array(res);
219 auto self = as_array();
220 for (size_t i = 0; i < 16; ++i)
221 arres[self[i]] = i;
222 return res;
223}
224
226 // G++-7 compile this shift by 3 additions.
227 // epu8 res = (v << 4) + one().v;
228 // I call directly the shift intrinsic
229 epu8 res = static_cast<epu8>(
230 simde_mm_slli_epi32(static_cast<simde__m128i>(v), 4)) +
231 one().v;
232 res = sorted(res) & Epu8(0x0F);
233 return res;
234}
235
236// We declare PERM16 as a correct Monoid
237namespace power_helper {
238
239// TODO required?
241
242template <> struct Monoid<Perm16> {
243 static const Perm16 one() { return Perm16::one(); }
244 static Perm16 prod(Perm16 a, Perm16 b) { return a * b; }
245};
246
247} // namespace power_helper
248
250 Perm16 res = one();
251 Perm16 newpow = pow<8>(*this);
252 for (int i = 9; i <= 16; i++) {
253 Perm16 oldpow = newpow;
254 newpow = oldpow * *this;
255 res.v = simde_mm_blendv_epi8(res, oldpow, newpow.v == one().v);
256 }
257 return res;
258}
259
260static constexpr uint32_t lcm_range(uint8_t n) {
261 uint32_t res = 1;
262 for (uint8_t i = 1; i <= n; ++i)
263 res = std::lcm(res, i);
264 return res;
265}
266
268 return pow<lcm_range(16) - 1>(*this);
269}
270
271inline epu8 Perm16::lehmer_ref() const {
272 epu8 res{};
273 for (size_t i = 0; i < 16; i++)
274 for (size_t j = i + 1; j < 16; j++)
275 if (v[i] > v[j])
276 res[i]++;
277 return res;
278}
279
280inline epu8 Perm16::lehmer_arr() const {
281 decltype(Epu8)::array res{};
282 decltype(Epu8)::array ar = as_array();
283 for (size_t i = 0; i < 16; i++)
284 for (size_t j = i + 1; j < 16; j++)
285 if (ar[i] > ar[j])
286 res[i]++;
287 return Epu8(res);
288}
289
290inline epu8 Perm16::lehmer() const {
291 epu8 vsh = v, res = -one().v;
292 for (int i = 1; i < 16; i++) {
293 vsh = shifted_left(vsh);
294 res -= (v >= vsh);
295 }
296 return res;
297}
298
299inline uint8_t Perm16::length_ref() const {
300 uint8_t res = 0;
301 for (size_t i = 0; i < 16; i++)
302 for (size_t j = i + 1; j < 16; j++)
303 if (v[i] > v[j])
304 res++;
305 return res;
306}
307
308inline uint8_t Perm16::length_arr() const {
309 uint8_t res = 0;
310 decltype(Epu8)::array ar = as_array();
311 for (size_t i = 0; i < 16; i++)
312 for (size_t j = i + 1; j < 16; j++)
313 if (ar[i] > ar[j])
314 res++;
315 return res;
316}
317
318inline uint8_t Perm16::length() const { return horiz_sum(lehmer()); }
319
320inline uint8_t Perm16::nb_descents_ref() const {
321 uint8_t res = 0;
322 for (size_t i = 0; i < 16 - 1; i++)
323 if (v[i] > v[i + 1])
324 res++;
325 return res;
326}
327inline uint8_t Perm16::nb_descents() const {
328 return __builtin_popcountl(simde_mm_movemask_epi8(v < shifted_right(v)));
329}
330
331inline uint8_t Perm16::nb_cycles_ref() const {
332 std::array<bool, 16> b{};
333 uint8_t c = 0;
334 for (size_t i = 0; i < 16; i++) {
335 if (!b[i]) {
336 for (size_t j = i; !b[j]; j = v[j])
337 b[j] = true;
338 c++;
339 }
340 }
341 return c;
342}
343
345 epu8 x0, x1 = one();
346 Perm16 p = *this;
347 x0 = simde_mm_min_epi8(x1, HPCombi::permuted(x1, p));
348 p = p * p;
349 x1 = simde_mm_min_epi8(x0, HPCombi::permuted(x0, p));
350 p = p * p;
351 x0 = simde_mm_min_epi8(x1, HPCombi::permuted(x1, p));
352 p = p * p;
353 x1 = simde_mm_min_epi8(x0, HPCombi::permuted(x0, p));
354 return x1;
355}
356
357inline uint8_t Perm16::nb_cycles_unroll() const {
358 epu8 res = (Epu8.id() == cycles_partition());
359 return __builtin_popcountl(simde_mm_movemask_epi8(res));
360}
361
362inline bool Perm16::left_weak_leq_ref(Perm16 other) const {
363 for (size_t i = 0; i < 16; i++) {
364 for (size_t j = i + 1; j < 16; j++) {
365 if ((v[i] > v[j]) && (other[i] < other[j]))
366 return false;
367 }
368 }
369 return true;
370}
371
372inline bool Perm16::left_weak_leq(Perm16 other) const {
373 epu8 srot = v, orot = other;
374 for (size_t i = 0; i < 15; i++) {
375 srot = shifted_right(srot);
376 orot = shifted_right(orot);
377 uint64_t sinv = simde_mm_movemask_epi8(v < srot);
378 uint64_t oinv = simde_mm_movemask_epi8(other.v < orot);
379 if ((sinv & oinv) != sinv)
380 return false;
381 }
382 return true;
383}
384
385inline bool Perm16::left_weak_leq_length(Perm16 other) const {
386 Perm16 prod = *this * other.inverse();
387 return other.length() == length() + prod.length();
388}
389
390} // namespace HPCombi
const PTransf16 id
Definition RD.cpp:37
#define HPCOMBI_ASSERT(x)
Definition debug.hpp:28
std::array< std::tuple< uint16_t, uint16_t, std::array< uint16_t, gens.size()> >, 65536 > res
Definition image.cpp:66
Perm16 Perm16
Definition perm16_impl.hpp:240
Definition bmat8.hpp:41
epu8 permuted(epu8 a, epu8 b) noexcept
Permuting a HPCombi::epu8.
Definition epu8.hpp:72
epu8 shifted_right(epu8 a) noexcept
Left shifted of a HPCombi::epu8 inserting a 0.
Definition epu8.hpp:78
epu8 permutation_of(epu8 a, epu8 b) noexcept
Find if a vector is a permutation of one other.
Definition epu8_impl.hpp:303
uint8_t horiz_sum(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:217
epu8 sorted(epu8 a) noexcept
Return a sorted HPCombi::epu8.
Definition epu8_impl.hpp:203
constexpr TPUBuild< epu8 > Epu8
Factory object acting as a class constructor for type HPCombi::epu8.
Definition epu8.hpp:53
uint8_t __attribute__((vector_size(16))) epu8
SIMD vector of 16 unsigned bytes.
Definition epu8.hpp:45
epu8 shifted_left(epu8 a) noexcept
Right shifted of a HPCombi::epu8 inserting a 0.
Definition epu8.hpp:84
TPUBuild< TPU >::array & as_array(TPU &v) noexcept
Cast a TPU to a c++ std::array.
Definition builder.hpp:139
const T pow(const T x)
A generic compile time exponentiation function.
Definition power.hpp:83
Partial permutation of .
Definition perm16.hpp:149
PPerm16 inverse_ref() const
The inverse of a partial permutation.
Definition perm16_impl.hpp:146
static constexpr PPerm16 one()
The identity partial permutations.
Definition perm16.hpp:166
Partial transformation of .
Definition perm16.hpp:47
uint8_t nb_fix_points() const
Returns the number of fix points of *this.
Definition perm16_impl.hpp:121
uint32_t fix_points_bitset(bool complement=false) const
Returns a bit mask for the fix point of *this.
Definition perm16_impl.hpp:99
static constexpr size_t size()
Definition perm16.hpp:48
static constexpr PTransf16 one()
The identity partial transformation.
Definition perm16.hpp:67
uint8_t largest_moved_point() const
Returns the largest non fix point of *this.
Definition perm16_impl.hpp:116
uint32_t domain_bitset(bool complement=false) const
Returns a bit mask for the domain of *this.
Definition perm16_impl.hpp:48
PTransf16 left_one() const
Returns the partial left identity for *this.
Definition perm16_impl.hpp:72
typename decltype(Epu8)::array array
Definition perm16.hpp:51
uint32_t rank_ref() const
Returns the size of the image of *this.
Definition perm16_impl.hpp:75
PTransf16 right_one() const
Returns the partial right identity for *this.
Definition perm16_impl.hpp:51
uint32_t image_bitset(bool complement=false) const
Returns a bit mask for the image of *this.
Definition perm16_impl.hpp:69
epu8 fix_points_mask(bool complement=false) const
Returns a mask for the fix point of *this.
Definition perm16_impl.hpp:96
uint8_t smallest_fix_point() const
Returns the smallest fix point of *this.
Definition perm16_impl.hpp:103
uint32_t rank_cmpestrm() const
Returns the size of the image of *this.
Definition perm16_impl.hpp:84
epu8 domain_mask(bool complement=false) const
Returns a mask for the domain of *this.
Definition perm16_impl.hpp:45
uint32_t rank() const
Returns the size of the image of *this.
Definition perm16_impl.hpp:88
epu8 image_mask_ref(bool complement=false) const
Returns a mask for the image of *this.
Definition perm16_impl.hpp:61
uint8_t largest_fix_point() const
Returns the largest fix point of *this.
Definition perm16_impl.hpp:111
epu8 image_mask(bool complement=false) const
Definition perm16.hpp:77
epu8 image_mask_cmpestrm(bool complement=false) const
Returns a mask for the image of *this.
uint8_t smallest_moved_point() const
Returns the smallest non fix point of *this.
Definition perm16_impl.hpp:107
Permutations of .
Definition perm16.hpp:208
Perm16 inverse_cycl() const
The inverse permutation.
Definition perm16_impl.hpp:249
Perm16 inverse() const
The inverse permutation.
Definition perm16.hpp:283
epu8 lehmer() const
The Lehmer code of a permutation.
Definition perm16_impl.hpp:290
uint8_t length_ref() const
The Coxeter length (ie: number of inversion) of a permutation.
Definition perm16_impl.hpp:299
epu8 cycles_partition() const
The set partition of the cycles of a permutation.
Definition perm16_impl.hpp:344
bool left_weak_leq_ref(Perm16 other) const
Compare two permutations for the left weak order.
Definition perm16_impl.hpp:362
uint8_t nb_descents_ref() const
The number of descent of a permutation.
Definition perm16_impl.hpp:320
Perm16 inverse_sort() const
The inverse permutation.
Definition perm16_impl.hpp:225
static constexpr Perm16 one()
The identity partial permutation.
Definition perm16.hpp:223
epu8 lehmer_ref() const
The Lehmer code of a permutation.
Definition perm16_impl.hpp:271
bool left_weak_leq_length(Perm16 other) const
Compare two permutations for the left weak order.
Definition perm16_impl.hpp:385
uint8_t length() const
The Coxeter length (ie: number of inversion) of a permutation.
Definition perm16_impl.hpp:318
Perm16 inverse_ref() const
The inverse permutation.
Definition perm16_impl.hpp:209
uint8_t nb_descents() const
The number of descent of a permutation.
Definition perm16_impl.hpp:327
uint8_t nb_cycles_ref() const
The number of cycles of a permutation.
Definition perm16_impl.hpp:331
static Perm16 elementary_transposition(uint64_t i)
The elementary transposition exchanging and .
Definition perm16_impl.hpp:201
epu8 lehmer_arr() const
The Lehmer code of a permutation.
Definition perm16_impl.hpp:280
static Perm16 unrankSJT(int n, int r)
The r -th permutation of size n for the Steinhaus–Johnson–Trotter order.
Definition perm16_impl.hpp:173
bool left_weak_leq(Perm16 other) const
Compare two permutations for the left weak order.
Definition perm16_impl.hpp:372
uint8_t nb_cycles_unroll() const
The number of cycles of a permutation.
Definition perm16_impl.hpp:357
Perm16 inverse_pow() const
The inverse permutation.
Definition perm16_impl.hpp:267
uint8_t length_arr() const
The Coxeter length (ie: number of inversion) of a permutation.
Definition perm16_impl.hpp:308
Perm16 inverse_arr() const
The inverse permutation.
Definition perm16_impl.hpp:216
static Perm16 random(uint64_t n=16)
A random permutation of size .
Definition perm16_impl.hpp:161
Transf16()=default
Definition vect16.hpp:34
array & as_array()
Definition vect16.hpp:45
epu8 v
Definition vect16.hpp:37
static const Perm16 one()
Definition perm16_impl.hpp:243
static Perm16 prod(Perm16 a, Perm16 b)
Definition perm16_impl.hpp:244
Algebraic monoid structure used by default for type T by the pow function and prod function.
Definition power.hpp:103