HPCombi
High Performance Combinatorics in C++ using vector instructions v1.0.3
Loading...
Searching...
No Matches
perm16_impl.hpp
Go to the documentation of this file.
1//****************************************************************************//
2// Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
3// //
4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
5// //
6// HP-Combi is free software: you can redistribute it and/or modify it //
7// under the terms of the GNU General Public License as published by the //
8// Free Software Foundation, either version 3 of the License, or //
9// (at your option) any later version. //
10// //
11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
14// for more details. //
15// //
16// You should have received a copy of the GNU General Public License along //
17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
18//****************************************************************************//
19
20// NOLINT(build/header_guard)
21
25
26namespace HPCombi {
27inline PTransf16::PTransf16(std::initializer_list<uint8_t> il)
28 : Vect16(Epu8.id()) {
29 HPCOMBI_ASSERT(il.size() <= 16);
30 std::copy(il.begin(), il.end(), HPCombi::as_array(v).begin());
31}
32
33inline PTransf16::PTransf16(std::vector<uint8_t> dom, std::vector<uint8_t> rng,
34 size_t /*unused */)
35 : Vect16(Epu8(0xFF)) {
36 HPCOMBI_ASSERT(dom.size() == rng.size());
37 HPCOMBI_ASSERT(dom.size() <= 16);
38 for (size_t i = 0; i < dom.size(); ++i) {
39 HPCOMBI_ASSERT(dom[i] < 16);
40 v[dom[i]] = rng[i];
41 }
42}
43
44inline epu8 PTransf16::domain_mask(bool complement) const {
45 return complement ? v == Epu8(0xFF) : v != Epu8(0xFF);
46}
47inline uint32_t PTransf16::domain_bitset(bool complement) const {
48 return simde_mm_movemask_epi8(domain_mask(complement));
49}
51 return domain_mask(true) | Epu8.id();
52}
53
54#ifdef SIMDE_X86_SSE4_2_NATIVE
55inline epu8 PTransf16::image_mask_cmpestrm(bool complement) const {
56 return complement ? _mm_cmpestrm(v, 16, one().v, 16, FIND_IN_VECT)
57 : _mm_cmpestrm(v, 16, one().v, 16, FIND_IN_VECT_COMPL);
58}
59#endif
60inline epu8 PTransf16::image_mask_ref(bool complement) const {
61 epu8 res{};
62 for (auto x : *this)
63 if (x != 0xFF)
64 res[x] = 0xFF;
65 return complement ? static_cast<epu8>(!res) : res;
66}
67
68inline uint32_t PTransf16::image_bitset(bool complement) const {
69 return simde_mm_movemask_epi8(image_mask(complement));
70}
72 return image_mask(true) | Epu8.id();
73}
74inline uint32_t PTransf16::rank_ref() const {
75 decltype(Epu8)::array tmp{};
76 static_assert(decltype(Epu8)::size == 16, "Wrong size of EPU8 array");
77 for (auto x : *this)
78 if (x != 0xFF)
79 tmp[x] = 1;
80 return std::accumulate(tmp.begin(), tmp.end(), uint8_t(0));
81}
82
83inline uint32_t PTransf16::rank_cmpestrm() const {
84 return __builtin_popcountl(image_bitset());
85}
86
87inline uint32_t PTransf16::rank() const {
88#ifdef SIMDE_X86_SSE4_2_NATIVE
89 return rank_cmpestrm();
90#else
91 return rank_ref();
92#endif
93}
94
95inline epu8 PTransf16::fix_points_mask(bool complement) const {
96 return complement ? v != one().v : v == one().v;
97}
98inline uint32_t PTransf16::fix_points_bitset(bool complement) const {
99 return simde_mm_movemask_epi8(fix_points_mask(complement));
100}
101
102inline uint8_t PTransf16::smallest_fix_point() const {
103 return __builtin_ffs(fix_points_bitset(false)) - 1;
104}
105
106inline uint8_t PTransf16::smallest_moved_point() const {
107 return __builtin_ffs(fix_points_bitset(true)) - 1;
108}
109
110inline uint8_t PTransf16::largest_fix_point() const {
111 uint32_t res = fix_points_bitset(false);
112 return res == 0 ? 0xFF : 31 - __builtin_clz(res);
113}
114
115inline uint8_t PTransf16::largest_moved_point() const {
116 uint32_t res = fix_points_bitset(true);
117 return res == 0 ? 0xFF : 31 - __builtin_clz(res);
118}
119
120inline uint8_t PTransf16::nb_fix_points() const {
121 return __builtin_popcountl(fix_points_bitset());
122}
123
124inline static constexpr uint8_t hilo_exchng_fun(uint8_t i) {
125 return i < 8 ? i + 8 : i - 8;
126}
127static constexpr epu8 hilo_exchng = Epu8(hilo_exchng_fun);
128inline static constexpr uint8_t hilo_mask_fun(uint8_t i) {
129 return i < 8 ? 0x0 : 0xFF;
130}
131static constexpr epu8 hilo_mask = Epu8(hilo_mask_fun);
132
133inline Transf16::Transf16(uint64_t compressed) {
134 epu8 res = simde_mm_set_epi64x(compressed, compressed);
135 v = simde_mm_blendv_epi8(res & Epu8(0x0F), res >> 4, hilo_mask);
136}
137
138inline Transf16::operator uint64_t() const {
139 epu8 res =
140 static_cast<epu8>(simde_mm_slli_epi32(static_cast<simde__m128i>(v), 4));
141 res = HPCombi::permuted(res, hilo_exchng) + v;
142 return simde_mm_extract_epi64(res, 0);
143}
144
146 epu8 res = Epu8(0xFF);
147 for (size_t i = 0; i < 16; ++i)
148 if (v[i] < 16)
149 res[v[i]] = i;
150 return res;
151}
152
153#ifdef SIMDE_X86_SSE4_2_NATIVE
154inline PPerm16 PPerm16::inverse_find() const {
155 epu8 mask = _mm_cmpestrm(v, 16, one(), 16, FIND_IN_VECT);
156 return permutation_of(v, one()) | mask;
157}
158#endif
159
160inline Perm16 Perm16::random(uint64_t n) {
161 static std::random_device rd;
162 static std::mt19937 g(rd());
163
164 Perm16 res = one();
165 auto ar = res.as_array();
166
167 std::shuffle(ar.begin(), ar.begin() + n, g);
168 return res;
169}
170
171// From Ruskey : Combinatorial Generation page 138
172inline Perm16 Perm16::unrankSJT(int n, int r) {
173 int j;
174 std::array<int, 16> dir;
175 epu8 res{};
176 for (j = 0; j < n; ++j)
177 res[j] = 0xFF;
178 for (j = n - 1; j >= 0; --j) {
179 int k, rem, c;
180 rem = r % (j + 1);
181 r = r / (j + 1);
182 if ((r & 1) != 0) {
183 k = -1;
184 dir[j] = +1;
185 } else {
186 k = n;
187 dir[j] = -1;
188 }
189 c = -1;
190 do {
191 k = k + dir[j];
192 if (res[k] == 0xFF)
193 ++c;
194 } while (c < rem);
195 res[k] = j;
196 }
197 return res;
198}
199
201 HPCOMBI_ASSERT(i < 16);
202 epu8 res = one();
203 res[i] = i + 1;
204 res[i + 1] = i;
205 return res;
206}
207
209 epu8 res;
210 for (size_t i = 0; i < 16; ++i)
211 res[v[i]] = i;
212 return res;
213}
214
216 epu8 res;
217 auto &arres = HPCombi::as_array(res);
218 auto self = as_array();
219 for (size_t i = 0; i < 16; ++i)
220 arres[self[i]] = i;
221 return res;
222}
223
225 // G++-7 compile this shift by 3 additions.
226 // epu8 res = (v << 4) + one().v;
227 // I call directly the shift intrinsic
228 epu8 res = static_cast<epu8>(
229 simde_mm_slli_epi32(static_cast<simde__m128i>(v), 4)) +
230 one().v;
231 res = sorted(res) & Epu8(0x0F);
232 return res;
233}
234
235// We declare PERM16 as a correct Monoid
236namespace power_helper {
237
238// TODO required?
240
241template <> struct Monoid<Perm16> {
242 static const Perm16 one() { return Perm16::one(); }
243 static Perm16 prod(Perm16 a, Perm16 b) { return a * b; }
244};
245
246} // namespace power_helper
247
249 Perm16 res = one();
250 Perm16 newpow = pow<8>(*this);
251 for (int i = 9; i <= 16; i++) {
252 Perm16 oldpow = newpow;
253 newpow = oldpow * *this;
254 res.v = simde_mm_blendv_epi8(res, oldpow, newpow.v == one().v);
255 }
256 return res;
257}
258
259static constexpr uint32_t lcm_range(uint8_t n) {
260 uint32_t res = 1;
261 for (uint8_t i = 1; i <= n; ++i)
262 res = std::lcm(res, i);
263 return res;
264}
265
267 return pow<lcm_range(16) - 1>(*this);
268}
269
270inline epu8 Perm16::lehmer_ref() const {
271 epu8 res{};
272 for (size_t i = 0; i < 16; i++)
273 for (size_t j = i + 1; j < 16; j++)
274 if (v[i] > v[j])
275 res[i]++;
276 return res;
277}
278
279inline epu8 Perm16::lehmer_arr() const {
280 decltype(Epu8)::array res{};
281 decltype(Epu8)::array ar = as_array();
282 for (size_t i = 0; i < 16; i++)
283 for (size_t j = i + 1; j < 16; j++)
284 if (ar[i] > ar[j])
285 res[i]++;
286 return Epu8(res);
287}
288
289inline epu8 Perm16::lehmer() const {
290 epu8 vsh = v, res = -one().v;
291 for (int i = 1; i < 16; i++) {
292 vsh = shifted_left(vsh);
293 res -= (v >= vsh);
294 }
295 return res;
296}
297
298inline uint8_t Perm16::length_ref() const {
299 uint8_t res = 0;
300 for (size_t i = 0; i < 16; i++)
301 for (size_t j = i + 1; j < 16; j++)
302 if (v[i] > v[j])
303 res++;
304 return res;
305}
306
307inline uint8_t Perm16::length_arr() const {
308 uint8_t res = 0;
309 decltype(Epu8)::array ar = as_array();
310 for (size_t i = 0; i < 16; i++)
311 for (size_t j = i + 1; j < 16; j++)
312 if (ar[i] > ar[j])
313 res++;
314 return res;
315}
316
317inline uint8_t Perm16::length() const { return horiz_sum(lehmer()); }
318
319inline uint8_t Perm16::nb_descents_ref() const {
320 uint8_t res = 0;
321 for (size_t i = 0; i < 16 - 1; i++)
322 if (v[i] > v[i + 1])
323 res++;
324 return res;
325}
326inline uint8_t Perm16::nb_descents() const {
327 return __builtin_popcountl(simde_mm_movemask_epi8(v < shifted_right(v)));
328}
329
330inline uint8_t Perm16::nb_cycles_ref() const {
331 std::array<bool, 16> b{};
332 uint8_t c = 0;
333 for (size_t i = 0; i < 16; i++) {
334 if (!b[i]) {
335 for (size_t j = i; !b[j]; j = v[j])
336 b[j] = true;
337 c++;
338 }
339 }
340 return c;
341}
342
344 epu8 x0, x1 = one();
345 Perm16 p = *this;
346 x0 = simde_mm_min_epi8(x1, HPCombi::permuted(x1, p));
347 p = p * p;
348 x1 = simde_mm_min_epi8(x0, HPCombi::permuted(x0, p));
349 p = p * p;
350 x0 = simde_mm_min_epi8(x1, HPCombi::permuted(x1, p));
351 p = p * p;
352 x1 = simde_mm_min_epi8(x0, HPCombi::permuted(x0, p));
353 return x1;
354}
355
356inline uint8_t Perm16::nb_cycles_unroll() const {
357 epu8 res = (Epu8.id() == cycles_partition());
358 return __builtin_popcountl(simde_mm_movemask_epi8(res));
359}
360
361inline bool Perm16::left_weak_leq_ref(Perm16 other) const {
362 for (size_t i = 0; i < 16; i++) {
363 for (size_t j = i + 1; j < 16; j++) {
364 if ((v[i] > v[j]) && (other[i] < other[j]))
365 return false;
366 }
367 }
368 return true;
369}
370
371inline bool Perm16::left_weak_leq(Perm16 other) const {
372 epu8 srot = v, orot = other;
373 for (size_t i = 0; i < 15; i++) {
374 srot = shifted_right(srot);
375 orot = shifted_right(orot);
376 uint64_t sinv = simde_mm_movemask_epi8(v < srot);
377 uint64_t oinv = simde_mm_movemask_epi8(other.v < orot);
378 if ((sinv & oinv) != sinv)
379 return false;
380 }
381 return true;
382}
383
384inline bool Perm16::left_weak_leq_length(Perm16 other) const {
385 Perm16 prod = *this * other.inverse();
386 return other.length() == length() + prod.length();
387}
388
389} // namespace HPCombi
const PTransf16 id
Definition RD.cpp:37
uint8_t __attribute__((vector_size(16))) epu8
epu8 stands for Extended Packed Unsigned, grouped by 8 bits; this is the low level type chosen by Int...
Definition epu8.hpp:73
#define HPCOMBI_ASSERT(x)
Definition debug.hpp:31
std::array< std::tuple< uint16_t, uint16_t, std::array< uint16_t, gens.size()> >, 65536 > res
Definition image.cpp:66
Definition perm16_impl.hpp:236
Perm16 Perm16
Definition perm16_impl.hpp:239
Definition bmat16.hpp:39
epu8 permuted(epu8 a, epu8 b) noexcept
Same as permuted_ref but with an optimized implementation using intrinsics.
Definition epu8.hpp:103
epu8 shifted_right(epu8 a) noexcept
Left shifted of a HPCombi::epu8 inserting a 0.
Definition epu8.hpp:110
epu8 permutation_of(epu8 a, epu8 b) noexcept
Find if a vector is a permutation of another one.
Definition epu8_impl.hpp:304
uint8_t horiz_sum(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:260
epu8 sorted(epu8 a) noexcept
Return a sorted HPCombi::epu8.
Definition epu8_impl.hpp:204
constexpr TPUBuild< epu8 > Epu8
Factory object acting as a class constructor for type HPCombi::epu8.
Definition epu8.hpp:81
uint8_t __attribute__((vector_size(16))) epu8
epu8 stands for Extended Packed Unsigned, grouped by 8 bits; this is the low level type chosen by Int...
Definition epu8.hpp:73
epu8 shifted_left(epu8 a) noexcept
Right shifted of a HPCombi::epu8 inserting a 0.
Definition epu8.hpp:117
TPUBuild< TPU >::array & as_array(TPU &v) noexcept
Cast a TPU to a c++ std::array.
Definition builder.hpp:145
const T pow(const T x)
A generic compile time exponentiation function.
Definition power.hpp:91
Partial permutation of ; see also HPCombi::Perm16; partial means it might not be defined everywhere (...
Definition perm16.hpp:176
PPerm16 inverse_ref() const
The inverse of a partial permutation.
Definition perm16_impl.hpp:145
PPerm16()=default
static constexpr PPerm16 one()
The identity partial permutations.
Definition perm16.hpp:194
uint8_t nb_fix_points() const
Returns the number of fix points of *this.
Definition perm16_impl.hpp:120
uint32_t fix_points_bitset(bool complement=false) const
Returns a bit mask for the fix point of *this.
Definition perm16_impl.hpp:98
static constexpr size_t size()
Definition perm16.hpp:71
static constexpr PTransf16 one()
The identity partial transformation.
Definition perm16.hpp:90
uint8_t largest_moved_point() const
Returns the largest non fix point of *this.
Definition perm16_impl.hpp:115
uint32_t domain_bitset(bool complement=false) const
Returns a bit mask for the domain of *this.
Definition perm16_impl.hpp:47
PTransf16 left_one() const
Returns the partial left identity for *this.
Definition perm16_impl.hpp:71
typename decltype(Epu8)::array array
Definition perm16.hpp:74
uint32_t rank_ref() const
Returns the size of the image of *this.
Definition perm16_impl.hpp:74
PTransf16 right_one() const
Returns the partial right identity for *this.
Definition perm16_impl.hpp:50
uint32_t image_bitset(bool complement=false) const
Returns a bit mask for the image of *this.
Definition perm16_impl.hpp:68
epu8 fix_points_mask(bool complement=false) const
Returns a mask for the fix point of *this.
Definition perm16_impl.hpp:95
uint8_t smallest_fix_point() const
Returns the smallest fix point of *this.
Definition perm16_impl.hpp:102
uint32_t rank_cmpestrm() const
Returns the size of the image of *this.
Definition perm16_impl.hpp:83
epu8 domain_mask(bool complement=false) const
Returns a mask for the domain of *this.
Definition perm16_impl.hpp:44
uint32_t rank() const
Returns the size of the image of *this.
Definition perm16_impl.hpp:87
epu8 image_mask_ref(bool complement=false) const
Returns a mask for the image of *this.
Definition perm16_impl.hpp:60
uint8_t largest_fix_point() const
Returns the largest fix point of *this.
Definition perm16_impl.hpp:110
epu8 image_mask(bool complement=false) const
Definition perm16.hpp:100
epu8 image_mask_cmpestrm(bool complement=false) const
Returns a mask for the image of *this.
uint8_t smallest_moved_point() const
Returns the smallest non fix point of *this.
Definition perm16_impl.hpp:106
Perm16 inverse_cycl() const
Same as inverse but with a different algorithm.
Definition perm16_impl.hpp:248
Perm16 inverse() const
The inverse permutation.
Definition perm16.hpp:274
epu8 lehmer() const
The Lehmer code of a permutation.
Definition perm16_impl.hpp:289
uint8_t length_ref() const
Same interface as length, with a different implementation.
Definition perm16_impl.hpp:298
epu8 cycles_partition() const
The set partition of the cycles of a permutation.
Definition perm16_impl.hpp:343
bool left_weak_leq_ref(Perm16 other) const
Same interface as left_weak_leq but with a different implementation.
Definition perm16_impl.hpp:361
uint8_t nb_descents_ref() const
Same interface as nb_descents, with a different implementation.
Definition perm16_impl.hpp:319
Perm16 inverse_sort() const
Same as inverse but with a different algorithm.
Definition perm16_impl.hpp:224
static constexpr Perm16 one()
The identity partial permutation.
Definition perm16.hpp:252
epu8 lehmer_ref() const
Same interface as lehmer but with a different implementation.
Definition perm16_impl.hpp:270
bool left_weak_leq_length(Perm16 other) const
Same interface as left_weak_leq but with a different implementation.
Definition perm16_impl.hpp:384
uint8_t length() const
The Coxeter length (ie: number of inversion) of a permutation.
Definition perm16_impl.hpp:317
Perm16 inverse_ref() const
Same as inverse but with a different algorithm.
Definition perm16_impl.hpp:208
uint8_t nb_descents() const
The number of descent of a permutation.
Definition perm16_impl.hpp:326
uint8_t nb_cycles_ref() const
Same interface as nb_cycles but with a different implementation.
Definition perm16_impl.hpp:330
static Perm16 elementary_transposition(uint64_t i)
The elementary transposition exchanging and .
Definition perm16_impl.hpp:200
epu8 lehmer_arr() const
Same interface as lehmer but with a different implementation.
Definition perm16_impl.hpp:279
static Perm16 unrankSJT(int n, int r)
The r -th permutation of size n for the Steinhaus–Johnson–Trotter order.
Definition perm16_impl.hpp:172
bool left_weak_leq(Perm16 other) const
Compare two permutations for the left weak order.
Definition perm16_impl.hpp:371
uint8_t nb_cycles_unroll() const
Same interface as nb_cycles but with a different implementation.
Definition perm16_impl.hpp:356
Perm16()=default
Perm16 inverse_pow() const
Same as inverse but with a different algorithm.
Definition perm16_impl.hpp:266
uint8_t length_arr() const
Same interface as length, with a different implementation.
Definition perm16_impl.hpp:307
Perm16 inverse_arr() const
Same as inverse but with a different algorithm.
Definition perm16_impl.hpp:215
static Perm16 random(uint64_t n=16)
A random permutation of size .
Definition perm16_impl.hpp:160
Transf16()=default
Vect16()=default
array & as_array()
Definition vect16.hpp:50
epu8 v
Definition vect16.hpp:42
static const Perm16 one()
Definition perm16_impl.hpp:242
static Perm16 prod(Perm16 a, Perm16 b)
Definition perm16_impl.hpp:243
Algebraic monoid structure used by default for type T by the pow function and prod function.
Definition power.hpp:111