28static_assert(std::is_trivial<BMat16>(),
"BMat16 is not a trivial class!");
30static constexpr xpu16 line{0x800, 0x901, 0xa02, 0xb03, 0xc04, 0xd05,
31 0xe06, 0xf07, 0x800, 0x901, 0xa02, 0xb03,
32 0xc04, 0xd05, 0xe06, 0xf07};
33static constexpr xpu16 block{0x200, 0x604, 0xa08, 0xe0c, 0x301, 0x705,
34 0xb09, 0xf0d, 0x200, 0x604, 0xa08, 0xe0c,
35 0x301, 0x705, 0xb09, 0xf0d};
38 return simde_mm256_shuffle_epi8(vect, line);
42 return simde_mm256_shuffle_epi8(vect, block);
46 uint64_t n3)
noexcept {
47 xpu64 tmp{n0, n1, n2, n3};
54 std::array<uint64_t, 4> tmp = {0, 0, 0, 0};
55 for (
int i = mat.size() - 1; i >= 0; --i) {
57 tmp[i / 4] <<= 16 - mat.size();
58 for (
int j = mat[i].size() - 1; j >= 0; --j) {
59 tmp[i / 4] = (tmp[i / 4] << 1) | mat[i][j];
62 _data =
xpu64{tmp[0], tmp[1], tmp[2], tmp[3]};
66 return (_data[i / 4] >> (16 * (i % 4) + j)) & 1;
69inline void BMat16::set(
size_t i,
size_t j,
bool val)
noexcept {
73 a <<= 16 * (i % 4) + j;
74 xpu64 mask{(i / 4 == 0) * a, (i / 4 == 1) * a, (i / 4 == 2) * a,
76 _data ^= (-val ^ _data) & mask;
80 xpu64 tmp = _data ^ that._data;
81 return simde_mm256_testz_si256(tmp, tmp);
85 return _data[0] < that._data[0] ||
86 (_data[0] == that._data[0] &&
87 (_data[1] < that._data[1] ||
88 (_data[1] == that._data[1] &&
89 (_data[2] < that._data[2] ||
90 (_data[2] == that._data[2] && (_data[3] < that._data[3]))))));
94 return _data[0] > that._data[0] ||
95 (_data[0] == that._data[0] &&
96 (_data[1] > that._data[1] ||
97 (_data[1] == that._data[1] &&
98 (_data[2] > that._data[2] ||
99 (_data[2] == that._data[2] && (_data[3] > that._data[3]))))));
104 uint64_t a = tmp[0], b = tmp[1], c = tmp[2], d = tmp[3];
105 std::array<std::array<bool, 16>, 16>
res;
106 for (
size_t i = 0; i < 64; ++i) {
107 res[i / 8][i % 8] = a & 1;
109 res[i / 8][8 + i % 8] = b & 1;
111 res[8 + i / 8][i % 8] = c & 1;
113 res[8 + i / 8][8 + i % 8] = d & 1;
120 uint64_t a = 0, b = 0, c = 0, d = 0;
121 for (
int i = 7; i >= 0; --i) {
122 for (
int j = 7; j >= 0; --j) {
123 a = (a << 1) | (*
this)(j, i);
124 b = (b << 1) | (*
this)(j + 8, i);
125 c = (c << 1) | (*
this)(j, i + 8);
126 d = (d << 1) | (*
this)(j + 8, i + 8);
129 return BMat16(a, b, c, d);
134 xpu64 x = simde_mm256_set_epi64x(tmp[3], tmp[1], tmp[2], tmp[0]);
135 xpu64 y = (x ^ (x >> 7)) & (
xpu64{0xAA00AA00AA00AA, 0xAA00AA00AA00AA,
136 0xAA00AA00AA00AA, 0xAA00AA00AA00AA});
137 x = x ^ y ^ (y << 7);
138 y = (x ^ (x >> 14)) &
139 (
xpu64{0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC});
140 x = x ^ y ^ (y << 14);
141 y = (x ^ (x >> 28)) &
142 (
xpu64{0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0});
143 x = x ^ y ^ (y << 28);
147static constexpr xpu16 rot{0x302, 0x504, 0x706, 0x908, 0xb0a, 0xd0c,
148 0xf0e, 0x100, 0x302, 0x504, 0x706, 0x908,
149 0xb0a, 0xd0c, 0xf0e, 0x100};
153 xpu16 y1 = that._data;
154 xpu16 y2 = simde_mm256_set_epi64x(that._data[1], that._data[0],
155 that._data[3], that._data[2]);
156 xpu16 zero = simde_mm256_setzero_si256();
157 xpu16 data = simde_mm256_setzero_si256();
158 xpu16 diag1{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
159 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000};
160 xpu16 diag2{0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000,
161 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
162 for (
size_t i = 0; i < 8; ++i) {
163 data |= ((x & y1) != zero) & diag1;
164 data |= ((x & y2) != zero) & diag2;
165 y1 = simde_mm256_shuffle_epi8(y1, rot);
166 y2 = simde_mm256_shuffle_epi8(y2, rot);
167 diag1 = simde_mm256_shuffle_epi8(diag1, rot);
168 diag2 = simde_mm256_shuffle_epi8(diag2, rot);
174 BMat16 tmp = that.transpose();
176 BMat8 a1(t1[0]), b1(t1[1]), c1(t1[2]), d1(t1[3]),
a2(t2[0]), b2(t2[1]),
177 c2(t2[2]), d2(t2[3]);
178 return BMat16((
a1.mult_transpose(
a2) | b1.mult_transpose(b2)).to_int(),
179 (
a1.mult_transpose(c2) | b1.mult_transpose(d2)).to_int(),
180 (c1.mult_transpose(
a2) | d1.mult_transpose(b2)).to_int(),
181 (c1.mult_transpose(c2) | d1.mult_transpose(d2)).to_int());
185 uint64_t a = 0, b = 0, c = 0, d = 0;
186 for (
int i = 7; i >= 0; --i) {
187 for (
int j = 7; j >= 0; --j) {
192 for (
size_t k = 0; k < 8; ++k) {
193 a |= ((*this)(i, k) & that(k, j)) |
194 ((*
this)(i, k + 8) & that(k + 8, j));
195 b |= ((*this)(i, k) & that(k, j + 8)) |
196 ((*
this)(i, k + 8) & that(k + 8, j + 8));
197 c |= ((*this)(i + 8, k) & that(k, j)) |
198 ((*
this)(i + 8, k + 8) & that(k + 8, j));
199 d |= ((*this)(i + 8, k) & that(k, j + 8)) |
200 ((*
this)(i + 8, k + 8) & that(k + 8, j + 8));
204 return BMat16(a, b, c, d);
208 std::array<std::array<bool, 16>, 16> tab1 =
to_array(),
209 tab2 = that.to_array();
210 uint64_t a = 0, b = 0, c = 0, d = 0;
211 for (
int i = 7; i >= 0; --i) {
212 for (
int j = 7; j >= 0; --j) {
217 for (
size_t k = 0; k < 16; ++k) {
218 a |= tab1[i][k] & tab2[k][j];
219 b |= tab1[i][k] & tab2[k][j + 8];
220 c |= tab1[i + 8][k] & tab2[k][j];
221 d |= tab1[i + 8][k] & tab2[k][j + 8];
225 return BMat16(a, b, c, d);
230 for (
size_t i = 0; i < 16; ++i)
231 if ((_data[i / 4] << (16 * (i % 4)) >> 48) != 0)
243 std::vector<uint16_t>
rows;
244 for (
size_t i = 0; i < 16; ++i) {
245 uint16_t row_rev = (_data[i / 4] << (16 * (3 - i % 4)) >> 48);
249 for (
size_t j = 0; j < 16; ++j) {
250 row = (row << 1) | (row_rev & 1);
259 static std::random_device _rd;
260 static std::mt19937 _gen(_rd());
261 static std::uniform_int_distribution<uint64_t> _dist(0, 0xffffffffffffffff);
263 return BMat16(_dist(_gen), _dist(_gen), _dist(_gen), _dist(_gen));
266static const constexpr std::array<xpu64, 16> ROW_MASK16 = {
268 xpu16{0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
270 xpu16{0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
272 xpu16{0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
274 xpu16{0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
276 xpu16{0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
278 xpu16{0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
280 xpu16{0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
282 xpu16{0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0}),
284 xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0}),
286 xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0}),
288 xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0}),
290 xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0}),
292 xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0}),
294 xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0}),
296 xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0}),
298 xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff})};
300static const constexpr std::array<xpu64, 16> COL_MASK16 = {
301 static_cast<xpu64>(
xpu16{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}),
302 static_cast<xpu64>(
xpu16{2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}),
303 static_cast<xpu64>(
xpu16{4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}),
304 static_cast<xpu64>(
xpu16{8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}),
305 static_cast<xpu64>(
xpu16{0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
306 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10}),
307 static_cast<xpu64>(
xpu16{0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
308 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}),
309 static_cast<xpu64>(
xpu16{0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
310 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40}),
311 static_cast<xpu64>(
xpu16{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
312 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}),
313 static_cast<xpu64>(
xpu16{0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100,
314 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100,
316 static_cast<xpu64>(
xpu16{0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200,
317 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200,
319 static_cast<xpu64>(
xpu16{0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400,
320 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400,
322 static_cast<xpu64>(
xpu16{0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800,
323 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800,
325 static_cast<xpu64>(
xpu16{0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
326 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
327 0x1000, 0x1000, 0x1000, 0x1000}),
328 static_cast<xpu64>(
xpu16{0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
329 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
330 0x2000, 0x2000, 0x2000, 0x2000}),
331 static_cast<xpu64>(
xpu16{0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000,
332 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000,
333 0x4000, 0x4000, 0x4000, 0x4000}),
334 static_cast<xpu64>(
xpu16{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
335 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
336 0x8000, 0x8000, 0x8000, 0x8000})};
343 for (
size_t i = dim; i < 16; ++i) {
344 bm._data &= ~ROW_MASK16[i];
345 bm._data &= ~COL_MASK16[i];
351 for (
size_t i = 0; i < 16; ++i) {
352 for (
size_t j = 0; j < 16; ++j) {
Class for fast boolean matrices of dimension up to 16 x 16.
Definition bmat16.hpp:65
std::ostream & write(std::ostream &os) const
Write this on os.
Definition bmat16_impl.hpp:350
size_t nr_rows() const noexcept
Returns the number of non-zero rows of this.
Definition bmat16_impl.hpp:228
bool operator>(BMat16 const &that) const noexcept
Returns true if this is greater than that.
Definition bmat16_impl.hpp:93
BMat16 mult_transpose(BMat16 const &that) const noexcept
Returns the matrix product of this and the transpose of that.
Definition bmat16_impl.hpp:151
std::vector< uint16_t > rows() const
Returns a std::vector for rows of this.
Definition bmat16_impl.hpp:242
bool operator==(BMat16 const &that) const noexcept
Returns true if this equals that.
Definition bmat16_impl.hpp:79
BMat16 mult_4bmat8(BMat16 const &that) const noexcept
Returns the matrix product of this and that.
Definition bmat16_impl.hpp:173
BMat16 mult_naive(BMat16 const &that) const noexcept
Returns the matrix product of this and that.
Definition bmat16_impl.hpp:184
BMat16 mult_naive_array(BMat16 const &that) const noexcept
Returns the matrix product of this and that.
Definition bmat16_impl.hpp:207
BMat16 transpose() const noexcept
Returns the transpose of this.
Definition bmat16_impl.hpp:132
void set(size_t i, size_t j, bool val) noexcept
Sets the (i, j)th position to val.
Definition bmat16_impl.hpp:69
std::array< std::array< bool, 16 >, 16 > to_array() const noexcept
Returns the array representation of this.
Definition bmat16_impl.hpp:102
BMat16 transpose_naive() const noexcept
Returns the transpose of this.
Definition bmat16_impl.hpp:119
bool operator<(BMat16 const &that) const noexcept
Returns true if this is less than that.
Definition bmat16_impl.hpp:84
static BMat16 random()
Returns a random BMat16.
Definition bmat16_impl.hpp:258
BMat16() noexcept=default
A default constructor.
bool operator()(size_t i, size_t j) const noexcept
Returns the entry in the (i, j)th position.
Definition bmat16_impl.hpp:65
Boolean matrices of dimension up to 8×8, stored as a single uint64; isomorph to binary relations with...
Definition bmat8.hpp:55
#define HPCOMBI_ASSERT(x)
Definition debug.hpp:31
const Transf16 a1
Definition image.cpp:52
const Transf16 a2
Definition image.cpp:53
std::array< std::tuple< uint16_t, uint16_t, std::array< uint16_t, gens.size()> >, 65536 > res
Definition image.cpp:66
xpu64 to_block(xpu64 vect)
Converting storage type from rows to blocks of a xpu64 representing a 16x16 matrix (used in BMat16).
Definition bmat16_impl.hpp:41
uint64_t __attribute__((vector_size(32))) xpu64
Definition bmat16.hpp:41
uint16_t __attribute__((vector_size(32))) xpu16
Definition bmat16.hpp:40
xpu64 to_line(xpu64 vect)
Converting storage type from blocks to rows of a xpu64 representing a 16x16 matrix (used in BMat16).
Definition bmat16_impl.hpp:37
Definition bmat16_impl.hpp:362
std::ostream & operator<<(std::ostream &os, HPCombi::BMat16 const &bm)
Definition bmat16_impl.hpp:365