39#include <m4ri/m4ri_config.h>
49#include <m4ri/debug_dump.h>
59#define __M4RI_MUL_BLOCKSIZE MIN(((int)sqrt((double)(4 * __M4RI_CPU_L3_CACHE))) / 2, 2048)
95 uint8_t padding[63 - 2 *
sizeof(
rci_t) - 2 *
sizeof(
wi_t) -
sizeof(
word) -
sizeof(
void *)];
172 return (M->
flags & danger) == danger;
189static inline word const * mzd_row_const(
mzd_t const *M,
rci_t row) {
254#define mzd_free_window mzd_free
266 wi_t const startblock) {
267 if ((rowa == rowb) || (startblock >= M->
width)) {
return; }
275 for (
wi_t i = 0; i < width; ++i) {
280 tmp = (a[width] ^ b[width]) & mask_end;
284 __M4RI_DD_ROW(M, rowa);
285 __M4RI_DD_ROW(M, rowb);
326 rci_t const start_row,
rci_t const stop_row) {
327 if (cola == colb) {
return; }
329 rci_t const _cola = cola;
330 rci_t const _colb = colb;
339 int max_bit =
MAX(a_bit, b_bit);
340 int count_remaining = stop_row - start_row;
341 int min_bit = a_bit + b_bit - max_bit;
342 int offset = max_bit - min_bit;
344 int count = count_remaining;
347 if (count <= 0) {
return; }
349 if (a_word == b_word) {
351 count_remaining -= count;
352 assert(count_remaining == 0);
354 int fast_count = count / 4;
355 int rest_count = count - 4 * fast_count;
358 while (fast_count--) {
360 xor_v[1] = ptr[rowstride];
361 xor_v[2] = ptr[2 * rowstride];
362 xor_v[3] = ptr[3 * rowstride];
363 xor_v[0] ^= xor_v[0] >> offset;
364 xor_v[1] ^= xor_v[1] >> offset;
365 xor_v[2] ^= xor_v[2] >> offset;
366 xor_v[3] ^= xor_v[3] >> offset;
371 xor_v[0] |= xor_v[0] << offset;
372 xor_v[1] |= xor_v[1] << offset;
373 xor_v[2] |= xor_v[2] << offset;
374 xor_v[3] |= xor_v[3] << offset;
376 ptr[rowstride] ^= xor_v[1];
377 ptr[2 * rowstride] ^= xor_v[2];
378 ptr[3 * rowstride] ^= xor_v[3];
379 ptr += 4 * rowstride;
381 while (rest_count--) {
383 xor_v ^= xor_v >> offset;
385 *ptr ^= xor_v | (xor_v << offset);
391 word *RESTRICT min_ptr;
393 if (min_bit == a_bit) {
394 min_ptr = ptr + a_word;
395 max_offset = b_word - a_word;
397 min_ptr = ptr + b_word;
398 max_offset = a_word - b_word;
401 count_remaining -= count;
402 assert(count_remaining == 0);
405 word xor_v = (min_ptr[0] ^ (min_ptr[max_offset] >> offset)) & mask;
407 min_ptr[max_offset] ^= xor_v << offset;
408 min_ptr += rowstride;
441 word const * truerow = mzd_row_const(M, row);
477 row[block] ^= values << spot;
479 if (n > space) { row[block + 1] ^= values >> space; }
500 row[block] &= values << spot;
502 if (n > space) { row[block + 1] &= values >> space; }
520 row[block] &= ~(values << spot);
522 if (n > space) { row[block + 1] &= ~(values >> space); }
538 assert(dstrow < M->nrows && srcrow < M->nrows && coloffset < M->ncols);
546 *dst++ ^= *src++ & mask_begin;
551 if (wide > not_aligned + 1)
558 __m128i *__src = (__m128i *)src;
559 __m128i *__dst = (__m128i *)dst;
560 __m128i *
const eof = (__m128i *)((
unsigned long)(src + wide) & ~0xFUL);
562 __m128i xmm1 = _mm_xor_si128(*__dst, *__src);
564 }
while (++__src < eof);
567 wide = ((
sizeof(
word) * wide) % 16) /
sizeof(
word);
571 while (++i < wide) { dst[i] ^= src[i]; }
579 dst[i - 1] ^= src[i - 1] & ~mask_end;
581 __M4RI_DD_ROW(M, dstrow);
871#define mzd_sub mzd_add
881#define _mzd_sub _mzd_add
896 word const *row = mzd_row_const(M, x);
897 word temp = (spill <= 0)
898 ? row[block] << -spill
899 : (row[block + 1] << (
m4ri_radix - spill)) | (row[block] >> spill);
921 wi_t const b_startblock) {
926 word const *b = mzd_row_const(B, b_row) + b_startblock;
937 __m128i *a128 = (__m128i *)a;
938 __m128i *b128 = (__m128i *)b;
939 const __m128i *eof = (__m128i *)((
unsigned long)(a + wide) & ~0xFUL);
942 *a128 = _mm_xor_si128(*a128, *b128);
945 }
while (a128 < eof);
949 wide = ((
sizeof(
word) * wide) % 16) /
sizeof(
word);
955 wi_t n = (wide + 7) / 8;
957 case 0:
do { *(a++) ^= *(b++);
958 case 7: *(a++) ^= *(b++);
959 case 6: *(a++) ^= *(b++);
960 case 5: *(a++) ^= *(b++);
961 case 4: *(a++) ^= *(b++);
962 case 3: *(a++) ^= *(b++);
963 case 2: *(a++) ^= *(b++);
964 case 1: *(a++) ^= *(b++);
998 word const *a = mzd_row_const(A, a_row) + a_startblock;
999 word const *b = mzd_row_const(B, b_row) + b_startblock;
1011 __m128i *a128 = (__m128i *)a;
1012 __m128i *b128 = (__m128i *)b;
1013 __m128i *c128 = (__m128i *)c;
1014 const __m128i *eof = (__m128i *)((
unsigned long)(a + wide) & ~0xFUL);
1017 *c128 = _mm_xor_si128(*a128, *b128);
1021 }
while (a128 < eof);
1026 wide = ((
sizeof(
word) * wide) % 16) /
sizeof(
word);
1032 wi_t n = (wide + 7) / 8;
1034 case 0:
do { *(c++) = *(a++) ^ *(b++);
1035 case 7: *(c++) = *(a++) ^ *(b++);
1036 case 6: *(c++) = *(a++) ^ *(b++);
1037 case 5: *(c++) = *(a++) ^ *(b++);
1038 case 4: *(c++) = *(a++) ^ *(b++);
1039 case 3: *(c++) = *(a++) ^ *(b++);
1040 case 2: *(c++) = *(a++) ^ *(b++);
1041 case 1: *(c++) = *(a++) ^ *(b++);
1070 rci_t const b_row,
wi_t const b_startblock) {
1072 if ((C == A) & (a_row == c_row) & (a_startblock == c_startblock)) {
1075 mzd_combine_even(C, c_row, c_startblock, A, a_row, a_startblock, B, b_row, b_startblock);
1177 hash ^= rotate_word(calculate_hash(mzd_row_const(A, r), A->
width), r %
m4ri_radix);
int rci_t
Type of row and column indexes.
Definition misc.h:72
#define __M4RI_ALIGNMENT(addr, n)
Return alignment of addr w.r.t. n. For example the address 17 would be 1 aligned w....
Definition misc.h:421
int64_t wi_t
Type of word indexes.
Definition misc.h:81
#define __M4RI_CONVERT_TO_INT(w)
Explicit conversion macro.
Definition misc.h:99
#define __M4RI_GET_BIT(w, spot)
Get the bit spot (counting from the left) in the word w.
Definition misc.h:226
#define __M4RI_WRITE_BIT(w, spot, value)
Write the value to the bit spot in the word w.
Definition misc.h:236
uint64_t word
A word is the typical packed data structure to represent packed bits.
Definition misc.h:87
int BIT
Pretty for a boolean int.
Definition misc.h:64
static word const m4ri_ffff
A word with all bits set.
Definition misc.h:153
static int const m4ri_radix
The number of bits in a word.
Definition misc.h:141
#define MAX(x, y)
Return the maximal element of x and y.
Definition misc.h:163
#define __M4RI_RIGHT_BITMASK(n)
create a bit mask to zero out all but the n rightmost bits.
Definition misc.h:297
static word const m4ri_one
The number one as a word.
Definition misc.h:147
mzd_t * mzd_transpose(mzd_t *DST, mzd_t const *A)
Transpose a matrix.
Definition mzd.c:1119
static int mzd_is_windowed(mzd_t const *M)
Test if a matrix is windowed.
Definition mzd.h:159
static uint8_t const mzd_flag_nonzero_excess
flag when ncols%64 != 0
Definition mzd.h:144
mzd_t * mzd_submatrix(mzd_t *S, mzd_t const *M, rci_t const lowr, rci_t const lowc, rci_t const highr, rci_t const highc)
Copy a submatrix.
Definition mzd.c:1586
rci_t mzd_echelonize_naive(mzd_t *M, int const full)
Gaussian elimination.
Definition mzd.c:234
word(* m4ri_random_callback)(void *data)
Random callback that produces uniformly distributed random words on every call.
Definition mzd.h:685
static void mzd_col_swap_in_rows(mzd_t *M, rci_t const cola, rci_t const colb, rci_t const start_row, rci_t const stop_row)
Swap the two columns cola and colb but only between start_row and stop_row.
Definition mzd.h:325
void mzd_randomize(mzd_t *M)
Fill matrix M with uniformly distributed bits.
Definition mzd.c:1271
void mzd_row_clear_offset(mzd_t *M, rci_t const row, rci_t const coloffset)
Clear the given row, but only begins at the column coloffset.
Definition mzd.c:192
void mzd_randomize_custom(mzd_t *M, m4ri_random_callback rc, void *data)
Fill matrix M with uniformly distributed bits.
Definition mzd.c:1283
mzd_t * mzd_stack(mzd_t *C, mzd_t const *A, mzd_t const *B)
Stack A on top of B and write the result to C.
Definition mzd.c:1411
void mzd_row_add(mzd_t *M, rci_t const sourcerow, rci_t const destrow)
Add the rows sourcerow and destrow and stores the total in the row destrow.
Definition mzd.c:188
void mzd_free(mzd_t *A)
Free a matrix created with mzd_init.
Definition mzd.c:180
rci_t mzd_first_zero_row(mzd_t const *A)
Return the first row with all zero entries.
Definition mzd.c:1827
static void mzd_row_swap(mzd_t *M, rci_t const rowa, rci_t const rowb)
Swap the two rows rowa and rowb.
Definition mzd.h:296
static uint8_t const mzd_flag_windowed
flag for windowed matrix
Definition mzd.h:150
mzd_t * mzd_init(rci_t const r, rci_t const c)
Create a new matrix of dimension r x c.
Definition mzd.c:142
int mzd_cmp(mzd_t const *A, mzd_t const *B)
Return -1,0,1 if if A < B, A == B or A > B respectively.
Definition mzd.c:1334
static void mzd_row_add_offset(mzd_t *M, rci_t dstrow, rci_t srcrow, rci_t coloffset)
Add the rows sourcerow and destrow and stores the total in the row destrow, but only begins at the co...
Definition mzd.h:537
void mzd_copy_row(mzd_t *B, rci_t i, mzd_t const *A, rci_t j)
copy row j from A to row i from B.
Definition mzd.c:1642
mzd_t * mzd_invert_naive(mzd_t *INV, mzd_t const *A, mzd_t const *I)
Invert the matrix target using Gaussian elimination.
Definition mzd.c:1438
mzd_t * _mzd_mul_va(mzd_t *C, mzd_t const *v, mzd_t const *A, int const clear)
Matrix multiplication optimized for v*A where v is a vector.
Definition mzd.c:1257
static void mzd_and_bits(mzd_t *M, rci_t const x, rci_t const y, int const n, word values)
AND n bits from values to M starting a position (x,y).
Definition mzd.h:492
mzd_t * mzd_addmul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B)
Naive cubic matrix multiplication and addition.
Definition mzd.c:1160
int mzd_is_zero(mzd_t const *A)
Zero test for matrix.
Definition mzd.c:1630
double mzd_density(mzd_t const *A, wi_t res)
Return the number of nonzero entries divided by nrows * ncols.
Definition mzd.c:1825
static void mzd_combine_even(mzd_t *C, rci_t const c_row, wi_t const c_startblock, mzd_t const *A, rci_t const a_row, wi_t const a_startblock, mzd_t const *B, rci_t const b_row, wi_t const b_startblock)
c_row[c_startblock:] = a_row[a_startblock:] + b_row[b_startblock:] for offset 0
Definition mzd.h:993
static void mzd_clear_bits(mzd_t *M, rci_t const x, rci_t const y, int const n)
Clear n bits in M starting a position (x,y).
Definition mzd.h:514
static void _mzd_row_swap(mzd_t *M, rci_t const rowa, rci_t const rowb, wi_t const startblock)
Swap the two rows rowa and rowb starting at startblock.
Definition mzd.h:265
static word * mzd_row(mzd_t *M, rci_t row)
Get pointer to first word of row.
Definition mzd.h:185
static void mzd_combine_even_in_place(mzd_t *A, rci_t const a_row, wi_t const a_startblock, mzd_t const *B, rci_t const b_row, wi_t const b_startblock)
a_row[a_startblock:] += b_row[b_startblock:] for offset 0
Definition mzd.h:919
static word mzd_hash(mzd_t const *A)
Return hash value for matrix.
Definition mzd.h:1174
mzd_t * mzd_concat(mzd_t *C, mzd_t const *A, mzd_t const *B)
Concatenate B to A and write the result to C.
Definition mzd.c:1386
static int mzd_read_bits_int(mzd_t const *M, rci_t const x, rci_t const y, int const n)
Get n bits starting a position (x,y) from the matrix M.
Definition mzd.h:1088
rci_t mzd_gauss_delayed(mzd_t *M, rci_t const startcol, int const full)
Gaussian elimination.
Definition mzd.c:209
static void mzd_combine(mzd_t *C, rci_t const c_row, wi_t const c_startblock, mzd_t const *A, rci_t const a_row, wi_t const a_startblock, mzd_t const *B, rci_t const b_row, wi_t const b_startblock)
row3[col3:] = row1[col1:] + row2[col2:]
Definition mzd.h:1068
double _mzd_density(mzd_t const *A, wi_t res, rci_t r, rci_t c)
Return the number of nonzero entries divided by nrows * ncols considering only the submatrix starting...
Definition mzd.c:1793
static word mzd_read_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n)
Definition mzd.h:892
static void mzd_write_bit(mzd_t *M, rci_t const row, rci_t const col, BIT const value)
Write the bit value to position M[row,col].
Definition mzd.h:457
static void mzd_col_swap(mzd_t *M, rci_t const cola, rci_t const colb)
Swap the two columns cola and colb.
Definition mzd.h:424
static void mzd_xor_bits(mzd_t *M, rci_t const x, rci_t const y, int const n, word values)
XOR n bits from values to M starting a position (x,y).
Definition mzd.h:472
static int mzd_is_dangerous_window(mzd_t const *M)
Test if a matrix is windowed with non-zero excess.
Definition mzd.h:170
mzd_t * mzd_extract_u(mzd_t *U, mzd_t const *A)
Definition mzd.c:1844
mzd_t * _mzd_mul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B, int const clear)
Naive cubic matrix multiplication with the pre-transposed B.
Definition mzd.c:1175
mzd_t * mzd_init_window(mzd_t *M, rci_t const lowr, rci_t const lowc, rci_t const highr, rci_t const highc)
Create a window/view into the matrix M.
Definition mzd.c:160
int mzd_equal(mzd_t const *A, mzd_t const *B)
Return TRUE if A == B.
Definition mzd.c:1315
static BIT mzd_read_bit(mzd_t const *M, rci_t const row, rci_t const col)
Read the bit at position M[row,col].
Definition mzd.h:440
mzd_t * mzd_copy(mzd_t *DST, mzd_t const *A)
Copy matrix A to DST.
Definition mzd.c:1364
mzd_t * mzd_extract_l(mzd_t *L, mzd_t const *A)
Definition mzd.c:1856
mzd_t * mzd_add(mzd_t *C, mzd_t const *A, mzd_t const *B)
Set C = A+B.
Definition mzd.c:1458
int mzd_find_pivot(mzd_t const *M, rci_t start_row, rci_t start_col, rci_t *r, rci_t *c)
Find the next nonzero entry in M starting at start_row and start_col.
Definition mzd.c:1662
static mzd_t const * mzd_init_window_const(mzd_t const *M, rci_t const lowr, rci_t const lowc, rci_t const highr, rci_t const highc)
Create a const window/view into a const matrix M.
Definition mzd.h:243
mzd_t * _mzd_add(mzd_t *C, mzd_t const *A, mzd_t const *B)
Same as mzd_add but without any checks on the input.
Definition mzd.c:1472
void mzd_set_ui(mzd_t *M, unsigned int const value)
Set the matrix M to the value equivalent to the integer value provided.
Definition mzd.c:1295
mzd_t * mzd_mul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B)
Naive cubic matrix multiplication.
Definition mzd.c:1142
Dense matrices over GF(2).
Definition mzd.h:68
wi_t rowstride
Definition mzd.h:78
wi_t width
Definition mzd.h:72
rci_t nrows
Definition mzd.h:70
uint8_t flags
Definition mzd.h:92
word high_bitmask
Definition mzd.h:97
rci_t ncols
Definition mzd.h:71