/*
 * Micro-benchmark: the same 64-bit bitboard expression is evaluated
 * 30,000,000 times, once with plain C operators (normal_run) and once
 * with MMX inline assembly (mmx_run). Each run prints the elapsed
 * clock() ticks. main() repeats the pair eight times.
 *
 * Requires an x86 target with MMX and a compiler that supports GNU C
 * statement expressions and extended asm (GCC or Clang).
 */
#include <stdio.h>
#include <time.h>

typedef unsigned long long BITBOARD;

/* Operands of the benchmark expression.  rank_mask is never assigned in
 * main(), so it starts at zero (static storage) and is decremented by
 * each run. */
static BITBOARD bitboard  __attribute__ ((aligned(32)));
static BITBOARD bitboard2 __attribute__ ((aligned(8)));
static BITBOARD Occupied  __attribute__ ((aligned(8)));
static BITBOARD rank_mask __attribute__ ((aligned(8)));
/* All-ones mask used by Compl(); main() sets it before any run. */
static BITBOARD all_mask  __attribute__ ((aligned(8)));

enum {
    ITERATIONS = 30000000,  /* loop trips per timed run */
    ROUNDS     = 8          /* normal/MMX pairs executed by main() */
};

/* Reference implementations using the ordinary C operators. */
#define Old_And(a,b)    ((a) & (b))
#define Old_Or(a,b)     ((a) | (b))
#define Old_Xor(a,b)    ((a) ^ (b))
#define Old_Compl(a)    (~(a))
#define Old_Shiftl(a,b) ((a) << (b))
#define Old_Shiftr(a,b) ((a) >> (b))

/*
 * MMX implementations.  Each macro is a GNU statement expression that
 * loads its first operand into %mm0, applies one MMX instruction, and
 * stores %mm0 to a local temporary whose value is the macro's result.
 * Every asm declares "mm0" as clobbered so the compiler knows the
 * register is modified.  Callers must execute Emms() before any
 * x87 floating-point code or library call that may use it.
 */
#define And(a, b)                                                   \
    ({                                                              \
        BITBOARD mmx_result_ __attribute__ ((aligned(8)));          \
        __asm__ (                                                   \
            "movq %1, %%mm0\n\t"                                    \
            "pand %2, %%mm0\n\t"                                    \
            "movq %%mm0, %0"                                        \
            : "=m" (mmx_result_)                                    \
            : "m" (a), "m" (b)                                      \
            : "memory", "mm0");                                     \
        mmx_result_;                                                \
    })

#define Or(a, b)                                                    \
    ({                                                              \
        BITBOARD mmx_result_ __attribute__ ((aligned(8)));          \
        __asm__ (                                                   \
            "movq %1, %%mm0\n\t"                                    \
            "por %2, %%mm0\n\t"                                     \
            "movq %%mm0, %0"                                        \
            : "=m" (mmx_result_)                                    \
            : "m" (a), "m" (b)                                      \
            : "memory", "mm0");                                     \
        mmx_result_;                                                \
    })

#define Xor(a, b)                                                   \
    ({                                                              \
        BITBOARD mmx_result_ __attribute__ ((aligned(8)));          \
        __asm__ (                                                   \
            "movq %1, %%mm0\n\t"                                    \
            "pxor %2, %%mm0\n\t"                                    \
            "movq %%mm0, %0"                                        \
            : "=m" (mmx_result_)                                    \
            : "m" (a), "m" (b)                                      \
            : "memory", "mm0");                                     \
        mmx_result_;                                                \
    })

/* pandn computes (~%mm0) & source, so with all_mask == ~0 the result is
 * the bitwise complement of a.  all_mask is passed as an operand rather
 * than named inside the template so the compiler sees the reference. */
#define Compl(a)                                                    \
    ({                                                              \
        BITBOARD mmx_result_ __attribute__ ((aligned(8)));          \
        __asm__ (                                                   \
            "movq %1, %%mm0\n\t"                                    \
            "pandn %2, %%mm0\n\t"                                   \
            "movq %%mm0, %0"                                        \
            : "=m" (mmx_result_)                                    \
            : "m" (a), "m" (all_mask)                               \
            : "memory", "mm0");                                     \
        mmx_result_;                                                \
    })

/* The shift count is cast to BITBOARD: psllq/psrlq read a full 64-bit
 * count when it comes from memory, so a narrower object must not be
 * handed to the "m" alternative.  Constant counts still qualify for the
 * immediate alternative. */
#define Shiftl(a, b)                                                \
    ({                                                              \
        BITBOARD mmx_result_ __attribute__ ((aligned(8)));          \
        __asm__ (                                                   \
            "movq %1, %%mm0\n\t"                                    \
            "psllq %2, %%mm0\n\t"                                   \
            "movq %%mm0, %0"                                        \
            : "=m" (mmx_result_)                                    \
            : "m" (a), "im" ((BITBOARD)(b))                         \
            : "memory", "mm0");                                     \
        mmx_result_;                                                \
    })

#define Shiftr(a, b)                                                \
    ({                                                              \
        BITBOARD mmx_result_ __attribute__ ((aligned(8)));          \
        __asm__ (                                                   \
            "movq %1, %%mm0\n\t"                                    \
            "psrlq %2, %%mm0\n\t"                                   \
            "movq %%mm0, %0"                                        \
            : "=m" (mmx_result_)                                    \
            : "m" (a), "im" ((BITBOARD)(b))                         \
            : "memory", "mm0");                                     \
        mmx_result_;                                                \
    })

/* Leave MMX mode so the x87 register stack is usable again. */
#define Emms() __asm__ __volatile__ ("emms" : : : "memory")

/*
 * Time ITERATIONS evaluations of the benchmark expression using the
 * plain C operators and print the elapsed clock() ticks.
 * Mutates bitboard, bitboard2 and rank_mask.  Always returns 0.
 */
int normal_run(void)
{
    clock_t t1, t2;
    unsigned int i;

    /* Spin until clock() advances so timing starts on a tick boundary. */
    t1 = clock();
    while (t1 == clock())
        ;
    t1 = clock();

    for (i = 0; i < ITERATIONS; i++) {
        bitboard2 = Old_And(Old_And(bitboard, Old_Shiftl(Occupied, 3)),
                            Old_Compl(rank_mask));
        bitboard++;
        rank_mask--;
    }

    t2 = clock();
    /* clock_t is not int; convert explicitly to match the format. */
    printf("Normal: %ld\n", (long)(t2 - t1));
    return 0;
}

/*
 * Time ITERATIONS evaluations of the benchmark expression using the MMX
 * macros and print the elapsed clock() ticks.
 * Mutates bitboard, bitboard2 and rank_mask.  Always returns 0.
 */
int mmx_run(void)
{
    clock_t t1, t2;
    unsigned int i;

    /* Spin until clock() advances so timing starts on a tick boundary. */
    t1 = clock();
    while (t1 == clock())
        ;
    t1 = clock();

    for (i = 0; i < ITERATIONS; i++) {
        bitboard2 = And(And(bitboard, Shiftl(Occupied, 3)),
                        Compl(rank_mask));
        bitboard++;
        rank_mask--;
    }

    /* Restore the FPU state before calling back into the C library. */
    Emms();

    t2 = clock();
    /* clock_t is not int; convert explicitly to match the format. */
    printf("MMX: %ld\n", (long)(t2 - t1));
    return 0;
}

/*
 * Print the addresses of the benchmark operands, seed them, then run
 * the normal and MMX benchmarks back to back ROUNDS times.
 */
int main(void)
{
    int i;

    printf("%p %p %p %p\n",
           (void *)&bitboard, (void *)&bitboard2,
           (void *)&Occupied, (void *)&rank_mask);

    Occupied  = 0x80FF442288445533ULL;
    bitboard  = 0x8080808080804423ULL;
    bitboard2 = 0x0110101001434400ULL;
    all_mask  = 0xffffffffffffffffULL;

    for (i = 0; i < ROUNDS; i++) {
        normal_run();
        mmx_run();
    }
    return 0;
}