| ␊ |
| ␊ |
| ␊ |
| //#if defined(ASM_386) || defined(ASM_X86_64)␊ |
| //#define SIEBASM_386␊ |
| #if defined(ASM_386) || defined(ASM_X86_64)␊ |
| #define SIEBASM_386␊ |
| ␊ |
| unsigned int clobbered_int; // dummy; NOTE(review): presumably a throwaway output operand for inline asm whose result is discarded - not referenced in the visible code, confirm against its users␊ |
| ␊ |
|
| "js 2f \n\t" \␊ |
| "decl %[pos] \n\t" \␊ |
| "2: # hit detected" \␊ |
| : [pos] "=q" (offset) : "[pos]" (offset), [sieb] "q" (SieveArray) : "cc");␊ |
| : [pos] "=r" (offset) : "[pos]" (offset), [sieb] "q" (SieveArray) : "cc");␊ |
| #endif␊ |
| #if defined(ASM_X86_64)␊ |
| // code containing cmov optimizations for X86_64␊ |
|
| "js 2f \n\t" \␊ |
| "decl %[pos] \n\t" \␊ |
| "2: # hit detected" \␊ |
| : [pos] "=q" (offset) : "[pos]" (offset), [sieb] "q" (SieveArray) : "cc");␊ |
| : [pos] "=r" (offset) : "[pos]" (offset), [sieb] "q" (SieveArray) : "cc");␊ |
| #define asm_sieb(lp, d0, d1, P) \␊ |
| asm volatile ( \␊ |
| "cmpl %[disp1],%[disp0]\n\t" \␊ |
|
| "testl $0x80008000,(%[sieb],%[pos],4) \n\t" \␊ |
| "jz 1b \n\t" \␊ |
| "2: # hit detected" \␊ |
| : [pos] "=q" (offset) : "[pos]" (offset), [sieb] "q" (SieveArray) : "cc");␊ |
| : [pos] "=r" (offset) : "[pos]" (offset), [sieb] "q" (SieveArray) : "cc");␊ |
| #define asm_sieb(lp, d0, d1, P) \␊ |
| asm volatile ( \␊ |
| "cmpl %[disp1],%[disp0]\n\t" \␊ |
|
| "pcmpeqd 16(%[Deltas],%[i],8),%%xmm1 \n\t" \␊ |
| "packssdw %%xmm1,%%xmm0 \n\t" \␊ |
| "pmovmskb %%xmm0,%[pflags] # create bytemask" \␊ |
| : [pflags] "=q" (pflags)␊ |
| : [pflags] "=r" (pflags)␊ |
| : [divisors] "r" (&PrimeNumbers[0]), [Deltas] "r" (&Delta_of_PrimeNumbers[0][0]),␊ |
| [FloatRecips] "r" (&PrimeNumberFloatReciprocals[0]),␊ |
| [_xmm7] "x" (XMM7), [_xmm6] "x" (XMM6), [i] "r" (nr)␊ |
|
| "pcmpeqd %%xmm3,%%xmm1 \n\t" \␊ |
| "packssdw %%xmm1,%%xmm0 \n\t" \␊ |
| "pmovmskb %%xmm0,%[pflags] # create bytemask" \␊ |
| : [pflags] "=q" (pflags)␊ |
| : [pflags] "=r" (pflags)␊ |
| : [PrimeNumbers] "r" (&PrimeNumbers[0]), [LocDeltas] "r" (&LocalPhysInterval_Delta_of_PrimeNumbers[0][0]),␊ |
| [_xmm7] "x" (XMM7), [i] "r" (nr)␊ |
| : "xmm0", "xmm1", "xmm2", "xmm3");␊ |
|
| "pcmpeqd 16+%[Deltas](,%[i],8),%%xmm1 \n\t" \␊ |
| "packssdw %%xmm1,%%xmm0 \n\t" \␊ |
| "pmovmskb %%xmm0,%[pflags] # create bytemask" \␊ |
| : [pflags] "=q" (pflags)␊ |
| : [pflags] "=r" (pflags)␊ |
| : [divisors] "o" (PrimeNumbers[0]), [Deltas] "o" (Delta_of_PrimeNumbers[0][0]),␊ |
| [FloatRecips] "o" (PrimeNumberFloatReciprocals[0]),␊ |
| [_xmm7] "x" (XMM7), [_xmm6] "x" (XMM6), [i] "r" (nr)␊ |
|
| "pcmpeqd %%xmm3,%%xmm1 \n\t" \␊ |
| "packssdw %%xmm1,%%xmm0 \n\t" \␊ |
| "pmovmskb %%xmm0,%[pflags] # create bytemask" \␊ |
| : [pflags] "=q" (pflags)␊ |
| : [pflags] "=r" (pflags)␊ |
| : [PrimeNumbers] "o" (PrimeNumbers[0]), [LocDeltas] "o" (LocalPhysInterval_Delta_of_PrimeNumbers[0][0]),␊ |
| [_xmm7] "x" (XMM7), [i] "r" (nr)␊ |
| : "xmm0", "xmm1", "xmm2", "xmm3");␊ |
|
| "pcmpeqd 24+%[Deltas](,%[i],8),%%mm2 # L2AM \n\t" \␊ |
| "packssdw %%mm2,%%mm1 # L2AM pack comparison results together \n\t" \␊ |
| "pmovmskb %%mm1,%[pflags2] # L6+ (VectorPath) into bytemask" \␊ |
| : [pflags] "=q" (pflags), [pflags2] "=q" (pflags2)␊ |
| : [pflags] "=r" (pflags), [pflags2] "=r" (pflags2)␊ |
| : [divisors] "o" (PrimeNumbers[0]), [Deltas] "o" (Delta_of_PrimeNumbers[0][0]),␊ |
| [FloatRecips] "o" (PrimeNumberFloatReciprocals[0]),␊ |
| [_xmm7] "x" (XMM7), [_xmm6] "x" (XMM6), [i] "r" (nr)␊ |
|
| "pcmpeqd 8+%[LocDeltas](,%[i],8),%%mm2 # L2AM \n\t" \␊ |
| "packssdw %%mm2,%%mm1 # L2AM pack comparison results together \n\t" \␊ |
| "pmovmskb %%mm1,%[pflags] # L6+ (VectorPath) into bytemask" \␊ |
| : [pflags] "=q" (pflags)␊ |
| : [pflags] "=r" (pflags)␊ |
| : [PrimeNumbers] "o" (PrimeNumbers[0]), [LocDeltas] "o" (LocalPhysInterval_Delta_of_PrimeNumbers[0][0]),␊ |
| [_mm7] "y" (MM7), [i] "r" (nr)␊ |
| : "mm0", "mm1", "mm2");␊ |
|
| "pcmpeqd 8+%[Deltas](,%[i],8),%%mm2 # L2AM \n\t" \␊ |
| "packssdw %%mm2,%%mm1 # L2AM pack comparison results together \n\t" \␊ |
| "pmovmskb %%mm1,%[pflags] # L6+ (VectorPath) into bytemask" \␊ |
| : [pflags] "=q" (pflags)␊ |
| : [pflags] "=r" (pflags)␊ |
| : [divisors] "o" (PrimeNumbers[0]), [FloatRecips] "o" (PrimeNumberFloatReciprocals[0]),␊ |
| [Deltas] "o" (Delta_of_PrimeNumbers[0][0]),␊ |
| [_mm7] "y" (MM7), [_mm6] "y" (MM6), [i] "r" (nr)␊ |
|
| "pcmpeqd 8+%[LocDeltas](,%[i],8),%%mm2 # L2AM \n\t" \␊ |
| "packssdw %%mm2,%%mm1 # L2AM pack comparison results together \n\t" \␊ |
| "pmovmskb %%mm1,%[pflags] # L6+ (VectorPath) into bytemask" \␊ |
| : [pflags] "=q" (pflags)␊ |
| : [pflags] "=r" (pflags)␊ |
| : [PrimeNumbers] "o" (PrimeNumbers[0]), [LocDeltas] "o" (LocalPhysInterval_Delta_of_PrimeNumbers[0][0]),␊ |
| [_mm7] "y" (MM7), [i] "r" (nr)␊ |
| : "mm0", "mm1", "mm2");␊ |
|
| "packssdw %%mm1,%%mm5 \n\t" \␊ |
| "packsswb %%mm0,%%mm5 # mm0 is not relevant\n\t" \␊ |
| "movd %%mm5,%[pflags] # L5+ (VectorPath)" \␊ |
| : [pflags] "=q" (pflags)␊ |
| : [pflags] "=r" (pflags)␊ |
| : [divisors] "o" (PrimeNumbers[0]), [FloatRecips] "o" (PrimeNumberFloatReciprocals[0]),␊ |
| [Deltas] "o" (Delta_of_PrimeNumbers[0][0]),␊ |
| [_mm7] "y" (MM7), [_mm6] "y" (MM6), [i] "r" (nr)␊ |
|
| "packssdw %%mm1,%%mm0 \n\t" \␊ |
| "packsswb %%mm1,%%mm0 \n\t" \␊ |
| "movd %%mm0,%[pflags] # L5+ (VectorPath)" \␊ |
| : [pflags] "=q" (pflags)␊ |
| : [pflags] "=r" (pflags)␊ |
| : [PrimeNumbers] "o" (PrimeNumbers[0]), [LocDeltas] "o" (LocalPhysInterval_Delta_of_PrimeNumbers[0][0]),␊ |
| [_mm7] "y" (MM7), [i] "r" (nr)␊ |
| : "mm0", "mm1");␊ |