qsieve

qsieve Commit Details


Date: 2013-04-17 22:44:00 (11 years 8 months ago)
Author: Natalie Adams
Branch: default
Commit: d3bce9735b5d
Parents: 4a897e9b3975
Message: Fixing asm code

Changes:
M src/Sieving.cc (13 diffs)

File differences

src/Sieving.cc
386 386
387 387
388 388
389
389
390 390
391 391
392 392
......
560 560
561 561
562 562
563
563
564 564
565 565
566 566
......
593 593
594 594
595 595
596
596
597 597
598 598
599 599
......
1554 1554
1555 1555
1556 1556
1557
1557
1558 1558
1559 1559
1560 1560
......
1605 1605
1606 1606
1607 1607
1608
1608
1609 1609
1610 1610
1611 1611
......
1706 1706
1707 1707
1708 1708
1709
1709
1710 1710
1711 1711
1712 1712
......
1757 1757
1758 1758
1759 1759
1760
1760
1761 1761
1762 1762
1763 1763
......
1876 1876
1877 1877
1878 1878
1879
1879
1880 1880
1881 1881
1882 1882
......
1946 1946
1947 1947
1948 1948
1949
1949
1950 1950
1951 1951
1952 1952
......
2038 2038
2039 2039
2040 2040
2041
2041
2042 2042
2043 2043
2044 2044
......
2086 2086
2087 2087
2088 2088
2089
2089
2090 2090
2091 2091
2092 2092
......
2179 2179
2180 2180
2181 2181
2182
2182
2183 2183
2184 2184
2185 2185
......
2229 2229
2230 2230
2231 2231
2232
2232
2233 2233
2234 2234
2235 2235
"js 2f \n\t" \
"decl %[pos] \n\t" \
"2: # hit detected" \
: [pos] "=r" (offset) : "[pos]" (offset), [sieb] "q" (SieveArray) : "cc");
: [pos] "=q" (offset) : "[pos]" (offset), [sieb] "q" (SieveArray) : "cc");
#endif
#if defined(ASM_X86_64)
// code containing cmov optimizations for X86_64
"js 2f \n\t" \
"decl %[pos] \n\t" \
"2: # hit detected" \
: [pos] "=r" (offset) : "[pos]" (offset), [sieb] "q" (SieveArray) : "cc");
: [pos] "=q" (offset) : "[pos]" (offset), [sieb] "q" (SieveArray) : "cc");
#define asm_sieb(lp, d0, d1, P) \
asm volatile ( \
"cmpl %[disp1],%[disp0]\n\t" \
"testl $0x80008000,(%[sieb],%[pos],4) \n\t" \
"jz 1b \n\t" \
"2: # hit detected" \
: [pos] "=r" (offset) : "[pos]" (offset), [sieb] "q" (SieveArray) : "cc");
: [pos] "=q" (offset) : "[pos]" (offset), [sieb] "q" (SieveArray) : "cc");
#define asm_sieb(lp, d0, d1, P) \
asm volatile ( \
"cmpl %[disp1],%[disp0]\n\t" \
"pcmpeqd 16(%[Deltas],%[i],8),%%xmm1 \n\t" \
"packssdw %%xmm1,%%xmm0 \n\t" \
"pmovmskb %%xmm0,%[pflags] # create bytemask" \
: [pflags] "=r" (pflags)
: [pflags] "=q" (pflags)
: [divisors] "r" (&PrimeNumbers[0]), [Deltas] "r" (&Delta_of_PrimeNumbers[0][0]),
[FloatRecips] "r" (&PrimeNumberFloatReciprocals[0]),
[_xmm7] "x" (XMM7), [_xmm6] "x" (XMM6), [i] "r" (nr)
"pcmpeqd %%xmm3,%%xmm1 \n\t" \
"packssdw %%xmm1,%%xmm0 \n\t" \
"pmovmskb %%xmm0,%[pflags] # create bytemask" \
: [pflags] "=r" (pflags)
: [pflags] "=q" (pflags)
: [PrimeNumbers] "r" (&PrimeNumbers[0]), [LocDeltas] "r" (&LocalPhysInterval_Delta_of_PrimeNumbers[0][0]),
[_xmm7] "x" (XMM7), [i] "r" (nr)
: "xmm0", "xmm1", "xmm2", "xmm3");
"pcmpeqd 16+%[Deltas](,%[i],8),%%xmm1 \n\t" \
"packssdw %%xmm1,%%xmm0 \n\t" \
"pmovmskb %%xmm0,%[pflags] # create bytemask" \
: [pflags] "=r" (pflags)
: [pflags] "=q" (pflags)
: [divisors] "o" (PrimeNumbers[0]), [Deltas] "o" (Delta_of_PrimeNumbers[0][0]),
[FloatRecips] "o" (PrimeNumberFloatReciprocals[0]),
[_xmm7] "x" (XMM7), [_xmm6] "x" (XMM6), [i] "r" (nr)
"pcmpeqd %%xmm3,%%xmm1 \n\t" \
"packssdw %%xmm1,%%xmm0 \n\t" \
"pmovmskb %%xmm0,%[pflags] # create bytemask" \
: [pflags] "=r" (pflags)
: [pflags] "=q" (pflags)
: [PrimeNumbers] "o" (PrimeNumbers[0]), [LocDeltas] "o" (LocalPhysInterval_Delta_of_PrimeNumbers[0][0]),
[_xmm7] "x" (XMM7), [i] "r" (nr)
: "xmm0", "xmm1", "xmm2", "xmm3");
"pcmpeqd 24+%[Deltas](,%[i],8),%%mm2 # L2AM \n\t" \
"packssdw %%mm2,%%mm1 # L2AM pack comparison results together \n\t" \
"pmovmskb %%mm1,%[pflags2] # L6+ (VectorPath) into bytemask" \
: [pflags] "=r" (pflags), [pflags2] "=r" (pflags2)
: [pflags] "=q" (pflags), [pflags2] "=q" (pflags2)
: [divisors] "o" (PrimeNumbers[0]), [Deltas] "o" (Delta_of_PrimeNumbers[0][0]),
[FloatRecips] "o" (PrimeNumberFloatReciprocals[0]),
[_xmm7] "x" (XMM7), [_xmm6] "x" (XMM6), [i] "r" (nr)
"pcmpeqd 8+%[LocDeltas](,%[i],8),%%mm2 # L2AM \n\t" \
"packssdw %%mm2,%%mm1 # L2AM pack comparison results together \n\t" \
"pmovmskb %%mm1,%[pflags] # L6+ (VectorPath) into bytemask" \
: [pflags] "=r" (pflags)
: [pflags] "=q" (pflags)
: [PrimeNumbers] "o" (PrimeNumbers[0]), [LocDeltas] "o" (LocalPhysInterval_Delta_of_PrimeNumbers[0][0]),
[_mm7] "y" (MM7), [i] "r" (nr)
: "mm0", "mm1", "mm2");
"pcmpeqd 8+%[Deltas](,%[i],8),%%mm2 # L2AM \n\t" \
"packssdw %%mm2,%%mm1 # L2AM pack comparison results together \n\t" \
"pmovmskb %%mm1,%[pflags] # L6+ (VectorPath) into bytemask" \
: [pflags] "=r" (pflags)
: [pflags] "=q" (pflags)
: [divisors] "o" (PrimeNumbers[0]), [FloatRecips] "o" (PrimeNumberFloatReciprocals[0]),
[Deltas] "o" (Delta_of_PrimeNumbers[0][0]),
[_mm7] "y" (MM7), [_mm6] "y" (MM6), [i] "r" (nr)
"pcmpeqd 8+%[LocDeltas](,%[i],8),%%mm2 # L2AM \n\t" \
"packssdw %%mm2,%%mm1 # L2AM pack comparison results together \n\t" \
"pmovmskb %%mm1,%[pflags] # L6+ (VectorPath) into bytemask" \
: [pflags] "=r" (pflags)
: [pflags] "=q" (pflags)
: [PrimeNumbers] "o" (PrimeNumbers[0]), [LocDeltas] "o" (LocalPhysInterval_Delta_of_PrimeNumbers[0][0]),
[_mm7] "y" (MM7), [i] "r" (nr)
: "mm0", "mm1", "mm2");
"packssdw %%mm1,%%mm5 \n\t" \
"packsswb %%mm0,%%mm5 # mm0 is not relevant\n\t" \
"movd %%mm5,%[pflags] # L5+ (VectorPath)" \
: [pflags] "=r" (pflags)
: [pflags] "=q" (pflags)
: [divisors] "o" (PrimeNumbers[0]), [FloatRecips] "o" (PrimeNumberFloatReciprocals[0]),
[Deltas] "o" (Delta_of_PrimeNumbers[0][0]),
[_mm7] "y" (MM7), [_mm6] "y" (MM6), [i] "r" (nr)
"packssdw %%mm1,%%mm0 \n\t" \
"packsswb %%mm1,%%mm0 \n\t" \
"movd %%mm0,%[pflags] # L5+ (VectorPath)" \
: [pflags] "=r" (pflags)
: [pflags] "=q" (pflags)
: [PrimeNumbers] "o" (PrimeNumbers[0]), [LocDeltas] "o" (LocalPhysInterval_Delta_of_PrimeNumbers[0][0]),
[_mm7] "y" (MM7), [i] "r" (nr)
: "mm0", "mm1");

Archive Download the corresponding diff file

Branches

Tags

Page rendered in 0.63715s using 14 queries.