/*! @file
* @brief
* mpqs sieving stuff that needs to be included by Sieving.cc
*/
#include "mpqsPolynom.H"
extern CmpqsPolynom Polynom; // object to manage polynomial computations for multipolynomial sieve (MPQS)
#ifdef USE_FIBHEAP
#include "fibheap.H"
FibHeap <TSieve_Delta> Sieve_Delta_Heap, Sieve_Delta_HeapOfSquares;
#include <queue>
#elif defined(USE_FAKEHEAP)
#include "fakeheap.H"
FakeHeap <TSieve_Delta> Sieve_Delta_Heap;
#include <queue>
std::priority_queue<TSieve_Delta> Sieve_Delta_HeapOfSquares;
#else
#include <queue>
std::priority_queue<TSieve_Delta> Sieve_Delta_Heap, Sieve_Delta_HeapOfSquares;
#endif
extern bool is_dynamic_factor(const int number);
//! contains data structures for accessing dynamic factors
namespace DynamicFactorArrays
{
// this is for much faster access to the data
// (in streaming manner, possibly using SSE instructions!)
const unsigned int MaxDynamicFactors = 2500000;
static int DynFactors[MaxDynamicFactors]; // write once (per entry) - read many access
static int DynFactorReciprocals[MaxDynamicFactors]; // write once (per entry) - read many access
static int SQRT_kNs[MaxDynamicFactors]; // write once (per entry) - read many access
static signed int PolyDs[MaxDynamicFactors]; // read/write many access
unsigned int DynamicFactorsInUse = 0; // already declared in Sieving.H
double DYNFB_threshold = 0.0; // already declared in Sieving.H
void compute_Deltas_for_DynamicFactors(const int offset)
{
#if defined(VERBOSE)
cout << "FYI: we have " << DynamicFactorsInUse << " dynamic factors in use for sieving." << endl;
#endif
for (unsigned int i=0; i<DynamicFactorsInUse; ++i)
{
register const int DynFac=DynFactors[i];
const int &DynFacReciprocal = DynFactorReciprocals[i];
TSieve_Delta sieb_delta;
int d1,d2;
sieb_delta.factor=DynFac;
const unsigned int PolyB = mpz_remainder_ui(Polynom.get_B(),DynFac);
if (PolyDs[i]<0 || !CDeltaComputations::D_difference_to_last)
{
//cout << "get MPQS-PolyD for dynamic factor " << DynFac << endl;
PolyDs[i]=mpz_remainder_ui(Polynom.get_D(),DynFac);
}
else
{
//cout << "updating MPQS-PolyD for dynamic factor " << DynFac << endl;
PolyDs[i]+=CDeltaComputations::D_difference_to_last;
}
while (PolyDs[i]>=DynFac) PolyDs[i]-=DynFac;
unsigned int inv_A2 = numtheory::squaremod(PolyDs[i],DynFac)<<1;
if (inv_A2>=static_cast<unsigned int>(DynFac)) inv_A2-=DynFac;
inv_A2 = fastinvmod(inv_A2,DynFac);
d1=SQRT_kNs[i]-PolyB; if (d1<0) d1+=DynFac;
d1=mulmod(d1,inv_A2,DynFac);
sieb_delta.delta=normalized_signed_mod(d1-offset,DynFac,DynFacReciprocal);
sieb_delta.delta+=offset;
#ifdef USE_FAKEHEAP
while (sieb_delta.delta<=LogicalSieveSize) // shortcut: only re-insert, if still inside the sieve interval!
{
Sieve_Delta_Heap.push(sieb_delta);
sieb_delta.delta+=sieb_delta.factor;
}
#else
if (sieb_delta.delta<=LogicalSieveSize) // shortcut: only re-insert, if still inside the sieve interval!
Sieve_Delta_Heap.push(sieb_delta);
#endif
d2=DynFac-SQRT_kNs[i]-PolyB; if (d2<0) d2+=DynFac;
d2=mulmod(d2,inv_A2,DynFac);
sieb_delta.delta=normalized_signed_mod(d2-offset,DynFac,DynFacReciprocal);
sieb_delta.delta+=offset;
if ( (sieb_delta.delta<=LogicalSieveSize) && (d1!=d2) ) // shortcut: only re-insert, if still inside the sieve interval!
{
Sieve_Delta_Heap.push(sieb_delta);
#ifdef USE_FAKEHEAP
sieb_delta.delta+=sieb_delta.factor;
while (sieb_delta.delta<=LogicalSieveSize)
{
Sieve_Delta_Heap.push(sieb_delta);
sieb_delta.delta+=sieb_delta.factor;
}
#endif
}
}
}
} // end of namespace DynamicFactorArrays
void TDynamicFactorRelation::append_DynamicFactor_for_sieving(const int DynFac)
{
using namespace DynamicFactorArrays;
if (DynamicFactorsInUse>=MaxDynamicFactors)
{
MARK; cerr << "you may want to increase MaxDynamicFactors and recompile..." << endl;
return; //exit(19);
}
// if we use partial sieving, then we cannot be sure, that a
// already known dynamic factor gets redetected: redetected dynamic factors
// *must not* appended again!!
// we could avoid this check for the "complete interval sieving", but I doubt that
// this saves us much time...
if (is_dynamic_factor(DynFac))
{
#ifdef VERBOSE_INFO
cout << "append dynamic factor: " << DynFac << " is already appended!" << endl;
#endif
return; // don't append this dynamic factor twice...
}
DynFactors[DynamicFactorsInUse] = DynFac;
DynFactorReciprocals[DynamicFactorsInUse] = numtheory::reciprocal(DynFac);
SQRT_kNs[DynamicFactorsInUse]=SQRT_kN_mod_PrimeNumber(DynFac);
PolyDs[DynamicFactorsInUse]=-1; // tripper update at first access
++DynamicFactorsInUse; // now we have one more in use
DYNFB_threshold-=(4.0*LogicalSieveSize)/DynFac;
//if ((DynamicFactorsInUse&255)==255) cout << "#DYNFB: " << DynamicFactorsInUse << ", countdown now: " << DYNFB_threshold << endl;
}
void do_sieving_Squares()
{
// Sieve squares of static (and dynamic) factors.
// Important: Only subtract the single logarithm of the factors
// (because if a square divides, then the factor has divided already!)
// Remark: To be more efficient, at most one hit in the sieve interval
// is considered.
//cout << "Start sieving squares" << endl;
if (Sieve_Delta_HeapOfSquares.empty()) return; // there's nothing to sieve...
TSieve_Delta sieb_delta = Sieve_Delta_HeapOfSquares.top();
register int delta = sieb_delta.delta-SieveOffset;
#if 0
while (delta<0) // do we need to skip any factors? (for partial sieving!)
{
cout << "sieving squares: " << sieb_delta.factor << " skipped!" << endl;
Sieve_Delta_HeapOfSquares.pop(); // pop element out of priority queue
if (Sieve_Delta_HeapOfSquares.empty()) return;
sieb_delta = Sieve_Delta_HeapOfSquares.top();
delta = sieb_delta.delta-SieveOffset;
}
#endif
while (delta<PhysicalSieveSize)
{
Sieve_Delta_HeapOfSquares.pop(); // pop element out of priority queue
SieveArray[delta]-=log_DynamicLargePrime(sieb_delta.factor); // mark factor at sieve position
if (Sieve_Delta_HeapOfSquares.empty()) break;
sieb_delta = Sieve_Delta_HeapOfSquares.top();
delta = sieb_delta.delta-SieveOffset;
}
//cout << "Finished sieving squares" << endl;
}
// new version, patch contributed by Alexis Michon
void do_sieving_DynamicFactors()
{
if (Sieve_Delta_Heap.empty()) return; // there's nothing to sieve...
short int HitCount=0;
TSieve_Delta sieb_delta = Sieve_Delta_Heap.top();
register int delta = sieb_delta.delta-SieveOffset;
#if 0
while (delta<0) // do we need to skip any factors? (for partial sieving!)
{
cout << "sieving dynamic factors: " << sieb_delta.factor << " skipped!" << endl;
Sieve_Delta_Heap.pop(); // pop element out of priority queue
if (Sieve_Delta_Heap.empty()) return;
sieb_delta = Sieve_Delta_Heap.top();
delta = sieb_delta.delta-SieveOffset;
}
#endif
while (delta < PhysicalSieveSize) // still in the current (physical) interval?
{
Sieve_Delta_Heap.pop(); // pop element out of priority queue
SieveArray[delta]-=log_DynamicLargePrime(sieb_delta.factor); // mark factor at sieve position
// to speedup refactorization of relations, a dynamic factor hit is memorized here
Hits[HitCount].Faktor=sieb_delta.factor;
#ifdef USE_FAKEHEAP
if (Sieve_Delta_Heap.empty())
{
if (SieveArray[delta]<0) // found a (possible) relation?
if (SieveControl::Hit_after_DirtyCorrection(delta+SieveOffset,SieveArray[delta]))
{
SieveArray[delta]=1; // stall further hits at this position...
StaticRelations::insert(new CRelation(delta+SieveOffset,HitCount+1)); // and insert as a hit
}
return;
}
#else
sieb_delta.delta += sieb_delta.factor; // next hit-position
if (sieb_delta.delta<=LogicalSieveSize) // shortcut: push only, if it will be inside the sieve interval
{
Sieve_Delta_Heap.push(sieb_delta); // pushing to priority queue again
}
else
if (Sieve_Delta_Heap.empty())
{
if (SieveArray[delta]<0) // found a (possible) relation?
if (SieveControl::Hit_after_DirtyCorrection(delta+SieveOffset,SieveArray[delta]))
{
SieveArray[delta]=1; // stall further hits at this position...
StaticRelations::insert(new CRelation(delta+SieveOffset,HitCount+1)); // and insert as a hit
}
return;
}
#endif
sieb_delta = Sieve_Delta_Heap.top(); // get next entry out of the priority queue
register int delta2; delta2 = sieb_delta.delta-SieveOffset;
if (delta==delta2)
{
++HitCount;
#ifdef DEBUG
if (HitCount>=CHits::MaxHits) { cerr << "increase MaxHits!"; exit(1); }
#endif
continue;
}
if (SieveArray[delta]<0) // found a (possible) relation?
if (SieveControl::Hit_after_DirtyCorrection(delta+SieveOffset,SieveArray[delta]))
{
SieveArray[delta]=1; // stall further hits at this position...
StaticRelations::insert(new CRelation(delta+SieveOffset, HitCount+1)); // and insert as a hit
}
delta=delta2;
HitCount=0; // reset HitCount for next hit
}
return; // done with this interval
}
#if 0
#include <map>
void Werteverteilung_im_Sieb_ausgeben(void)
{
// if you want to view more stats, you can use this function
// to get the distribution of hits in the sieve
// (don't activate this for "production use", as this is very slow!)
typedef map<TSieveElement,unsigned int> TStats;
static TStats S;
for (int i=0; i<PhysicalSieveSize; ++i) S[SieveArray[i]]++;
static int z = 1000;
if (--z==0)
{
z=1000;
for (TStats::const_iterator pos=S.begin(); pos!=S.end(); ++pos)
cout << static_cast<signed long int>(pos->first) << " : " << pos->second << endl;
cout << "press <Enter> to continue" << endl;
string s; cin >> s;
}
}
#endif
void CDeltaComputations::recompute_all_Deltas(const bool DoReallyAll /* =true */)
{
// While changing polynomials, the old Deltavalues get void.
// They must be recomputed.
//cout << "recomputing deltas" << endl;
// at first, remove any (possible remaining) dynamic factors from the queue:
#ifdef USE_FAKEHEAP
// cout << "clearing Fakeheap with " << Sieve_Delta_Heap.size() << " elements..." << endl;
Sieve_Delta_Heap.clear();
#else
while (!Sieve_Delta_Heap.empty()) Sieve_Delta_Heap.pop(); // for priority_queue (in case that no clear-method is defined)
#endif
// do the same with squares:
//Sieve_Delta_HeapOfSquares.clear();
while (!Sieve_Delta_HeapOfSquares.empty()) Sieve_Delta_HeapOfSquares.pop(); // for priority_queue (in case that no clear-method is defined)
// first, compute the Deltas in/of the static factorbase
unsigned int inv_A2_modp, PolyB;
mpz_t inv_A2, P, wuq, x, y;
mpz_init(inv_A2); mpz_init(P); mpz_init(wuq); mpz_init(x); mpz_init(y);
// for computing Deltas, we need to query the mpqs coefficient "A2" and
// compute "A2 mod p", if we want to sieve with p. This is slow.
// For gods sake we can do it much faster:
mpqsDmodPUpdater.update();
// .. and now the new A2's are available via "CDeltaComputations::get_A2_mod_FBPrime(int Nr)"
// instead of the more slowly version "Polynom.get_A2_mod(const unsigned int m)"
// factorbase: compute first sieve hit; ignore primenumber 2, because computation of root for this value is too complicated...
// ignore PrimeNumbers[0]==-1 because of sign
if (PrimeNumbers[1]!=2)
{
inv_A2_modp=fastinvmod_23bit(Polynom.get_A2_mod(PrimeNumbers[1]),PrimeNumbers[1]);
PolyB=mpz_remainder_ui(Polynom.get_B(),PrimeNumbers[1]);
register signed int d = PolyB%PrimeNumbers[1];
register signed int h = inv_A2_modp%PrimeNumbers[1];
d=mulmod(d,h,PrimeNumbers[1]);
h=mulmod(SQRT_kN_of_PrimeNumbers[1],h,PrimeNumbers[1]);
d=-d; d+=h; if (d<0) d+=PrimeNumbers[1];
Delta_of_PrimeNumbers[1][0]=d;
d-=h; if (d<0) d+=PrimeNumbers[1];
d-=h; if (d<0) d+=PrimeNumbers[1];
Delta_of_PrimeNumbers[1][1]=d;
}
long int i=2;
while(i<StaticFactorbase::Size() && PrimeNumbers[i+1]<46340) // do the paired part (magic 46340 is sqrt(2^31))
{
#if 0 || defined(DEBUG)
if (D_mod_FBPrime[i]!=PrimeNumbers[i]*PrimeNumbers[i+1])
{
MARK; cerr << "implementation error."
<< "The D_mod_FBPrime[] structure is a bit tricky..." << endl
<< "review the source code." << endl;
exit(2);
}
if (get_A2_mod_FBPrimePair(i)!=Polynom.get_A2_mod(D_mod_FBPrime[i]))
{
MARK;
cout << "StaticFactorbase::Size(): " << StaticFactorbase::Size() << endl;
cout << "PrimeNumbers[i], PrimeNumbers[i+1]: " << PrimeNumbers[i] << ", " << PrimeNumbers[i+1] << endl;
cout << i << ": " << get_A2_mod_FBPrimePair(i) << " ?? " << Polynom.get_A2_mod(D_mod_FBPrime[i]) << endl;
exit(2);
}
#endif
const int N=D_mod_FBPrime[i]; // = PrimeNumbers[i]*PrimeNumbers[i+1];
inv_A2_modp=fastinvmod(get_A2_mod_FBPrimePair(i),N); // fast version
//inv_A2_modp=fastinvmod(Polynom.get_A2_mod(N),N); // slow version
PolyB=mpz_remainder_ui(Polynom.get_B(),N);
//cout << "paired: i=" << i << " N=" << N << endl;
for (signed int j=2; --j>=0; ++i) // this is no typo, it's really ++i!
{
#if 1 && defined(ASM_X86_64)
asm volatile(\
"mov %[inv_A2_modp],%%edx \n\t" \
"mov %[inv_A2_modp],%%eax \n\t" \
"shr $16,%%edx \n\t" \
"divw %%si \n\t" \
"movzxw %%dx,%%ebx \n\t" \
"mov %[PolyB],%%edx \n\t" \
"movzxw %%dx,%%eax \n\t" \
"shr $16,%%edx \n\t" \
"divw %%si \n\t" \
"movw %%dx,%%ax \n\t" \
"mulw %%bx \n\t" \
"divw %%si \n\t" \
"mov %%ebx,%%eax \n\t" \
"movzx %%dx,%%ebx \n\t" \
"mulw (%[SQRTkN],%[i],4) \n\t" \
"divw %%si \n\t" \
"# %%edx and %%ebx are now swapped! \n\t" \
"xorl %%eax,%%eax \n\t" \
"negl %%ebx \n\t" \
"addl %%edx,%%ebx \n\t" \
"cmovsl %%esi,%%eax \n\t" \
"addl %%eax,%%ebx \n\t" \
"xorl %%eax,%%eax \n\t" \
"movl %%ebx,(%[Deltas],%[i],8) \n\t" \
"subl %%edx,%%ebx \n\t" \
"cmovsl %%esi,%%eax \n\t" \
"addl %%eax,%%ebx \n\t" \
"xorl %%eax,%%eax \n\t" \
"subl %%edx,%%ebx \n\t" \
"cmovsl %%esi,%%eax \n\t" \
"addl %%eax,%%ebx \n\t" \
"movl %%ebx,4(%[Deltas],%[i],8) \n"
: /* empty output list, we're writing to memory */
: "S" (PrimeNumbers[i]), [inv_A2_modp] "g" (inv_A2_modp),
[PolyB] "g" (PolyB), [SQRTkN] "r" (&SQRT_kN_of_PrimeNumbers[0]),
[Deltas] "r" (&Delta_of_PrimeNumbers[0][0]), [i] "r" (i)
: "cc", "memory", "eax","ebx","edx");
#elif 1 && defined(ASM_CMOV)
asm volatile(\
"mov %[inv_A2_modp],%%edx \n\t" \
"mov %[inv_A2_modp],%%eax \n\t" \
"shr $16,%%edx \n\t" \
"divw %%si \n\t" \
"movzxw %%dx,%%ebx \n\t" \
"mov %[PolyB],%%edx \n\t" \
"movzxw %%dx,%%eax \n\t" \
"shr $16,%%edx \n\t" \
"divw %%si \n\t" \
"movw %%dx,%%ax \n\t" \
"mulw %%bx \n\t" \
"divw %%si \n\t" \
"mov %%ebx,%%eax \n\t" \
"movzx %%dx,%%ebx \n\t" \
"mulw %[SQRTkN](,%[i],4) \n\t" \
"divw %%si \n\t" \
"# %%edx and %%ebx are now swapped! \n\t" \
"xorl %%eax,%%eax \n\t" \
"negl %%ebx \n\t" \
"addl %%edx,%%ebx \n\t" \
"cmovsl %%esi,%%eax \n\t" \
"addl %%eax,%%ebx \n\t" \
"xorl %%eax,%%eax \n\t" \
"movl %%ebx,%[Deltas](,%[i],8) \n\t" \
"subl %%edx,%%ebx \n\t" \
"cmovsl %%esi,%%eax \n\t" \
"addl %%eax,%%ebx \n\t" \
"xorl %%eax,%%eax \n\t" \
"subl %%edx,%%ebx \n\t" \
"cmovsl %%esi,%%eax \n\t" \
"addl %%eax,%%ebx \n\t" \
"movl %%ebx,4+%[Deltas](,%[i],8) \n"
: /* empty output list, we're writing to memory */
: "S" (PrimeNumbers[i]), [inv_A2_modp] "g" (inv_A2_modp),
[PolyB] "g" (PolyB), [SQRTkN] "o" (SQRT_kN_of_PrimeNumbers[0]),
[Deltas] "o" (Delta_of_PrimeNumbers[0][0]), [i] "r" (i)
: "cc", "memory", "eax","ebx","edx");
#else
register signed int d = PolyB%PrimeNumbers[i];
register signed int h = inv_A2_modp%PrimeNumbers[i];
d=mulmod(d,h,PrimeNumbers[i]);
h=mulmod(SQRT_kN_of_PrimeNumbers[i],h,PrimeNumbers[i]);
d=-d; d+=h; if (d<0) d+=PrimeNumbers[i];
Delta_of_PrimeNumbers[i][0]=d;
d-=h; if (d<0) d+=PrimeNumbers[i];
d-=h; if (d<0) d+=PrimeNumbers[i];
Delta_of_PrimeNumbers[i][1]=d;
#endif
#if 0 || defined(DEBUG)
// we may want to check, whether above computations are valid:
Polynom.get_values(Delta_of_PrimeNumbers[i][0],x,y);
if (mpz_mod_ui(y,y,PrimeNumbers[i])!=0)
{
cerr << "Delta0 " << Delta_of_PrimeNumbers[i][0] << " doesn't divide for "
<< PrimeNumbers[i] << "! (remainder: " << y << ")" << endl;
Delta_of_PrimeNumbers[i][0]-=mpz_get_ui(y);
if (Delta_of_PrimeNumbers[i][0]<0) Delta_of_PrimeNumbers[i][0]+=PrimeNumbers[i];
}
Polynom.get_values(Delta_of_PrimeNumbers[i][1],x,y);
if (mpz_mod_ui(y,y,PrimeNumbers[i])!=0)
{
cerr << "Delta1 " << Delta_of_PrimeNumbers[i][1] << " doesn't divide for "
<< PrimeNumbers[i] << "! (remainder: " << y << ")" << endl;
Delta_of_PrimeNumbers[i][1]-=mpz_get_ui(y);
if (Delta_of_PrimeNumbers[i][1]<0) Delta_of_PrimeNumbers[i][1]+=PrimeNumbers[i];
}
#endif
}
}
// it guaranteed here that i%2=0
// factorbase: compute first sieve hit (unpaired part)
for ( ; i<StaticFactorbase::Size(); ++i) // do the unpaired part
{
//inv_A2_modp=fastinvmod(Polynom.get_A2_mod(PrimeNumbers[i]),PrimeNumbers[i]); // slow version
inv_A2_modp=fastinvmod_23bit(get_A2_mod_FBPrime(i),PrimeNumbers[i]); // fast version
#ifdef DEBUG
if ( (inv_A2_modp != fastinvmod(get_A2_mod_FBPrime(i),PrimeNumbers[i]))
|| (inv_A2_modp != fastinvmod(Polynom.get_A2_mod(PrimeNumbers[i]),PrimeNumbers[i])) )
{
MARK; cerr << "Inconsistency detected! please review source code!" << endl;
cerr << "i=" << i << endl;
cerr << "inv_A2_modp=" << inv_A2_modp << endl;
cerr << "but using method 1 we get: " << fastinvmod(Polynom.get_A2_mod(PrimeNumbers[i]),PrimeNumbers[i]) << endl;
cerr << "and using method 2 we get: " << fastinvmod(get_A2_mod_FBPrime(i),PrimeNumbers[i]) << endl;
exit(2);
}
#endif
PolyB=mpz_remainder_ui(Polynom.get_B(),PrimeNumbers[i]);
register int d;
d=SQRT_kN_of_PrimeNumbers[i]-PolyB; if (d<0) d+=PrimeNumbers[i];
Delta_of_PrimeNumbers[i][0]=mulmod(d,inv_A2_modp,PrimeNumbers[i]);
d=PrimeNumbers[i]-SQRT_kN_of_PrimeNumbers[i]-PolyB; if (d<0) d+=PrimeNumbers[i];
Delta_of_PrimeNumbers[i][1]=mulmod(d,inv_A2_modp,PrimeNumbers[i]);
#if 0 || defined(DEBUG)
// we may want to check, whether above computations are valid:
Polynom.get_values(Delta_of_PrimeNumbers[i][0],x,y);
if (mpz_mod_ui(y,y,PrimeNumbers[i])!=0)
{
cerr << "Delta0 " << Delta_of_PrimeNumbers[i][0] << " doesn't divide for "
<< PrimeNumbers[i] << "! (remainder: " << y << ")" << endl;
Delta_of_PrimeNumbers[i][0]-=mpz_get_ui(y);
if (Delta_of_PrimeNumbers[i][0]<0) Delta_of_PrimeNumbers[i][0]+=PrimeNumbers[i];
}
Polynom.get_values(Delta_of_PrimeNumbers[i][1],x,y);
if (mpz_mod_ui(y,y,PrimeNumbers[i])!=0)
{
cerr << "Delta1 " << Delta_of_PrimeNumbers[i][1] << " doesn't divide for "
<< PrimeNumbers[i] << "! (remainder: " << y << ")" << endl;
Delta_of_PrimeNumbers[i][1]-=mpz_get_ui(y);
if (Delta_of_PrimeNumbers[i][1]<0) Delta_of_PrimeNumbers[i][1]+=PrimeNumbers[i];
}
#endif
}
for (int i=0; i<NumberOf_more_PrimePowers; ++i)
{
// compute Deltas for these numbers (which are powers of prime numbers of the factorbase)
// (and sieve them together with the static factors)
inv_A2_modp=fastinvmod(Polynom.get_A2_mod(PrimePowers[i]),PrimePowers[i]);
PolyB=mpz_remainder_ui(Polynom.get_B(),PrimePowers[i]);
register int d;
d=SQRT_kN_of_PrimePowers[i]-PolyB; if (d<0) d+=PrimePowers[i];
Delta_of_PrimePowers[i][0]=mulmod(d,inv_A2_modp,PrimePowers[i]);
d=PrimePowers[i]-SQRT_kN_of_PrimePowers[i]-PolyB; if (d<0) d+=PrimePowers[i];
Delta_of_PrimePowers[i][1]=mulmod(d,inv_A2_modp,PrimePowers[i]);
}
if (!DoReallyAll) goto done;
#if 1 /* Test!!!! 2004-03-09 */
for (int i=FB_maxQuadrate+1; i<StaticFactorbase::Size() && PrimeNumbers[i]<25000; ++i)
{
if (MPQS_Multiplier%PrimeNumbers[i]!=0)
{
// For all/some "large" squares of the factorbase we determine the Deltas,
// but these squares are sieved in conjunction with the dynamic factors.
const unsigned int P = PrimeNumbers[i]*PrimeNumbers[i];
const unsigned int wuq = SQRT_kN_of_PrimeSquares[i];
inv_A2_modp=fastinvmod(Polynom.get_A2_mod(P),P);
PolyB=mpz_remainder_ui(Polynom.get_B(),P);
// Delta1
signed int d = wuq>=PolyB ? wuq-PolyB : wuq-PolyB+P; // overflow?
d=mulmod(d,inv_A2_modp,P);
d+=LogicalSieveSize; d%=P; d-=LogicalSieveSize;
if (d<LogicalSieveSize) // value in interval?
{
TSieve_Delta sieb_delta;
sieb_delta.factor=PrimeNumbers[i];
sieb_delta.delta=d;
Sieve_Delta_HeapOfSquares.push(sieb_delta); // and store
//cout << "Square of " << sieb_delta.factor << " pushed!" << endl;
#if defined(DEBUG)
Polynom.get_values(sieb_delta.delta,x,y);
if (!mpz_divisible_ui_p(y,P))
{
MARK; cerr << "wrong delta position for square!" << endl; exit(1);
} //else { cout << "square of " << sieb_delta.factor << " pushed and okay (delta1)" << endl; };
#endif
}
// Delta2
unsigned int h = wuq+PolyB; d=P-mulmod(h,inv_A2_modp,P); d+=LogicalSieveSize; d%=P; d-=LogicalSieveSize;
if (d<LogicalSieveSize) // value in interval?
{
TSieve_Delta sieb_delta;
sieb_delta.factor=PrimeNumbers[i];
sieb_delta.delta=d;
Sieve_Delta_HeapOfSquares.push(sieb_delta); // and store
//cout << "Square of " << sieb_delta.factor << " pushed!" << endl;
#if defined(DEBUG)
Polynom.get_values(sieb_delta.delta,x,y);
if (!mpz_divisible_ui_p(y,P))
{
MARK; cerr << "wrong delta position for square!" << endl; exit(1);
} //else { cout << "square of " << sieb_delta.factor << " pushed and okay (delta2)" << endl; };
#endif
}
}
}
#endif
#ifdef SIEVING_LARGE_SQUARES
for (int i=StaticFactorbase::Size()-1; i>FB_maxQuadrate && PrimeNumbers[i]>25000; --i)
{
if (MPQS_Multiplier%PrimeNumbers[i]!=0)
{
// For all "large" squares of the factorbase we determine the Deltas,
// but these squares are sieved in conjunction with the dynamic factors.
mpz_set_ui(P,PrimeNumbers[i]); mpz_mul(P,P,P); // get square of the prime number
mpz_invert(inv_A2,Polynom.get_A2(),P);
// now compute square roots modulo P (these values are not memorized; this would take too much space)
// compute square roots of kN modulo Primzahl^2
const int Wu = SQRT_kN_of_PrimeNumbers[i];
mpz_set_ui(y,2*Wu);
if (mpz_invert(y,y,P)==0)
{
cerr << "PQ-root for " << PrimeNumbers[i] << ": inverse doesn't exist!" << endl;
exit(1);
}
mpz_mod(x,kN,P); mpz_set_ui(wuq,Wu); mpz_mul_ui(wuq,wuq,Wu); mpz_sub(x,x,wuq);
mpz_mul(x,x,y); mpz_add_ui(x,x,Wu); mpz_mod(wuq,x,P);
#ifdef DEBUG
// we may want to check, whether above computations are valid:
mpz_mul(x,wuq,wuq); mpz_sub(x,kN,x); mpz_mod(x,x,P);
if (mpz_cmp_ui(x,0)!=0)
{
cerr << "PQ-root incorrect!" << endl;
cerr << "Primzahl=" << PrimeNumbers[i] << endl;
cerr << "kN= " << kN << endl;
cerr << "SQRT(kN) mod p=" << SQRT_kN_of_PrimeNumbers[i] << endl;
cerr << "SQRT(kN) mod p^2=" << wuq << endl;
cerr << "but we got (wrong value): " << x << endl;
exit(1);
}
#endif
// and now the Deltas:
// Delta1
mpz_sub(x,wuq,Polynom.get_B()); mpz_mul(x,x,inv_A2); mpz_add_ui(x,x,LogicalSieveSize); mpz_mod(x,x,P);
mpz_sub_ui(x,x,LogicalSieveSize);
if (mpz_cmp_ui(x,LogicalSieveSize)<=0) // does the value lie in the interval?
{
TSieve_Delta sieb_delta;
sieb_delta.factor=PrimeNumbers[i];
sieb_delta.delta=mpz_get_si(x);
Sieve_Delta_HeapOfSquares.push(sieb_delta); // and store
//cout << "Square of " << sieb_delta.factor << " pushed!" << endl;
#ifdef DEBUG
Polynom.get_values(sieb_delta.delta,x,y);
if (!mpz_divisible_p(y,P))
{
MARK; cerr << "wrong delta position for square!" << endl; exit(1);
} //else { cout << "square of " << sieb_delta.factor << " pushed and okay (delta1)" << endl; };
#endif
}
// Delta2
mpz_add(x,wuq,Polynom.get_B()); mpz_neg(x,x); mpz_mul(x,x,inv_A2); mpz_add_ui(x,x,LogicalSieveSize); mpz_mod(x,x,P);
mpz_sub_ui(x,x,LogicalSieveSize);
if (mpz_cmp_ui(x,LogicalSieveSize)<=0) // value inside interval?
{
TSieve_Delta sieb_delta;
sieb_delta.factor=PrimeNumbers[i];
sieb_delta.delta=mpz_get_si(x);
Sieve_Delta_HeapOfSquares.push(sieb_delta); // and store
//cout << "Square of " << sieb_delta.factor << " pushed!" << endl;
#ifdef DEBUG
Polynom.get_values(sieb_delta.delta,x,y);
if (!mpz_divisible_p(y,P))
{
MARK; cerr << "wrong delta position for square!" << endl; exit(1);
} //else { cout << "square of " << sieb_delta.factor << " pushed and okay (delta2)" << endl; };
#endif
}
}
}
#endif
#ifdef SIEVING_MORE_LARGE_SQUARES
// and push more squares outside of the static factorbase:
{
unsigned int Primzahl = biggest_Prime_in_Factorbase;
const unsigned int UpperLimit = 1000000; // or maybe 4*biggest_Prime_in_Factorbase;
cout << "pushing more prime squares up to " << UpperLimit << endl;
while (Primzahl<UpperLimit)
{
do // compute next prime number of the static factorbase
{
do Primzahl+=2;
while( Primzahl%3==0 || Primzahl%5==0 || Primzahl%7==0
|| Primzahl%11==0 || Primzahl%13==0 || Primzahl%17==0
|| !probab_prime(Primzahl)); // next prime number
mpz_set_ui(P,Primzahl);
}
while (mpz_legendre(kN,P)<0); // prime number valid for factorbase?
mpz_mul(P,P,P); // compute square
mpz_invert(inv_A2,Polynom.get_A2(),P);
// now compute square roots modulo P (these values are not memorized; this would take too much space)
// compute square roots of kN modulo Primzahl^2
const int Wu = SQRT_kN_mod_PrimeNumber(Primzahl);
mpz_set_ui(y,2*Wu);
if (mpz_invert(y,y,P)==0)
{
cerr << "PQ-root for " << Primzahl << ": inverse doesn't exist!" << endl;
exit(1);
}
mpz_mod(x,kN,P); mpz_set_ui(wuq,Wu); mpz_mul_ui(wuq,wuq,Wu); mpz_sub(x,x,wuq);
mpz_mul(x,x,y); mpz_add_ui(x,x,Wu); mpz_mod(wuq,x,P);
#ifdef DEBUG
// we may want to check, whether above computations are valid:
mpz_mul(x,wuq,wuq); mpz_sub(x,kN,x); mpz_mod(x,x,P);
if (mpz_cmp_ui(x,0)!=0)
{
cerr << "PQ-root incorrect!" << endl;
cerr << "Primzahl=" << Primzahl << endl;
cerr << "kN= " << kN << endl;
cerr << "SQRT(kN) mod p=" << Wu << endl;
cerr << "SQRT(kN) mod p^2=" << wuq << endl;
cerr << "but we got (wrong value): " << x << endl;
exit(1);
}
#endif
// and now compute the Deltas:
// Delta1
mpz_sub(x,wuq,Polynom.get_B()); mpz_mul(x,x,inv_A2); mpz_add_ui(x,x,LogicalSieveSize); mpz_mod(x,x,P);
mpz_sub_ui(x,x,LogicalSieveSize);
if (mpz_cmp_ui(x,LogicalSieveSize)<=0) // value inside interval?
{
TSieve_Delta sieb_delta;
sieb_delta.factor=Primzahl;
sieb_delta.delta=mpz_get_si(x);
Sieve_Delta_HeapOfSquares.push(sieb_delta); // and store
// IMPORTANT: The prime number will be placed into Sieve_Delta_HeapOfSquares *and*
// additionally into Sieve_Delta_Heap (if it is a square of a unknown/unresolved dynamic factor).
// Please remember: Known dynamic factors will be sieved anyway, so sieving two times again
// for squares should be avoided.
// Doing this, we can assure, that
// - the value (factor instead of factor^2) fits into integer range,
// - the value is hitting twice (as square) inside the sieve.
// The (possible) effect, that a square is counted with 3 hits remains still possible (whenever
// a new dynamic factor is detected during sieving and deactivated FAKEHEAP), but this case
// is unlikely and harmless. (In the worst case a non-relation will falsely be considered as
// a relation and rejected afterwards.)
if (!is_dynamic_factor(Primzahl)) Sieve_Delta_Heap.push(sieb_delta); // and store
#ifdef VERBOSE
cout << "Pushed square of " << sieb_delta.factor << " (Delta1)!" << endl;
#endif
#ifdef DEBUG
Polynom.get_values(sieb_delta.delta,x,y);
if (!mpz_divisible_p(y,P))
{
MARK; cerr << "wrong delta position for square!" << endl; exit(1);
} else { cout << "large square of " << Primzahl << " pushed and okay (delta1)" << endl; };
#endif
}
// Delta2
mpz_add(x,wuq,Polynom.get_B()); mpz_neg(x,x); mpz_mul(x,x,inv_A2); mpz_add_ui(x,x,LogicalSieveSize); mpz_mod(x,x,P);
mpz_sub_ui(x,x,LogicalSieveSize);
if (mpz_cmp_ui(x,LogicalSieveSize)<=0) // value inside interval?
{
TSieve_Delta sieb_delta;
sieb_delta.factor=Primzahl;
sieb_delta.delta=mpz_get_si(x);
Sieve_Delta_HeapOfSquares.push(sieb_delta); // and push into priority queue
// see remark for Delta1
if (!is_dynamic_factor(Primzahl)) Sieve_Delta_Heap.push(sieb_delta); // push into priority queue
#ifdef VERBOSE
cout << "Pushed square of " << sieb_delta.factor << " (Delta2)!" << endl;
#endif
#ifdef DEBUG
Polynom.get_values(sieb_delta.delta,x,y);
if (!mpz_divisible_p(y,P))
{
MARK; cerr << "wrong delta position for square!" << endl; exit(1);
} else { cout << "large square of " << Primzahl << " pushed and okay (delta2)" << endl; };
#endif
}
}
}
#endif
#if defined(VERBOSE) || defined(DEBUG)
if (!Sieve_Delta_HeapOfSquares.empty())
cout << Sieve_Delta_HeapOfSquares.size() << " squares have been pushed." << endl;
#endif
// and now do the same computations for dynamic factorbase:
DynamicFactorArrays::compute_Deltas_for_DynamicFactors(-LogicalSieveSize);
done:
mpz_clear(inv_A2); mpz_clear(P); mpz_clear(wuq); mpz_clear(x); mpz_clear(y);
// cout << "Finished computation of Deltas." << endl;
}
namespace statistical_data
{
extern bool StatusReport(const bool force_now = false);
}
void select_sieve_method();
void (*do_sieving)() = select_sieve_method;
void do_sieving_partial_intervals()
{
const int ScanningRange = (5*LogicalSieveSize)/(16*PhysicalSieveSize);
static signed int StartPSI[2]; // contains starting indices
static unsigned int StopPSI[2]; // contains stopping indices
static int* HelperTables[2][2] = { {NULL,NULL}, {NULL,NULL} };
static int* SieveImages[1024] = { 0 };
{
// this blocks triggers recalibration of sieving thresholds
// from time to time.
static unsigned int counter = 1;
if (--counter==0)
{
counter=0x20000;
SieveControl::RecalibrateThresholds();
cout << "Sieving partial intervals." << endl;
for (int i=0; i<2; ++i)
{
StartPSI[i]=StopPSI[i]=SieveControl::BestPSI[i];
StartPSI[i]-=ScanningRange; StopPSI[i]+=ScanningRange;
if (StartPSI[i]<0) StartPSI[i]=0;
if (StopPSI[i]>=2*static_cast<unsigned int>(LogicalSieveSize)/PhysicalSieveSize)
{
cout << "CUTTING RANGE!" << endl;
StopPSI[i]=2*LogicalSieveSize/PhysicalSieveSize-1;
}
CSieveStaticFactors::create_Tables_for_init_LocalDeltas(HelperTables[i][0],HelperTables[i][1],-LogicalSieveSize+PhysicalSieveSize*StartPSI[i]);
cout << "Physical intervals " << StartPSI[i] << " to " << StopPSI[i] << " marked for sieving." << endl;
#if 0
cout << "creating Threshold Sieve Image for Interval " << SieveControl::BestPSI[i] << endl;
SieveControl::create_sieve_image(SieveImages[SieveControl::BestPSI[i]],-LogicalSieveSize+PhysicalSieveSize*SieveControl::BestPSI[i]);
if (static_cast<unsigned int>(StartPSI[i]+1)<=SieveControl::BestPSI[i])
{
cout << "creating Threshold Sieve Image for Interval " << SieveControl::BestPSI[i]-1 << endl;
SieveControl::create_sieve_image(SieveImages[SieveControl::BestPSI[i]-1],-LogicalSieveSize+PhysicalSieveSize*(SieveControl::BestPSI[i]-1));
}
if (SieveControl::BestPSI[i]+1<=StopPSI[i])
{
cout << "creating Threshold Sieve Image for Interval " << SieveControl::BestPSI[i]+1 << endl;
SieveControl::create_sieve_image(SieveImages[SieveControl::BestPSI[i]+1],-LogicalSieveSize+PhysicalSieveSize*(SieveControl::BestPSI[i]+1));
}
#elif 1
cout << "creating Threshold Sieve Image for Interval " << SieveControl::BestPSI[i] << endl;
SieveControl::create_sieve_image(SieveImages[SieveControl::BestPSI[i]],-LogicalSieveSize+PhysicalSieveSize*SieveControl::BestPSI[i]);
#endif
}
}
}
CDeltaComputations::recompute_all_Deltas(false);
for (int i=0; i<2; ++i)
{
SieveOffset=-LogicalSieveSize+PhysicalSieveSize*StartPSI[i];
SieveControl::PhysicalSieveIntervalIndex=StartPSI[i];
CSieveStaticFactors::init_LocalDeltas(HelperTables[i][0],HelperTables[i][1]);
while (SieveControl::PhysicalSieveIntervalIndex<=StopPSI[i])
{
//cout << "Sieving Interval: " << SieveOffset << endl;
if (SieveImages[SieveControl::PhysicalSieveIntervalIndex<=StopPSI[i]])
initialize_Sieve(SieveImages[SieveControl::PhysicalSieveIntervalIndex]);
else
initialize_Sieve();
CSieveStaticFactors::do_sieving_StaticFactors(); // with primes out of the static factorbase
do_scanning_Sieve();
SieveOffset+=PhysicalSieveSize;
SieveControl::PhysicalSieveIntervalIndex++;
}
}
statistical_data::StatusReport(); // issue information to screen (don't remove this, it triggers some other stats, too!)
// decide when to switch to complete interval sieving mode:
if (DynamicFactorArrays::DYNFB_threshold<0.0) do_sieving=do_sieving_full_intervals;
//if ( DynamicFactorRelations.size() > 8*static_cast<unsigned int>(StaticFactorbase::Size()) ) do_sieving=do_sieving_full_intervals;
}
void do_sieving_full_intervals()
{
SieveOffset=-LogicalSieveSize;
SieveControl::PhysicalSieveIntervalIndex=0;
#if 1
{
// this blocks triggers recalibration of sieving thresholds
// from time to time.
static unsigned int counter = 1;
if (--counter==0)
{
counter=0x20000;
statistical_data::StatusReport(true); // issue information to screen (don't remove this, it triggers some other stats, too!)
SieveControl::RecalibrateThresholds();
cout << "Sieving complete intervals." << endl;
}
}
#endif
CDeltaComputations::recompute_all_Deltas();
#ifdef USE_FAKEHEAP
Sieve_Delta_Heap.sort();
#endif
CSieveStaticFactors::init_LocalDeltas(CSieveStaticFactors::DefaultHelperTable[0],CSieveStaticFactors::DefaultHelperTable[1]);
//CSieveStaticFactors::init_LocalDeltas(); // or use this deprecated function, if memory access is slower (due to cache pollution) than computing thousands of mulmods!
while (SieveOffset<LogicalSieveSize)
{
initialize_Sieve();
CSieveStaticFactors::do_sieving_StaticFactors(); // with primes out of the static factorbase
do_sieving_Squares(); // (bigger) squares of primes of the static factorbase
do_sieving_DynamicFactors(); // sieve with active single large primes (=dynamic factors)
do_scanning_Sieve();
SieveOffset+=PhysicalSieveSize;
SieveControl::PhysicalSieveIntervalIndex++;
}
statistical_data::StatusReport(); // issue information to screen (don't remove this, it triggers some other stats, too!)
}
void select_sieve_method()
{
cout << "QSieve sieve method selector called." << endl;
// try calculate a good threshold for switching from partial sieving to complete sieving:
// - the logical sieve interval is [-LogicalSieveSize ... LogicalSieveSize] -> *2
// - two hits per prime number -> *2
// - the larger the logical sieve interval, the less costs for switching polynomials -> 2^21 as reference interval size
// - we want that the dynamic Factorbase has roughly the same quality as the static factorbase
DynamicFactorArrays::DYNFB_threshold=4.0*2097152.0*StaticFactorbaseSettings::Size()/StaticFactorbaseSettings::BiggestPrime();
if (mpz_sizeinbase(n,10)<60) // for small numbers we use full intervals!
{
do_sieving=do_sieving_full_intervals;
}
else // for large numbers we start with sieving partial intervals
{
do_sieving=do_sieving_partial_intervals;
if (mpz_sizeinbase(n,10)<100) DynamicFactorArrays::DYNFB_threshold*=5.0; // high threshold avoids sieving complete intervals
if (mpz_sizeinbase(n,10)>105) DynamicFactorArrays::DYNFB_threshold/=5.0; // low threshold (use partial intervals only to collect dynamic factors for starting the chain reaction)
}
do_sieving();
}