qsieve

qsieve Mercurial Source Tree


Root/src/invmod.c

/*! @file
 * @brief
 * floating point implementation for computing the modular inverse
 */
 
 
/*--------------------------------------------------------------------
This file is placed in the public domain by its author,
Jason Papadopoulos. You may use it for any purpose, free of charge,
without having to notify anyone. I disclaim any responsibility for any
errors.
 
Optionally, please be nice and tell me if you find this source to be
useful. Again optionally, if you add to the functionality present here
please consider making those additions public too, so that others may
benefit from your work.
                       --jasonp@boo.net
--------------------------------------------------------------------*/
 
 
/*-------------------------------------------------------------------------
This file was modified by Thorsten Reinecke to be compatible with Qsieve.
It can be used as replacement for the other invmod functions in modulo.cc.
 
For generic support of 23bit invmod, this version is a very fast
alternative. (For Athlon computers, the crafted cmov assembler version of
montgom_invmod is faster than this one.) Many thanks to the author for
providing me this file, so that I was able to do performance tests.
 
Except for this file (and except otherwise noted), all the other files in
the Qsieve distribution are still copyrighted under the GPL!
                                   thre@thorstenreinecke.de
-------------------------------------------------------------------------*/
 
 
/*--------------------------------------------------------------------*/
 
    /* When the factor base is really big, the MPQS
       initialization stage takes a big fraction of the
       runtime; timings show that ~90% of the time in
       the initialization stage is spent computing modular
       inverses. To that end, the following implements a
       custom modular inverse that avoids integer division.
       It's somewhat obscure, but far less obscure than
       implementing the self-initializing variant of MPQS.
 
       This is *much* faster than using mp_expo_1, because
       it uses floating point and several tricks for fast
       rounding. */
 
const double ieee_round = 6755399441055744.0;
const double intel_round = 6755399441055744.0 * 2048.0;
const double round_test = 2.7;
const double round_correct = 3.0;
volatile double round_constant[2];
 
 
void init_fast_invmod(void) {
  
    /* Unfortunately, the fast rounding requires figuring
       out the size of a floating point register. The choices
       are the 53-bit IEEE size and the 64-bit Intel size;
       however, the OS may force the Intel size down to the
       IEEE size for compatibility. The following performs
       a runtime check to see which register size is in use,
       and selects the magic rounding constant accordingly. */
 
    double round_me;
  
    round_constant[0] = intel_round;
    round_constant[1] = intel_round;
    round_me = round_test;
    round_me += round_constant[0];
    round_me -= round_constant[1];
     
     
    printf("initializing jason's fast_invmod\n");
    if (round_me != round_correct) {
        round_constant[0] = ieee_round;
        round_constant[1] = ieee_round;
        round_me = round_test;
        round_me += round_constant[0];
        round_me -= round_constant[1];
        if( round_me != round_correct ) {
            printf("error: can't initialize fast rounding\n");
            exit(-1);
        }
    }
 
    /* Check that the compiler did not incorrectly
       optimize our custom modular inverse routine */
 
    if (fast_invmod(6, 17) != 3) {
        printf("error: modular inverse does not work\n");
        exit(-1);
    }
}
 
unsigned int fast_invmod(unsigned int a, unsigned int p) {
 
    /* Compute mp_expo_1(a, p-2, p) using floating point.
       'a' and 'p' must be less than ~25 bits in size if
       using IEEE fast rounding, but can go up to ~31 bits
       if Intel fast rounding is in use. Both of these
       are far larger than MPQS will ever need.
 
       The code avoids an integer remainder by multiplying
       by the floating point reciprocal and (fast-)rounding to
       the nearest integral floating point value. Within
       the while() loop, the accumulated result is always
       bounded by +-p, and is normalized afterwards.
        
       The exponent is scanned right to left; at each step the
       modular squaring and multiply are both performed, even
       if the multiply result will be thrown away */
        
    unsigned int exponent = p - 2;
    int result;
    double dp = (double)(p);
    double dsquare = (double)(a);
    double recip = 1.0 / dp;
    double round0 = round_constant[0];
    double round1 = round_constant[1];
    double dresult = 1.0;
 
    while (exponent) {
        double dsquare2 = dsquare * dsquare;
        double dresult2 = dresult * dsquare;
 
        double drsquare = dsquare2 * recip + round0 - round1;
        double drresult = dresult2 * recip + round0 - round1;
 
        dsquare2 -= dp * drsquare;
        dresult2 -= dp * drresult;
 
        dsquare = dsquare2;
        if (exponent & 1)
            dresult = dresult2;
 
        exponent >>= 1;
    }
 
    result = (int)dresult;
 
    if (result < 0)
        return (unsigned int)(result + p);
    else
        return (unsigned int)result;
}
Source at commit 4d365e34ac0b created 11 years 9 months ago.
By Nathan Adams, fixing modifications with files

Archive Download this file

Branches

Tags

Page rendered in 0.75335s using 11 queries.