parallel-sigma_odd-problem/html/sigmaodd_8cl_source.html

 /* -*- coding: latin-1 -*- */
 /** \file opencl/opencl_src/sigmaodd.cl (February 6, 2018)
  * \brief
  * OpenCL implementation to check the sigma_odd problem,
  * mostly like the other implementations but without the advantage of the shortcut in the factorization.
  * If your GPU supports a recent enough OpenCL version, use the built-in function **ctz** in divide_until_odd() instead of the manual computation.
  * Some tips used:
  * "optimizing specifically for Processor Graphics OpenCL* device, ensure all conditionals are evaluated outside of code branches"
  * https://software.intel.com/sites/landingpage/opencl/optimization-guide/Notes_on_Branching_Loops.htm
  *
  * GPLv3 --- Copyright (C) 2017, 2018 Olivier Pirson
  * http://www.opimedia.be/
  */

 /* *******
  * Types *
  *********/

 /** \brief
  * Unsigned integer type used for every natural number in this file.
  *
  * OpenCL C defines `unsigned long` as a 64-bit type,
  * so arithmetic on nat_type silently wraps modulo 2^64.
  */
 typedef unsigned long nat_type;


 /** \brief
  * Type of a prime number, in particular of each entry of the table of primes
  * (`unsigned int`, a 32-bit type in OpenCL C).
  */
 typedef unsigned int prime_type;


 /* ************
  * Prototypes *
  **************/

 /** \brief
  * Return the eighth root of n rounded up,
  * i.e. the smallest natural number r such that r^8 >= n.
  */
 nat_type
 ceil_eighth_root(nat_type n);


 /** \brief
  * Return n divided by 2 as many times as needed for the result to be odd
  * (i.e. n with all its factors 2 removed).
  *
  * @param n != 0
  */
 nat_type
 divide_until_odd(nat_type n);


 /** \brief
  * Return the eighth root of n rounded down,
  * i.e. the largest natural number r such that r^8 <= n.
  */
 nat_type
 floor_eighth_root(nat_type n);


 /** \brief
  * Return the square root of n rounded down,
  * i.e. the largest natural number r such that r^2 <= n.
  */
 nat_type
 floor_square_root(nat_type n);


 /** \brief
  * Return true iff d divides n,
  * i.e. iff n is divisible by d.
  *
  * @param d divisor, != 0
  * @param n number to test
  */
 bool
 is_divide(nat_type d, nat_type n);


 /** \brief
  * Return true iff n is even (defined as the negation of is_odd()).
  */
 bool
 is_even(nat_type n);


 /** \brief
  * Return true iff n is in the table [begin, end).
  *
  * The implementation is a binary search,
  * so the table is expected to be sorted in increasing order.
  *
  * @param begin pointer to the first item of the table
  * @param end pointer just after the last item of the table
  * @param n value to look for
  */
 bool
 is_in_table(__global const nat_type* begin,
             __global const nat_type* end, const nat_type n);


 /** \brief
  * Return true iff n is odd (i.e. iff its lowest bit is set).
  */
 bool
 is_odd(nat_type n);


 /** \brief
  * Return true iff varsigma_odd(n) < n.
  *
  * If there are not enough primes
  * then return false.
  * NOTE(review): the implementation visible in this file does not test
  * for the end of the table of primes -- confirm how the table is terminated.
  *
  * "Same" implementation as the sequential/threads/MPI ones.
  *
  * @param primes table of the odd primes, in increasing order (presumably starting at 3 -- TODO confirm)
  * @param n odd >= 3
  */
 bool
 is_varsigma_odd_lower(__global const prime_type* primes, nat_type n);


 /** \brief
  * Version of is_varsigma_odd_lower() rewritten for OpenCL:
  * it first collects the prime divisors of n,
  * then accumulates the sum of divisors of each prime power,
  * without the early exit of is_varsigma_odd_lower().
  *
  * @param primes table of primes
  * @param n odd >= 3
  */
 bool
 is_varsigma_odd_lower__optimized(__global const prime_type* primes, nat_type n);


 /** \brief
  * Simplified version of is_varsigma_odd_lower():
  * a single loop over the primes, without the early exit test
  * based on sigma_odd_upper_bound().
  *
  * @param primes table of primes
  * @param n odd >= 3
  */
 bool
 is_varsigma_odd_lower__simplified(__global const prime_type* primes, nat_type n);


 /** \brief
  * Return n^k, n to the power k
  * (computed with nat_type arithmetic, thus modulo 2^64).
  *
  * @param n base
  * @param k exponent (n^0 == 1)
  */
 nat_type
 pow_nat(nat_type n, unsigned int k);


 /** \brief
  * Return n*n (computed with nat_type arithmetic, thus modulo 2^64).
  */
 nat_type
 square(nat_type n);


 /** \brief
  * Return the sum of the (k + 1) first terms
  * of the geometric progression of common ratio r, starting at 1.
  *
  * If r is prime
  * then the result is equal to the sum of the divisors of r^k.
  *
  * In fact calculates (r^k - 1) / (r - 1) + r^k to avoid some overflows.
  *
  * @param r > 1
  * @param k number of terms minus one
  *
  * @return 1 + r + r^2 + r^3 + ... + r^k = (r^(k + 1) - 1) / (r - 1)
  */
 nat_type
 sum_geometric_progression_strict(nat_type r, unsigned int k);


 /** \brief
  * Return an upper bound of sigma_odd(n).
  *
  * If n == 1
  * then return 1,
  * else return floor((n * (2 * ceil((n - 1)^{1/8}) + 1)) / 2)
  * (this is the expression evaluated by the implementation).
  *
  * @param n odd
  */
 nat_type
 sigma_odd_upper_bound(nat_type n);


 /** \brief
  * Return an approximation of the square root of n.
  * The result is always >= the exact square root
  * (it is not necessarily the smallest such natural number).
  */
 nat_type
 upper_square_root(nat_type n);


 /** \brief
  * Return varsigma_odd(n),
  * i.e. the sum of all odd divisors of n,
  *      divided by 2 until it is odd.
  *
  * If there are not enough primes
  * then return 0.
  * NOTE(review): the implementation visible in this file does not test
  * for the end of the table of primes and has no path returning 0 -- confirm.
  *
  * @param primes table of primes
  * @param n odd >= 3
  */
 nat_type
 varsigma_odd(__global const prime_type* primes, nat_type n);


 /* ***********
  * Functions *
  *************/

 /* Eighth root of n rounded up:
  * take the rounded down root and add one unless n is an exact eighth power.
  */
 nat_type
 ceil_eighth_root(nat_type n) {
   const nat_type lower = floor_eighth_root(n);
   const nat_type lower_pow8 = square(square(square(lower)));

   if (lower_pow8 == n) {  // n is an exact eighth power
     return lower;
   }

   return lower + 1;
 }


 /* Return n divided by 2 until the result is odd.
  *
  * The documented precondition is n != 0.
  * If n == 0 is nevertheless given, 0 is returned
  * instead of looping forever (0 never becomes odd by shifting).
  */
 nat_type
 divide_until_odd(nat_type n) {
   // return n >> ctz(n);  // maybe not exactly the correct use
   //                         (ctz do not exist for my OpenCL version)
   // https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/ctz.html

   // The condition is evaluated outside of the loop header
   // (see the branching tip in the header of this file).
   // Only the initial value can be 0: a non-zero n reaches an odd value
   // before all its bits are shifted out.
   bool is = (n != 0) && is_even(n);

   while (is) {
     n >>= 1;
     is = is_even(n);
   }

   return n;
 }


 /* Eighth root of n rounded down.
  *
  * A floating point estimate is computed first,
  * then corrected with exact integer arithmetic.
  */
 nat_type
 floor_eighth_root(nat_type n) {
   // (2^(64/8) - 1)^8 = 17878103347812890625:
   // from this value the answer is 2^(64/8) - 1 = 255
   if (n >= 17878103347812890625ul) {
     return 255;
   }

   // Floating point estimate: three nested square roots
   nat_type candidate = (nat_type)floor(native_sqrt(native_sqrt(native_sqrt((float)n))));

   // Correct possible rounding error due to floating point computation:
   // go up to the first candidate whose eighth power is too big...
   for ( ; square(square(square(candidate))) <= n; ++candidate) {
   }

   // ...then go down to the largest candidate whose eighth power fits
   --candidate;
   while (square(square(square(candidate))) > n) {
     --candidate;
   }

   return candidate;
 }


 /* Square root of n rounded down.
  *
  * A floating point estimate is computed first,
  * then corrected with exact integer arithmetic.
  */
 nat_type
 floor_square_root(nat_type n) {
   if (n >= 18446744065119617025ul) {  // >= (2^(64/2) - 1)^2 = 18446744065119617025
     return 4294967295ul;  // = 2^(64/2) - 1
   }
   else {
     nat_type sqrt_n = (nat_type)floor(native_sqrt((float)n));

     // A float has only 24 significant bits: for n just below (2^32 - 1)^2
     // the conversion (float)n may round up to 2^64 and the estimate be 2^32.
     // square(2^32) wraps to 0, which would make the correction loop below
     // run away, so the estimate is clamped to the largest possible answer.
     if (sqrt_n > 4294967295ul) {
       sqrt_n = 4294967295ul;
     }

     /* Correct possible rounding error due to floating point computation */
     // Here n < (2^32 - 1)^2, so this loop stops at the latest on 2^32 - 1
     // and square() never overflows.
     while (square(sqrt_n) <= n) {  /* get the first value too big */
       ++sqrt_n;
     }
     do  {  /* get the correct value */
       --sqrt_n;
     } while (square(sqrt_n) > n);

     return sqrt_n;
   }
 }


 /* True iff d divides n (d must be != 0). */
 bool
 is_divide(nat_type d, nat_type n) {
   const nat_type remainder = n % d;

   return (remainder == 0);
 }


 /* True iff n is even, i.e. iff n is not odd. */
 bool
 is_even(nat_type n) {
   const bool odd = is_odd(n);

   return !odd;
 }


 /* Return true iff n is in the sorted table [begin, end).
  *
  * Branch-free binary search (select() instead of if/else), adapted from
  * https://stackoverflow.com/questions/24989455/is-a-binary-search-a-good-fit-for-opencl/26978943#26978943
  *
  * The loop keeps the invariant
  *   every item before begin is <= n  and  every item from end is > n,
  * so when it stops begin points to the first item > n (or to the end
  * of the table). If n is present it is therefore the item just BEFORE begin.
  */
 bool
 is_in_table(__global const nat_type* begin,
             __global const nat_type* end, const nat_type n) {
   __global const nat_type* const first = begin;  // remember the start of the table

   while (begin != end) {
     __global const nat_type* mid = begin + (end - begin)/2;

     // true: look to the right of mid, false: look to the left
     const bool b_right = !(n < *mid);

     begin = (__global const nat_type*)select((intptr_t)begin, (intptr_t)(mid + 1), b_right);
     end = (__global const nat_type*)select((intptr_t)mid, (intptr_t)end, b_right);  // (c ? b : a)
   }

   // Never dereference begin itself: it may be one past the end of the table.
   // begin == first means that every item is > n (or that the table is empty).
   return (begin != first) && (*(begin - 1) == n);
 }


 /* True iff the lowest bit of n is set, i.e. iff n is odd. */
 bool
 is_odd(nat_type n) {
   const nat_type lowest_bit = n & 1;

   return (lowest_bit != 0);
 }


 /* Return true iff varsigma_odd(n) < n.
  *
  * Factorizes n by trial division with the table of primes and accumulates
  * the product of the odd parts of the sum of divisors of each prime power.
  * After each prime factor found, an upper bound on the part not yet
  * factorized allows to stop early with true.
  */
 bool
 is_varsigma_odd_lower(__global const prime_type* primes, nat_type n) {
   nat_type remaining = n;                             // part of n not yet factorized
   nat_type bound = upper_square_root(remaining);      // no need to try primes above it
   nat_type product = 1;                               // varsigma_odd of the factorized part

   for (__global const prime_type* it = primes; ; ++it) {
     const nat_type p = *it;

     if (p > bound) {
       break;
     }

     // Remove all factors p and count them
     unsigned int exponent = 0;

     for ( ; (remaining % p) == 0; remaining /= p) {
       ++exponent;
     }

     if (exponent == 0) {  // p is not a factor
       continue;
     }

     product *= divide_until_odd(sum_geometric_progression_strict(p, exponent));

     // Early exit: even with the upper bound for the remaining part
     // the result stays lower than n
     if (product * sigma_odd_upper_bound(remaining) < n) {
       return true;
     }

     bound = upper_square_root(remaining);
   }

   // What remains, if > 1, is prime
   if (remaining > 1) {
     product *= divide_until_odd(remaining + 1);
   }

   return (product < n);
 }


 // MAIN function
 /* Return true iff varsigma_odd(n) < n, in two passes:
  * 1. collect every prime of the table that divides n
  *    (the first prime is always tried, then all primes <= upper_square_root(n));
  * 2. for each collected prime, extract its full power from n
  *    and accumulate the sum of divisors of this prime power.
  * The factors 2 are removed only once, at the end.
  */
 bool
 is_varsigma_odd_lower__optimized(__global const prime_type* primes, nat_type n) {
   const nat_type limit = upper_square_root(n);

   nat_type found[15];  // primorial(15) > 2^56, thus no more 15 prime factors possible
   unsigned int nb_found = 0;

   // Pass 1: collect prime divisors (n itself is not modified here)
   {
     __global const prime_type* it = primes;
     nat_type candidate = *it;
     bool more;

     do {
       const bool divides = ((n % candidate) == 0);

       if (divides) {
         found[nb_found] = candidate;
         ++nb_found;
       }

       ++it;
       candidate = *it;
       more = (candidate <= limit);
     } while (more);
   }

   // Pass 2: for each prime divisor found, extract its power
   nat_type cofactor = n;  // n divided by the prime powers already extracted
   nat_type sigma = 1;     // product of the sums of divisors of these prime powers

   for (unsigned int idx = 0; idx != nb_found; ++idx) {
     const nat_type p = found[idx];
     nat_type p_power = 1;
     bool divides_again;

     do {  // p divides cofactor at least once
       p_power *= p;
       cofactor /= p;
       divides_again = ((cofactor % p) == 0);
     } while (divides_again);

     // 1 + p + ... + p^alpha, written to avoid some overflows
     sigma *= (p_power - 1)/(p - 1) + p_power;
   }

   // If cofactor > 1 then it is prime and (cofactor + 1) is its sum of divisors,
   // else (cofactor + 1) is a factor 2 removed by divide_until_odd()
   const nat_type odd_part = divide_until_odd(sigma * (cofactor + 1));

   return (odd_part < n);
 }


 /* Return true iff varsigma_odd(n) < n.
  *
  * Single loop over the primes (the first one is always tried,
  * then all primes <= upper_square_root(n)), without early exit.
  */
 bool
 is_varsigma_odd_lower__simplified(__global const prime_type* primes, nat_type n) {
   const nat_type limit = upper_square_root(n);
   nat_type remaining = n;   // part of n not yet factorized
   nat_type product = 1;     // varsigma_odd of the factorized part
   __global const prime_type* it = primes;
   nat_type p = *it;

   do {
     // Extract the full power of p from remaining (p_power stays 1 if p is not a factor)
     nat_type p_power = 1;

     for ( ; (remaining % p) == 0; remaining /= p) {
       p_power *= p;
     }

     // For p_power == 1 this multiplies by 1
     product *= divide_until_odd((p_power - 1)/(p - 1) + p_power);

     ++it;
     p = *it;
   } while (p <= limit);

   // If remaining > 1 then it is prime,
   // else divide_until_odd(1 + 1) == 1 and the product is unchanged
   product *= divide_until_odd(remaining + 1);

   return (product < n);
 }


 /* Return n^k (modulo 2^64) by binary exponentiation:
  * the bits of k are consumed from the lowest one,
  * n being squared at each step.
  */
 nat_type
 pow_nat(nat_type n, unsigned int k) {
   nat_type result = 1;

   for ( ; k != 0; k >>= 1) {
     if ((k & 1u) != 0) {  // current bit set: this power of n is part of the result
       result *= n;
     }

     n *= n;
   }

   return result;
 }


 /* Return n*n (modulo 2^64). */
 nat_type
 square(nat_type n) {
   const nat_type product = n * n;

   return product;
 }


 /* Return 1 + r + r^2 + ... + r^k for r > 1.
  *
  * The closed form (r^(k + 1) - 1) / (r - 1) is split into
  * (r^k - 1) / (r - 1)  +  r^k
  * so that r^(k + 1) is never computed, to avoid some possible overflows.
  */
 nat_type
 sum_geometric_progression_strict(nat_type r, unsigned int k) {
   const nat_type last_term = pow_nat(r, k);                  // r^k
   const nat_type previous_terms = (last_term - 1)/(r - 1);   // 1 + r + ... + r^(k - 1)

   return previous_terms + last_term;
 }


 /* Upper bound of sigma_odd(n) for n odd:
  * 1 if n == 1,
  * else floor((n * (2 * ceil_eighth_root(n - 1) + 1)) / 2).
  */
 nat_type
 sigma_odd_upper_bound(nat_type n) {
   if (n == 1) {
     return 1;
   }

   const nat_type root = ceil_eighth_root(n - 1);
   const nat_type multiplier = (root << 1) + 1;  // 2*root + 1

   return (n * multiplier) >> 1;
 }


 /* Return an approximation of the square root of n
  * that is always >= the exact square root.
  *
  * A floating point estimate is computed, then increased while its square
  * is lower than n. The result never exceeds 2^32, which is an upper bound
  * of the square root of every 64-bit number: without this cap,
  * square() wraps modulo 2^64 for values >= 2^32 and the correction loop
  * runs away for n > (2^32 - 1)^2 (or when the float estimate is 2^32).
  */
 nat_type
 upper_square_root(nat_type n) {
   const nat_type max_root = 4294967296ul;  // 2^(64/2)

   nat_type sqrt_n = (nat_type)native_sqrt((float)n);

   if (sqrt_n >= max_root) {  // estimate already at the cap: its square is not representable
     return max_root;
   }

   /* Correct possible rounding error due to floating point computation */
   bool is_lower = (square(sqrt_n) < n);

   while (is_lower) {  /* get the first value equal or too big */
     ++sqrt_n;
     // Stop on the cap before computing a square that would overflow
     is_lower = (sqrt_n < max_root) && (square(sqrt_n) < n);
   }

   return sqrt_n;
 }


 /* Return varsigma_odd(n): n is factorized by trial division with the table
  * of primes, and for each prime power found the odd part of its sum
  * of divisors is multiplied into the result.
  */
 nat_type
 varsigma_odd(__global const prime_type* primes, nat_type n) {
   nat_type remaining = n;                          // part of n not yet factorized
   nat_type bound = upper_square_root(remaining);   // no need to try primes above it
   nat_type product = 1;

   for (__global const prime_type* it = primes; ; ++it) {
     const nat_type p = *it;

     if (p > bound) {
       break;
     }

     // Remove all factors p and count them
     unsigned int exponent = 0;

     for ( ; (remaining % p) == 0; remaining /= p) {
       ++exponent;
     }

     if (exponent != 0) {
       product *= divide_until_odd(sum_geometric_progression_strict(p, exponent));
       bound = upper_square_root(remaining);
     }
   }

   // What remains, if > 1, is prime
   if (remaining > 1) {
     product *= divide_until_odd(remaining + 1);
   }

   return product;
 }


 /* ******
  * Main *
  ********/
 #ifndef ONLY_MAIN_KERNEL
 /* Kernel: work-item number id (if id < nb) stores in results[id]
  * the value of is_varsigma_odd_lower(primes, ns[id]).
  */
 __kernel
 void
 check_ns(__global const prime_type* primes,
          __global const nat_type* ns, unsigned int nb,
          __global unsigned char* results) {
   const unsigned int id = get_global_id(0);

   if (id >= nb) {  // no number for this work-item
     return;
   }

   results[id] = is_varsigma_odd_lower(primes, ns[id]);
 }
 #endif


 // MAIN kernel
 /* Work-item number id (if id < nb) stores in results[id]
  * the value of is_varsigma_odd_lower__optimized(primes, ns[id]).
  */
 __kernel
 void
 check_ns__optimized(__global const prime_type* primes,
                     __global const nat_type* ns, unsigned int nb,
                     __global unsigned char* results) {
   const unsigned int id = get_global_id(0);

   if (id >= nb) {  // no number for this work-item
     return;
   }

   results[id] = is_varsigma_odd_lower__optimized(primes, ns[id]);
 }


 #ifndef ONLY_MAIN_KERNEL
 /* Kernel: work-item number id (if id < nb) stores in results[id]
  * the value of is_varsigma_odd_lower__simplified(primes, ns[id]).
  */
 __kernel
 void
 check_ns__simplified(__global const prime_type* primes,
                      __global const nat_type* ns, unsigned int nb,
                      __global unsigned char* results) {
   const unsigned int id = get_global_id(0);

   if (id >= nb) {  // no number for this work-item
     return;
   }

   results[id] = is_varsigma_odd_lower__simplified(primes, ns[id]);
 }
 #endif


 // MAIN kernel: other approach
 /* Each work-item i takes the prime primes[prime_offset + i],
  * extracts the full power of this prime from n,
  * then the work-items combine their results pairwise in the buffer factors
  * (tree reduction: at each step only the work-items whose number is
  * a multiple of the current step size continue, the others return).
  *
  * At the end (see the final comments) factors[0] holds the product of
  * the prime powers found and factors[1] the product of their sums of divisors.
  *
  * @param primes table of primes
  * @param prime_offset index in primes of the prime of the work-item 0
  * @param opencl_nb number of work-items taking part in the reduction
  *                  (presumably a power of 2 -- TODO confirm with the host code)
  * @param n number to factorize (each work-item works on its own copy)
  * @param factors buffer read and written by the work-items, indexed by their number
  *
  * NOTE(review): the barriers below are executed after some work-items
  * have already returned, and they use CLK_LOCAL_MEM_FENCE while factors
  * is a __global buffer. The OpenCL specification requires every work-item
  * of a work-group to reach a barrier, and a barrier only synchronizes
  * one work-group -- confirm that this kernel is launched in a way
  * that makes this reduction safe.
  */
 __kernel
 void
 compute_partial_sigma_odd(__global const prime_type* primes,
                           unsigned int prime_offset, unsigned int opencl_nb,
                           nat_type n,
                           __global nat_type* factors) {
   const unsigned int i = get_global_id(0);
   const prime_type prime = primes[prime_offset + i];

   // Each work-item tries to divide n by its own prime
   {
     const nat_type start_n = n;

     {
       bool is_factor = is_divide(prime, n);

       while (is_factor) {
         n /= prime;
         is_factor = is_divide(prime, n);
       }
     }

     // n is now start_n without its factors prime; keep only the removed part
     n = start_n/n;
   }

   factors[i] = n;  // == prime^{alpha}  (== 1 if prime does not divide start_n)

   // Work-items with an odd number only provide their factor, they stop here
   {
     const bool is_not_receiver = is_odd(i);

     if (is_not_receiver) {
       return;
     }
   }

   // Compute sigma_odd for each prime factor and begin to collect:
   // the even work-item i merges its own result with the one of i + 1
   {
     const unsigned int j = i + 1;

     barrier(CLK_LOCAL_MEM_FENCE);

     factors[i] *= factors[j];  // factors product
     // (x - 1)/(p - 1) + x with x == p^{alpha} is 1 + p + ... + p^{alpha};
     // the slot j now holds the product of the two sums of divisors
     factors[j] = (((n - 1)/(prime - 1) + n)
                   * ((factors[j] - 1)/(primes[prime_offset + j] - 1) + factors[j]));  // sigma_odd product
   }

   // Collect the factors of all primes into one unique factor:
   // at each step the distance between merged pairs doubles
   unsigned int power2 = 2;

   while (power2 < opencl_nb) {
     const unsigned int double_power2 = power2 << 1;

     {
       const bool is_not_receiver = ((i & (double_power2 - 1)) != 0);  // double_power2 does not divide i

       if (is_not_receiver) {
         return;
       }
     }

     const unsigned int j = i + power2;

     barrier(CLK_LOCAL_MEM_FENCE);

     // Slots i and j hold products of prime powers,
     // slots i + 1 and j + 1 the corresponding products of sums of divisors
     factors[i]     *= factors[j];  // factors product
     factors[i + 1] *= factors[j + 1];  // sigma_odd product

     power2 = double_power2;
   }

   // factors[0] == product of p_i^{alpha_i} for all p_i primes checked in this step
   // factors[1] == sigma_odd(factors[0])
 }


 #ifndef ONLY_MAIN_KERNEL
 /* *****************
  * Mains for tests *
  *******************/
 /* Test kernel: results[id] = divide_until_odd(ns[id]) for each work-item id
  * (primes is not used).
  */
 __kernel
 void
 test__divide_until_odd(__global const prime_type* primes,
                        __global const nat_type* ns, __global nat_type* results) {
   const unsigned int id = get_global_id(0);
   const nat_type input = ns[id];

   results[id] = divide_until_odd(input);
 }


 /* Test kernel: results[id] = floor_square_root(ns[id]) for each work-item id
  * (primes is not used).
  */
 __kernel
 void
 test__floor_square_root(__global const prime_type* primes,
                         __global const nat_type* ns, __global nat_type* results) {
   const unsigned int id = get_global_id(0);
   const nat_type input = ns[id];

   results[id] = floor_square_root(input);
 }


 /* Test kernel: results[id] = pow_nat(ns[id], id % 5) for each work-item id
  * (primes is not used).
  */
 __kernel
 void
 test__pow_nat(__global const prime_type* primes,
               __global const nat_type* ns, __global nat_type* results) {
   const unsigned int id = get_global_id(0);
   const unsigned int exponent = id % 5;  // exponents 0..4, depending on the work-item

   results[id] = pow_nat(ns[id], exponent);
 }


 /* Test kernel: results[id] = sum_geometric_progression_strict(ns[id], id % 16)
  * for each work-item id (primes is not used).
  */
 __kernel
 void
 test__sum_geometric_progression_strict(__global const prime_type* primes,
                                        __global const nat_type* ns, __global nat_type* results) {
   const unsigned int id = get_global_id(0);
   const unsigned int exponent = id % 16;  // k in 0..15, depending on the work-item

   results[id] = sum_geometric_progression_strict(ns[id], exponent);
 }


 /* Test kernel: results[id] = varsigma_odd(primes, ns[id]) for each work-item id. */
 __kernel
 void
 test__varsigma_odd(__global const prime_type* primes,
                    __global const nat_type* ns, __global nat_type* results) {
   const unsigned int id = get_global_id(0);
   const nat_type input = ns[id];

   results[id] = varsigma_odd(primes, input);
 }
 #endif
check_ns__optimized
__kernel void check_ns__optimized(__global const prime_type *primes, __global const nat_type *ns, unsigned int nb, __global unsigned char *results)
Definition: sigmaodd.cl:549

nat_type
unsigned long nat_type
Type for natural number used in all code, on 64 bits.
Definition: sigmaodd.cl:22

pow_nat
nat_type pow_nat(nat_type n, unsigned int k)
Return x^k, x power k.
Definition: sigmaodd.cl:439

upper_square_root
nat_type upper_square_root(nat_type n)
Return an approximation of the square root of n. The results is always >= the exact square root...
Definition: sigmaodd.cl:482

is_varsigma_odd_lower
bool is_varsigma_odd_lower(__global const prime_type *primes, nat_type n)
Return true iff varsigma_odd(n) < n.
Definition: sigmaodd.cl:322

floor_square_root
nat_type floor_square_root(nat_type n)
Return the square root of n rounded to below.
Definition: sigmaodd.cl:256

is_odd
bool is_odd(nat_type n)
Return true iff n is odd.
Definition: sigmaodd.cl:316

test__pow_nat
__kernel void test__pow_nat(__global const prime_type *primes, __global const nat_type *ns, __global nat_type *results)
Definition: sigmaodd.cl:678

ceil_eighth_root
nat_type ceil_eighth_root(nat_type n)
Return the eighth root of n rounded to above.
Definition: sigmaodd.cl:208

is_varsigma_odd_lower__simplified
bool is_varsigma_odd_lower__simplified(__global const prime_type *primes, nat_type n)
Simplified version of is_varsigma_odd_lower().
Definition: sigmaodd.cl:407

is_even
bool is_even(nat_type n)
Return true iff n is even.
Definition: sigmaodd.cl:283

test__sum_geometric_progression_strict
__kernel void test__sum_geometric_progression_strict(__global const prime_type *primes, __global const nat_type *ns, __global nat_type *results)
Definition: sigmaodd.cl:688

sum_geometric_progression_strict
nat_type sum_geometric_progression_strict(nat_type r, unsigned int k)
Return the sum of the (k + 1) terms of the geometric progression of the common ratio r...
Definition: sigmaodd.cl:464

is_in_table
bool is_in_table(__global const nat_type *begin, __global const nat_type *end, const nat_type n)
Return true iff n is in the table.
Definition: sigmaodd.cl:289

test__divide_until_odd
__kernel void test__divide_until_odd(__global const prime_type *primes, __global const nat_type *ns, __global nat_type *results)
Definition: sigmaodd.cl:658

compute_partial_sigma_odd
__kernel void compute_partial_sigma_odd(__global const prime_type *primes, unsigned int prime_offset, unsigned int opencl_nb, nat_type n, __global nat_type *factors)
Definition: sigmaodd.cl:578

test__floor_square_root
__kernel void test__floor_square_root(__global const prime_type *primes, __global const nat_type *ns, __global nat_type *results)
Definition: sigmaodd.cl:668

prime_type
unsigned int prime_type
Type for prime number, particularly for the table of primes.
Definition: sigmaodd.cl:28

floor_eighth_root
nat_type floor_eighth_root(nat_type n)
Return the eighth root of n rounded to below.
Definition: sigmaodd.cl:235

test__varsigma_odd
__kernel void test__varsigma_odd(__global const prime_type *primes, __global const nat_type *ns, __global nat_type *results)
Definition: sigmaodd.cl:698

check_ns__simplified
__kernel void check_ns__simplified(__global const prime_type *primes, __global const nat_type *ns, unsigned int nb, __global unsigned char *results)
Definition: sigmaodd.cl:563

is_varsigma_odd_lower__optimized
bool is_varsigma_odd_lower__optimized(__global const prime_type *primes, nat_type n)
Version of is_varsigma_odd_lower() rewritten to OpenCL.
Definition: sigmaodd.cl:361

sigma_odd_upper_bound
nat_type sigma_odd_upper_bound(nat_type n)
Return an upper bound of sigma_odd(n).
Definition: sigmaodd.cl:474

sigmaodd::alpha
const double alpha
Definition: harmonic.cpp:24

square
nat_type square(nat_type n)
Return n*n.
Definition: sigmaodd.cl:458

is_divide
bool is_divide(nat_type d, nat_type n)
Return true iff d divide n, i.e. if n is divisible by d.
Definition: sigmaodd.cl:277

divide_until_odd
nat_type divide_until_odd(nat_type n)
Return n divided by 2 until the result is odd.
Definition: sigmaodd.cl:218

check_ns
__kernel void check_ns(__global const prime_type *primes, __global const nat_type *ns, unsigned int nb, __global unsigned char *results)
Definition: sigmaodd.cl:534

varsigma_odd
nat_type varsigma_odd(__global const prime_type *primes, nat_type n)
Return varsigma_odd(n), i.e. the sum of all odd divisors of n, divided by 2 until to be odd...
Definition: sigmaodd.cl:497