Loading [MathJax]/extensions/tex2jax.js
Parallel numerical verification of the σ_odd problem  October 6, 2018
All Classes Namespaces Files Functions Variables Typedefs Macros
sigmaodd.cl
Go to the documentation of this file.
1 /* -*- coding: latin-1 -*- */
2 /** \file opencl/opencl_src/sigmaodd.cl (February 6, 2018)
3  * \brief
4  * OpenCL implementation to check the sigma_odd problem,
5  * mostly like the other implementations but without the advantage of the shortcut in the factorization.
6  * If your GPU supports not too old OpenCL version, use the specific function **ctz** into divide_until_odd() instead the manual computation.
7  * Some tips used:
8  * "optimizing specifically for Processor Graphics OpenCL* device, ensure all conditionals are evaluated outside of code branches"
9  * https://software.intel.com/sites/landingpage/opencl/optimization-guide/Notes_on_Branching_Loops.htm
10  *
11  * GPLv3 --- Copyright (C) 2017, 2018 Olivier Pirson
12  * http://www.opimedia.be/
13  */
14 
15 /* *******
16  * Types *
17  *********/
18 
19 /** \brief
20  * Type for natural number used in all code, on 64 bits.
21  */
22 typedef unsigned long nat_type;
23 
24 
25 /** \brief
26  * Type for prime number, particularly for the table of primes.
27  */
28 typedef unsigned int prime_type;
29 
30 
31 
32 /* ************
33  * Prototypes *
34  **************/
35 
36 /** \brief
37  * Return the eighth root of n rounded to above.
38  */
41 
42 
43 /** \brief
44  * Return n divided by 2 until the result is odd.
45  *
46  * @param n != 0
47  */
50 
51 
52 /** \brief
53  * Return the eighth root of n rounded to below.
54  */
57 
58 
59 /** \brief
60  * Return the square root of n rounded to below.
61  */
64 
65 
66 /** \brief
67  * Return true iff d divide n,
68  * i.e. if n is divisible by d.
69  *
70  * @param d != 0
71  * @param n
72  */
73 bool
75 
76 
77 /** \brief
78  * Return true iff n is even.
79  */
80 bool
81 is_even(nat_type n);
82 
83 
84 /** \brief
85  * Return true iff n is in the table.
86  */
87 bool
88 is_in_table(__global const nat_type* begin,
89  __global const nat_type* end, const nat_type n);
90 
91 
92 /** \brief
93  * Return true iff n is odd.
94  */
95 bool
96 is_odd(nat_type n);
97 
98 
99 /** \brief
100  * Return true iff varsigma_odd(n) < n.
101  *
102  * If there is no enough primes
103  * then return false.
104  *
105  * "Same" implementation than sequential/threads/MPI.
106  *
107  * @param primes
108  * @param n odd >= 3
109  */
110 bool
111 is_varsigma_odd_lower(__global const prime_type* primes, nat_type n);
112 
113 
114 /** \brief
115  * Version of is_varsigma_odd_lower() rewritten to OpenCL.
116  *
117  * @param primes
118  * @param n odd >= 3
119  */
120 bool
121 is_varsigma_odd_lower__optimized(__global const prime_type* primes, nat_type n);
122 
123 
124 /** \brief
125  * Simplified version of is_varsigma_odd_lower().
126  *
127  * @param primes
128  * @param n odd >= 3
129  */
130 bool
131 is_varsigma_odd_lower__simplified(__global const prime_type* primes, nat_type n);
132 
133 
134 /** \brief
135  * Return x^k, x power k.
136  */
137 nat_type
138 pow_nat(nat_type n, unsigned int k);
139 
140 
141 /** \brief
142  * Return n*n.
143  */
144 nat_type
145 square(nat_type n);
146 
147 
148 /** \brief
149  * Return the sum of the (k + 1) terms
150  * of the geometric progression of the common ratio r.
151  *
152  * If r is prime
153  * then the result is equal to the sum of the divisors of r.
154  *
155  * In fact calculates (r^k - 1) / (r - 1) + r^k to avoid some overflow.
156  *
157  * @param r > 1
158  * @param k
159  *
160  * @return 1 + r + r^2 + r^3 + ... + r^k = (r^(k + 1) - 1) / (r - 1)
161  */
162 nat_type
163 sum_geometric_progression_strict(nat_type r, unsigned int k);
164 
165 
166 /** \brief
167  * Return an upper bound of sigma_odd(n).
168  *
169  * If n == 1
170  * then return 1,
171  * else return floor((n * ceil(2 * (n - 1)^{1/8} + 1)) / 2).
172  *
173  * @param n odd
174  */
175 nat_type
177 
178 
179 /** \brief
180  * Return an approximation of the square root of n.
181  * The results is always >= the exact square root.
182  */
183 nat_type
185 
186 
187 /** \brief
188  * Return varsigma_odd(n),
189  * i.e. the sum of all odd divisors of n,
190  * divided by 2 until to be odd.
191  *
192  * If there is no enough primes
193  * then return 0.
194  *
195  * @param primes
196  * @param n odd >= 3
197  */
198 nat_type
199 varsigma_odd(__global const prime_type* primes, nat_type n);
200 
201 
202 
203 /* ***********
204  * Functions *
205  *************/
206 
207 nat_type
209  const nat_type root = floor_eighth_root(n);
210 
211  return (square(square(square(root))) == n
212  ? root
213  : root + 1);
214 }
215 
216 
217 nat_type
219  // return n >> ctz(n); // maybe not exactly the correct use
220  // (ctz do not exist for my OpenCL version)
221  // https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/ctz.html
222 
223  bool is = is_even(n);
224 
225  while (is) {
226  n >>= 1;
227  is = is_even(n);
228  }
229 
230  return n;
231 }
232 
233 
234 nat_type
236  if (n >= 17878103347812890625ul) { // >= (2^(64/8) - 1)^8 = 17878103347812890625
237  return 255; // = 2^(64/8) - 1
238  }
239  else {
240  nat_type root = (nat_type)floor(native_sqrt(native_sqrt(native_sqrt((float)n))));
241 
242  // Correct possible rounding error due to floating point computation
243  while (square(square(square(root))) <= n) { // get the first value too big
244  ++root;
245  }
246  do { // get the correct value
247  --root;
248  } while (square(square(square(root))) > n);
249 
250  return root;
251  }
252 }
253 
254 
255 nat_type
257  if (n >= 18446744065119617025ul) { // >= (2^(64/2) - 1)^2 = 18446744065119617025
258  return 4294967295; // = 2^(64/2) - 1
259  }
260  else {
261  nat_type sqrt_n = (nat_type)floor(native_sqrt((float)n));
262 
263  /* Correct possible rounding error due to floating point computation */
264  while (square(sqrt_n) <= n) { /* get the first value too big */
265  ++sqrt_n;
266  }
267  do { /* get the correct value */
268  --sqrt_n;
269  } while (square(sqrt_n) > n);
270 
271  return sqrt_n;
272  }
273 }
274 
275 
276 bool
278  return n % d == 0;
279 }
280 
281 
282 bool
284  return !is_odd(n);
285 }
286 
287 
288 bool
289 is_in_table(__global const nat_type* begin,
290  __global const nat_type* end, const nat_type n) {
291  // Adapted from
292  // https://stackoverflow.com/questions/24989455/is-a-binary-search-a-good-fit-for-opencl/26978943#26978943
293  while (begin != end) {
294  __global const nat_type* mid = begin + (end - begin)/2;
295 
296 #if false
297  if (!(n < *mid)) { // look to the right
298  begin = mid + 1;
299  }
300  else { // look to the left
301  end = mid;
302  }
303 #else
304  const bool b_right = !(n < *mid);
305 
306  begin = (__global const nat_type*)select((intptr_t)begin, (intptr_t)(mid + 1), b_right);
307  end = (__global const nat_type*)select((intptr_t)mid, (intptr_t)end, b_right); // (c ? b : a)
308 #endif
309  }
310 
311  return (*begin == n);
312 }
313 
314 
315 bool
317  return n & 1;
318 }
319 
320 
321 bool
322 is_varsigma_odd_lower(__global const prime_type* primes, nat_type n) {
323  nat_type n_divided = n;
324  nat_type sqrt_n_divided = upper_square_root(n_divided);
326  __global const prime_type* prime_ptr = primes - 1;
327  nat_type prime;
328 
329  while ((prime = *(++prime_ptr)) <= sqrt_n_divided) {
330  unsigned int alpha = 0;
331 
332  while ((n_divided % prime) == 0) { // is divisible
333  n_divided /= prime;
334  ++alpha;
335  }
336 
337  if (alpha > 0) {
338  varsigma_odd *= divide_until_odd(sum_geometric_progression_strict(prime, alpha));
339 
340  const bool is_lower = (varsigma_odd * sigma_odd_upper_bound(n_divided) < n);
341 
342  if (is_lower) {
343  return true;
344  }
345 
346  sqrt_n_divided = upper_square_root(n_divided);
347  }
348  }
349 
350  // Remain n_divided is prime
351  if (n_divided > 1) {
352  varsigma_odd = varsigma_odd * divide_until_odd(n_divided + 1);
353  }
354 
355  return (varsigma_odd < n);
356 }
357 
358 
359 // MAIN function
360 bool
362  nat_type n_divided = n;
363  nat_type sqrt_n = upper_square_root(n_divided);
364  nat_type sigma_odd = 1;
365  __global const prime_type* prime_ptr = primes;
366  nat_type prime = *prime_ptr;
367  bool not_finished;
368 
369  unsigned int nb_prime_divisor = 0;
370  nat_type prime_divisors[15]; // primorial(15) > 2^56, thus no more 15 prime factors possible
371 
372  // Collect prime divisors
373  do {
374  const bool is_factor = ((n_divided % prime) == 0);
375 
376  if (is_factor) {
377  prime_divisors[nb_prime_divisor++] = prime;
378  }
379 
380  prime = *(++prime_ptr);
381  not_finished = (prime <= sqrt_n);
382  } while (not_finished);
383 
384  // For each prime divisors found
385  for (unsigned int i = 0; i < nb_prime_divisor; ++i) {
386  const nat_type prime = prime_divisors[i];
387  nat_type pow_prime = 1;
388  bool is_factor;
389 
390  do {
391  pow_prime *= prime;
392  n_divided /= prime;
393  is_factor = ((n_divided % prime) == 0);
394  } while (is_factor);
395 
396  sigma_odd *= (pow_prime - 1)/(prime - 1) + pow_prime;
397  }
398 
399  // If n_divided > 1 then this remain n_divided is prime,
400  // else factor 2
401  return (divide_until_odd(sigma_odd * (n_divided + 1))
402  < n);
403 }
404 
405 
406 bool
408  nat_type n_divided = n;
409  nat_type sqrt_n = upper_square_root(n_divided);
411  __global const prime_type* prime_ptr = primes;
412  nat_type prime = *prime_ptr;
413  bool not_finished;
414 
415  do {
416  nat_type pow_prime = 1;
417  bool is_factor = ((n_divided % prime) == 0);
418 
419  while (is_factor) {
420  pow_prime *= prime;
421  n_divided /= prime;
422  is_factor = ((n_divided % prime) == 0);
423  };
424 
425  varsigma_odd *= divide_until_odd((pow_prime - 1)/(prime - 1) + pow_prime);
426 
427  prime = *(++prime_ptr);
428  not_finished = (prime <= sqrt_n);
429  } while (not_finished);
430 
431  // If n_divided > 1 then this remain n_divided is prime
432  varsigma_odd = varsigma_odd * divide_until_odd(n_divided + 1);
433 
434  return (varsigma_odd < n);
435 }
436 
437 
438 nat_type
439 pow_nat(nat_type n, unsigned int k) {
440  nat_type product = 1;
441 
442  while (k != 0) {
443  if (is_even(k)) {
444  k >>= 1;
445  n *= n;
446  }
447  else {
448  --k;
449  product *= n;
450  }
451  }
452 
453  return product;
454 }
455 
456 
457 nat_type
459  return n*n;
460 }
461 
462 
463 nat_type
465  // 1 + r + r^2 + r^3 + ... + r^k = (r^(k + 1) - 1) / (r - 1)
466  // = (r^k - 1) / (r - 1) + r^k ! to avoid some possible overflows
467  const nat_type rk = pow_nat(r, k);
468 
469  return (rk - 1)/(r - 1) + rk;
470 }
471 
472 
473 nat_type
475  return (n == 1
476  ? 1
477  : (((n*((ceil_eighth_root(n - 1) << 1) + 1)) >> 1)));
478 }
479 
480 
481 nat_type
483  nat_type sqrt_n = (nat_type)native_sqrt((float)n);
484 
485  /* Correct possible rounding error due to floating point computation */
486  bool is_lower = (square(sqrt_n) < n);
487 
488  while (is_lower) { /* get the first value equal or too big */
489  is_lower = (square(++sqrt_n) < n);
490  }
491 
492  return sqrt_n;
493 }
494 
495 
496 nat_type
497 varsigma_odd(__global const prime_type* primes, nat_type n) {
498  nat_type n_divided = n;
499  nat_type sqrt_n_divided = upper_square_root(n_divided);
501  __global const prime_type* prime_ptr = primes - 1;
502  nat_type prime;
503 
504  while ((prime = *(++prime_ptr)) <= sqrt_n_divided) {
505  unsigned int alpha = 0;
506 
507  while ((n_divided % prime) == 0) { // is divisible
508  n_divided /= prime;
509  ++alpha;
510  }
511 
512  if (alpha > 0) {
513  sqrt_n_divided = upper_square_root(n_divided);
514  varsigma_odd *= divide_until_odd(sum_geometric_progression_strict(prime, alpha));
515  }
516  }
517 
518  // Remain n_divided is prime
519  if (n_divided > 1) {
520  varsigma_odd = varsigma_odd * divide_until_odd(n_divided + 1);
521  }
522 
523  return varsigma_odd;
524 }
525 
526 
527 
528 /* ******
529  * Main *
530  ********/
531 #ifndef ONLY_MAIN_KERNEL
532 __kernel
533 void
534 check_ns(__global const prime_type* primes,
535  __global const nat_type* ns, unsigned int nb,
536  __global unsigned char* results) {
537  const unsigned int i = get_global_id(0);
538 
539  if (i < nb) {
540  results[i] = is_varsigma_odd_lower(primes, ns[i]);
541  }
542 }
543 #endif
544 
545 
546 // MAIN kernel
547 __kernel
548 void
549 check_ns__optimized(__global const prime_type* primes,
550  __global const nat_type* ns, unsigned int nb,
551  __global unsigned char* results) {
552  const unsigned int i = get_global_id(0);
553 
554  if (i < nb) {
555  results[i] = is_varsigma_odd_lower__optimized(primes, ns[i]);
556  }
557 }
558 
559 
560 #ifndef ONLY_MAIN_KERNEL
561 __kernel
562 void
563 check_ns__simplified(__global const prime_type* primes,
564  __global const nat_type* ns, unsigned int nb,
565  __global unsigned char* results) {
566  const unsigned int i = get_global_id(0);
567 
568  if (i < nb) {
569  results[i] = is_varsigma_odd_lower__simplified(primes, ns[i]);
570  }
571 }
572 #endif
573 
574 
575 // MAIN kernel: other approach
576 __kernel
577 void
578 compute_partial_sigma_odd(__global const prime_type* primes,
579  unsigned int prime_offset, unsigned int opencl_nb,
580  nat_type n,
581  __global nat_type* factors) {
582  const unsigned int i = get_global_id(0);
583  const prime_type prime = primes[prime_offset + i];
584 
585  // Each unit try to divide with its prime
586  {
587  const nat_type start_n = n;
588 
589  {
590  bool is_factor = is_divide(prime, n);
591 
592  while (is_factor) {
593  n /= prime;
594  is_factor = is_divide(prime, n);
595  }
596  }
597 
598  n = start_n/n;
599  }
600 
601  factors[i] = n; // == prime^{alpha} (== 1 if prime not divides start_n)
602 
603  {
604  const bool is_not_receiver = is_odd(i);
605 
606  if (is_not_receiver) {
607  return;
608  }
609  }
610 
611  // Compute sigma_odd for each prime factor and begin collect
612  {
613  const unsigned int j = i + 1;
614 
615  barrier(CLK_LOCAL_MEM_FENCE);
616 
617  factors[i] *= factors[j]; // factors product
618  factors[j] = (((n - 1)/(prime - 1) + n)
619  * ((factors[j] - 1)/(primes[prime_offset + j] - 1) + factors[j])); // sigma_odd product
620  }
621 
622  // Collect factors for each prime to one unique factor
623  unsigned int power2 = 2;
624 
625  while (power2 < opencl_nb) {
626  const unsigned int double_power2 = power2 << 1;
627 
628  {
629  const bool is_not_receiver = ((i & (double_power2 - 1)) != 0); // double_power2 not divides i
630 
631  if (is_not_receiver) {
632  return;
633  }
634  }
635 
636  const unsigned int j = i + power2;
637 
638  barrier(CLK_LOCAL_MEM_FENCE);
639 
640  factors[i] *= factors[j]; // factors product
641  factors[i + 1] *= factors[j + 1]; // sigma_odd product
642 
643  power2 = double_power2;
644  }
645 
646  // factors[0] == product of p_i^{alpha_i} for all p_i primes checked in this step
647  // factors[1] == sigma_odd(results[0])
648 }
649 
650 
651 
652 #ifndef ONLY_MAIN_KERNEL
653 /* *****************
654  * Mains for tests *
655  *******************/
656 __kernel
657 void
658 test__divide_until_odd(__global const prime_type* primes,
659  __global const nat_type* ns, __global nat_type* results) {
660  const unsigned int i = get_global_id(0);
661 
662  results[i] = divide_until_odd(ns[i]);
663 }
664 
665 
666 __kernel
667 void
668 test__floor_square_root(__global const prime_type* primes,
669  __global const nat_type* ns, __global nat_type* results) {
670  const unsigned int i = get_global_id(0);
671 
672  results[i] = floor_square_root(ns[i]);
673 }
674 
675 
676 __kernel
677 void
678 test__pow_nat(__global const prime_type* primes,
679  __global const nat_type* ns, __global nat_type* results) {
680  const unsigned int i = get_global_id(0);
681 
682  results[i] = pow_nat(ns[i], i % 5);
683 }
684 
685 
686 __kernel
687 void
689  __global const nat_type* ns, __global nat_type* results) {
690  const unsigned int i = get_global_id(0);
691 
692  results[i] = sum_geometric_progression_strict(ns[i], i % 16);
693 }
694 
695 
696 __kernel
697 void
698 test__varsigma_odd(__global const prime_type* primes,
699  __global const nat_type* ns, __global nat_type* results) {
700  const unsigned int i = get_global_id(0);
701 
702  results[i] = varsigma_odd(primes, ns[i]);
703 }
704 #endif
__kernel void check_ns__optimized(__global const prime_type *primes, __global const nat_type *ns, unsigned int nb, __global unsigned char *results)
Definition: sigmaodd.cl:549
unsigned long nat_type
Type for natural number used in all code, on 64 bits.
Definition: sigmaodd.cl:22
nat_type pow_nat(nat_type n, unsigned int k)
Return x^k, x power k.
Definition: sigmaodd.cl:439
nat_type upper_square_root(nat_type n)
Return an approximation of the square root of n. The results is always >= the exact square root...
Definition: sigmaodd.cl:482
bool is_varsigma_odd_lower(__global const prime_type *primes, nat_type n)
Return true iff varsigma_odd(n) < n.
Definition: sigmaodd.cl:322
nat_type floor_square_root(nat_type n)
Return the square root of n rounded to below.
Definition: sigmaodd.cl:256
bool is_odd(nat_type n)
Return true iff n is odd.
Definition: sigmaodd.cl:316
__kernel void test__pow_nat(__global const prime_type *primes, __global const nat_type *ns, __global nat_type *results)
Definition: sigmaodd.cl:678
nat_type ceil_eighth_root(nat_type n)
Return the eighth root of n rounded to above.
Definition: sigmaodd.cl:208
bool is_varsigma_odd_lower__simplified(__global const prime_type *primes, nat_type n)
Simplified version of is_varsigma_odd_lower().
Definition: sigmaodd.cl:407
bool is_even(nat_type n)
Return true iff n is even.
Definition: sigmaodd.cl:283
__kernel void test__sum_geometric_progression_strict(__global const prime_type *primes, __global const nat_type *ns, __global nat_type *results)
Definition: sigmaodd.cl:688
nat_type sum_geometric_progression_strict(nat_type r, unsigned int k)
Return the sum of the (k + 1) terms of the geometric progression of the common ratio r...
Definition: sigmaodd.cl:464
bool is_in_table(__global const nat_type *begin, __global const nat_type *end, const nat_type n)
Return true iff n is in the table.
Definition: sigmaodd.cl:289
__kernel void test__divide_until_odd(__global const prime_type *primes, __global const nat_type *ns, __global nat_type *results)
Definition: sigmaodd.cl:658
__kernel void compute_partial_sigma_odd(__global const prime_type *primes, unsigned int prime_offset, unsigned int opencl_nb, nat_type n, __global nat_type *factors)
Definition: sigmaodd.cl:578
__kernel void test__floor_square_root(__global const prime_type *primes, __global const nat_type *ns, __global nat_type *results)
Definition: sigmaodd.cl:668
unsigned int prime_type
Type for prime number, particularly for the table of primes.
Definition: sigmaodd.cl:28
nat_type floor_eighth_root(nat_type n)
Return the eighth root of n rounded to below.
Definition: sigmaodd.cl:235
__kernel void test__varsigma_odd(__global const prime_type *primes, __global const nat_type *ns, __global nat_type *results)
Definition: sigmaodd.cl:698
__kernel void check_ns__simplified(__global const prime_type *primes, __global const nat_type *ns, unsigned int nb, __global unsigned char *results)
Definition: sigmaodd.cl:563
bool is_varsigma_odd_lower__optimized(__global const prime_type *primes, nat_type n)
Version of is_varsigma_odd_lower() rewritten to OpenCL.
Definition: sigmaodd.cl:361
nat_type sigma_odd_upper_bound(nat_type n)
Return an upper bound of sigma_odd(n).
Definition: sigmaodd.cl:474
const double alpha
Definition: harmonic.cpp:24
nat_type square(nat_type n)
Return n*n.
Definition: sigmaodd.cl:458
bool is_divide(nat_type d, nat_type n)
Return true iff d divide n, i.e. if n is divisible by d.
Definition: sigmaodd.cl:277
nat_type divide_until_odd(nat_type n)
Return n divided by 2 until the result is odd.
Definition: sigmaodd.cl:218
__kernel void check_ns(__global const prime_type *primes, __global const nat_type *ns, unsigned int nb, __global unsigned char *results)
Definition: sigmaodd.cl:534
nat_type varsigma_odd(__global const prime_type *primes, nat_type n)
Return varsigma_odd(n), i.e. the sum of all odd divisors of n, divided by 2 until to be odd...
Definition: sigmaodd.cl:497