14 #include "../../common/helper/helper.hpp" 15 #include "../../sequential/sequential/sequential.hpp" 32 unsigned int opencl_nb_number,
33 bool print_bad,
bool print_time) {
36 assert(first_n <= last_n);
44 assert(
sizeof(
nat_type) ==
sizeof(cl_ulong));
49 cl_ulong cumulative_time = 0;
57 const VECTOR_CLASS<cl::Device> devices = {device};
58 const cl::Context context{device};
63 size_t ns_byte_size = opencl_nb_number*
sizeof(
nat_type);
64 size_t results_byte_size = opencl_nb_number*
sizeof(cl_uchar);
66 cl::Buffer primes_buffer{context, CL_MEM_READ_ONLY, primes_byte_size};
67 cl::Buffer ns_buffer{context, CL_MEM_READ_ONLY, ns_byte_size};
68 cl::Buffer results_buffer{context, CL_MEM_WRITE_ONLY, results_byte_size};
72 const cl::CommandQueue queue{context, device, CL_QUEUE_PROFILING_ENABLE};
74 error = queue.enqueueWriteBuffer(primes_buffer, CL_TRUE, 0, primes_byte_size,
80 cl::Program program{context, kernel_src};
81 error = program.build(devices,
"-DONLY_MAIN_KERNEL -cl-single-precision-constant -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-unsafe-math-optimizations -cl-finite-math-only -cl-fast-relaxed-math" 92 error = program.getBuildInfo(device, CL_PROGRAM_BUILD_LOG, &log);
94 std::cout << log << std::endl;
99 cl::Kernel kernel{program,
"check_ns__optimized"};
104 size_t work_group_size;
106 error = kernel.getWorkGroupInfo(device, CL_KERNEL_WORK_GROUP_SIZE, &work_group_size);
107 print_error(error,
"clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE)");
109 size_t compile_work_group_size[3];
111 error = kernel.getWorkGroupInfo(device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, &compile_work_group_size);
112 print_error(error,
"clGetKernelWorkGroupInfo(CL_KERNEL_COMPILE_WORK_GROUP_SIZE)", &compile_work_group_size);
114 cl_ulong local_mem_size;
116 error = kernel.getWorkGroupInfo(device, CL_KERNEL_LOCAL_MEM_SIZE, &local_mem_size);
117 print_error(error,
"clGetKernelWorkGroupInfo(CL_KERNEL_LOCAL_MEM_SIZE)");
119 size_t preferred_work_group_size_multiple;
121 error = kernel.getWorkGroupInfo(device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, &preferred_work_group_size_multiple);
122 print_error(error,
"clGetKernelWorkGroupInfo(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE)");
124 cl_ulong private_mem_size;
126 error = kernel.getWorkGroupInfo(device, CL_KERNEL_PRIVATE_MEM_SIZE, &private_mem_size);
127 print_error(error,
"clGetKernelWorkGroupInfo(CL_KERNEL_PRIVATE_MEM_SIZE)");
131 error = kernel.setArg(0, primes_buffer);
132 print_error(error,
"clSetKernelArg(0, primes_buffer)");
134 error = kernel.setArg(1, ns_buffer);
135 print_error(error,
"clSetKernelArg(1, ns_buffer)");
137 error = kernel.setArg(3, results_buffer);
138 print_error(error,
"clSetKernelArg(3, results_buffer)");
140 std::vector<nat_type> ns(opencl_nb_number);
141 std::vector<unsigned char> results(opencl_nb_number);
144 std::set<nat_type> bad_table;
147 for (
nat_type n = first_n; n <= last_n; n += 2) {
153 if ((nb == opencl_nb_number) || (n == last_n)) {
154 error = kernel.setArg(2, nb);
157 const size_t nb_byte_size = nb*
sizeof(
nat_type);
159 error = queue.enqueueWriteBuffer(ns_buffer, CL_TRUE, 0, nb_byte_size, ns.data());
162 error = queue.enqueueNDRangeKernel(kernel,
164 cl::NDRange(opencl_nb_number),
173 error =
event.wait();
176 error =
event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
178 error =
event.getProfilingInfo(CL_PROFILING_COMMAND_END, &stop);
181 cumulative_time += stop - start;
183 error = queue.enqueueReadBuffer(results_buffer, CL_TRUE, 0, results_byte_size, results.data());
186 for (
unsigned int i = 0; i < nb; ++i) {
190 bad_table.insert(bad);
192 std::cout << bad << std::endl;
205 std::cout <<
"# bad numbers: " << bad_table.size() << std::endl;
209 std::cout <<
"Total OpenCL duration:\t" 220 unsigned int opencl_nb,
221 bool print_bad,
bool print_time) {
223 assert(3 <= first_n);
224 assert(first_n <= last_n);
227 assert(opencl_nb >= 2);
235 assert(
sizeof(
nat_type) ==
sizeof(cl_ulong));
238 assert(
sizeof(
prime_type) ==
sizeof(cl_uint));
240 cl_ulong cumulative_time = 0;
248 const VECTOR_CLASS<cl::Device> devices = {device};
249 const cl::Context context{device};
254 size_t factors_byte_size = opencl_nb*
sizeof(
nat_type);
256 cl::Buffer primes_buffer{context, CL_MEM_READ_ONLY, primes_byte_size};
257 cl::Buffer factors_buffer{context, CL_MEM_READ_WRITE, factors_byte_size};
261 const cl::CommandQueue queue{context, device, CL_QUEUE_PROFILING_ENABLE};
263 error = queue.enqueueWriteBuffer(primes_buffer, CL_TRUE, 0, primes_byte_size,
269 cl::Program program{context, kernel_src};
270 error = program.build(devices,
"-DONLY_MAIN_KERNEL -cl-single-precision-constant -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-unsafe-math-optimizations -cl-finite-math-only -cl-fast-relaxed-math" 281 error = program.getBuildInfo(device, CL_PROGRAM_BUILD_LOG, &log);
283 std::cout << log << std::endl;
287 cl::Kernel kernel{program,
"compute_partial_sigma_odd"};
291 size_t work_group_size;
293 error = kernel.getWorkGroupInfo(device, CL_KERNEL_WORK_GROUP_SIZE, &work_group_size);
294 print_error(error,
"clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE)");
296 size_t compile_work_group_size[3];
298 error = kernel.getWorkGroupInfo(device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, &compile_work_group_size);
299 print_error(error,
"clGetKernelWorkGroupInfo(CL_KERNEL_COMPILE_WORK_GROUP_SIZE)", &compile_work_group_size);
301 cl_ulong local_mem_size;
303 error = kernel.getWorkGroupInfo(device, CL_KERNEL_LOCAL_MEM_SIZE, &local_mem_size);
304 print_error(error,
"clGetKernelWorkGroupInfo(CL_KERNEL_LOCAL_MEM_SIZE)");
306 size_t preferred_work_group_size_multiple;
308 error = kernel.getWorkGroupInfo(device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, &preferred_work_group_size_multiple);
309 print_error(error,
"clGetKernelWorkGroupInfo(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE)");
311 cl_ulong private_mem_size;
313 error = kernel.getWorkGroupInfo(device, CL_KERNEL_PRIVATE_MEM_SIZE, &private_mem_size);
314 print_error(error,
"clGetKernelWorkGroupInfo(CL_KERNEL_PRIVATE_MEM_SIZE)");
318 error = kernel.setArg(0, primes_buffer);
319 print_error(error,
"clSetKernelArg(0, primes_buffer)");
321 error = kernel.setArg(2, opencl_nb);
322 print_error(error,
"clSetKernelArg(2, opencl_nb)");
324 error = kernel.setArg(4, factors_buffer);
325 print_error(error,
"clSetKernelArg(4, factors_buffer)");
329 std::set<nat_type> bad_table;
331 for (
nat_type n = first_n; n <= last_n; n += 2) {
333 unsigned int prime_offset = 0;
339 error = kernel.setArg(1, prime_offset);
341 print_error(error,
"clSetKernelArg(1, prime_offset)");
344 assert(n_divided != 0);
346 error = kernel.setArg(3, n_divided);
348 print_error(error,
"clSetKernelArg(3, n_divided)");
351 error = queue.enqueueNDRangeKernel(kernel,
353 cl::NDRange(opencl_nb),
364 error =
event.wait();
369 error =
event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
373 error =
event.getProfilingInfo(CL_PROFILING_COMMAND_END, &stop);
378 cumulative_time += stop - start;
382 error = queue.enqueueReadBuffer(factors_buffer, CL_TRUE, 0,
sizeof(
nat_type)*2, &results);
387 assert(results[0] != 0);
388 assert(results[0] <= n_divided);
390 if (results[0] > 1) {
393 n_divided /= results[0];
396 assert(results[1] != 0);
402 sqrt_n_divided) < n) {
406 prime_offset = std::min(prime_offset + opencl_nb,
408 }
while ((n_divided > 1)
415 if (varsigma_odd > n) {
418 std::cout << n << std::endl;
426 std::cout <<
"# bad numbers: " << bad_table.size() << std::endl;
430 std::cout <<
"Total OpenCL duration:\t" 439 std::vector<nat_type>
441 assert(
sizeof(
nat_type) ==
sizeof(cl_ulong));
442 assert(
sizeof(
prime_type) ==
sizeof(cl_uint));
449 const VECTOR_CLASS<cl::Device> devices = {device};
450 const cl::Context context{device};
455 size_t ns_byte_size = ns.size()*
sizeof(
nat_type);
457 cl::Buffer primes_buffer{context, CL_MEM_READ_ONLY, primes_byte_size};
458 cl::Buffer ns_buffer{context, CL_MEM_READ_ONLY, ns_byte_size};
459 cl::Buffer results_buffer{context, CL_MEM_WRITE_ONLY, ns_byte_size};
463 const cl::CommandQueue queue{context, device, CL_QUEUE_PROFILING_ENABLE};
465 queue.enqueueWriteBuffer(primes_buffer, CL_TRUE, 0, primes_byte_size,
470 cl::Program program{context, kernel_src};
472 error = program.build(devices);
475 cl::Kernel kernel{program,
"check_ns"};
477 error = kernel.setArg(0, primes_buffer);
478 print_error(error,
"clSetKernelArg(0, primes_buffer)");
480 error = kernel.setArg(1, ns_buffer);
481 print_error(error,
"clSetKernelArg(1, ns_buffer)");
483 error = kernel.setArg(2, results_buffer);
484 print_error(error,
"clSetKernelArg(2, results_buffer)");
488 queue.enqueueWriteBuffer(ns_buffer, CL_TRUE, 0, ns_byte_size, ns.data());
489 error = queue.enqueueNDRangeKernel(kernel,
491 cl::NDRange(ns.size()),
499 std::vector<nat_type> results(ns.size());
501 queue.enqueueReadBuffer(results_buffer, CL_TRUE, 0, ns_byte_size, results.data());
503 std::vector<nat_type> bads;
505 for (
unsigned int i = 0; i < results.size(); ++i) {
507 bads.push_back(ns[i]);
515 std::vector<nat_type>
517 const std::vector<nat_type> &ns) {
518 assert(
sizeof(
nat_type) ==
sizeof(cl_ulong));
519 assert(
sizeof(
prime_type) ==
sizeof(cl_uint));
525 const VECTOR_CLASS<cl::Device> devices = {device};
526 const cl::Context context{device};
531 size_t ns_byte_size = ns.size()*
sizeof(
nat_type);
533 cl::Buffer primes_buffer{context, CL_MEM_READ_ONLY, primes_byte_size};
534 cl::Buffer ns_buffer{context, CL_MEM_READ_ONLY, ns_byte_size};
535 cl::Buffer results_buffer{context, CL_MEM_WRITE_ONLY, ns_byte_size};
539 const cl::CommandQueue queue{context, device};
541 queue.enqueueWriteBuffer(primes_buffer, CL_TRUE, 0, primes_byte_size,
543 queue.enqueueWriteBuffer(ns_buffer, CL_TRUE, 0, ns_byte_size, ns.data());
547 cl::Program program{context, kernel_src};
549 program.build(devices);
551 cl::Kernel kernel{program, kernal_name.c_str()};
553 kernel.setArg(0, primes_buffer);
554 kernel.setArg(1, ns_buffer);
555 kernel.setArg(2, results_buffer);
557 queue.enqueueNDRangeKernel(kernel,
559 cl::NDRange(ns.size()),
564 std::vector<nat_type> results(ns.size());
566 queue.enqueueReadBuffer(results_buffer, CL_TRUE, 0, ns_byte_size, results.data());
std::vector< nat_type > opencl_run_program_on_ns(const std::string &filename, const std::string &kernal_name, const std::vector< nat_type > &ns)
Run the OpenCL program from filename on ns. Used for tests.
cl::Device get_first_device_gpu()
Return the first GPU device found. If not found then print an error message and exit.
std::set< nat_type > opencl_check_gentle_varsigma_odd(nat_type first_n, nat_type last_n, unsigned int opencl_nb_number, bool print_bad, bool print_time)
Check, in order, all odd gentle numbers between first_n and last_n, and if print_bad then print all...
nat_type floor_square_root(nat_type n)
Return the square root of n, rounded down.
std::vector< nat_type > opencl_check_ns(const std::vector< nat_type > &ns)
Check all numbers in ns and return vector of all bad numbers found.
constexpr bool is_odd(nat_type n)
Return true iff n is odd.
std::string duration_ms_to_string(double duration_ms)
Return a string with the duration expressed in milliseconds, seconds, minutes and hours...
nat_type divide_until_odd(nat_type n)
Return n divided by 2 until the result is odd.
sigmaodd::nat_type nat_type
constexpr nat_type MAX_POSSIBLE_N
Lower bound of the largest number for which it is possible to compute the result of the sigma function...
constexpr bool is_divide(nat_type d, nat_type n)
Return true iff d divides n, i.e. if n is divisible by d.
const prime_type * odd_primes_table_ptr()
Return a pointer to the first number in the precalculated table.
void print_error(cl_int code, std::string message, bool only_if_error, bool exit_if_error)
Print an error message corresponding to the error code.
constexpr nat_type sequential_sigma_odd_upper_bound_with_sqrt(nat_type n, const std::set< nat_type > &bad_table, nat_type bad_first_n, nat_type sqrt_n)
Return an upper bound of varsigma_odd(n).
std::string file_to_string(std::string filename)
Read the file and return its content as a string. If that fails, print an error message and exit...
constexpr bool is_first_mersenne_prime_unitary_divide_or_square(nat_type n)
Return true iff is_first_mersenne_prime_unitary_divide(n) or is_square(n).
constexpr unsigned int odd_primes_table_nb()
Return the number of odd prime numbers in the precalculated table.
std::set< nat_type > opencl_check_gentle_varsigma_odd__parallelize_factorization(nat_type first_n, nat_type last_n, unsigned int opencl_nb, bool print_bad, bool print_time)
Like opencl_check_gentle_varsigma_odd() but instead parallelize on group of opencl_nb numbers...
sigmaodd::prime_type prime_type
Implementation of the OpenCL parallel algorithms presented in the report.
nat_type varsigma_odd(__global const prime_type *primes, nat_type n)
Return varsigma_odd(n), i.e. the sum of all odd divisors of n, divided by 2 until it is odd...