aboutsummaryrefslogtreecommitdiffstats
path: root/lib/fosphor/cl.c
diff options
context:
space:
mode:
authorSylvain Munaut <tnt@246tNt.com>2013-10-13 22:05:38 +0200
committerSylvain Munaut <tnt@246tNt.com>2013-10-13 22:05:38 +0200
commitaabc21b7f2055b8622d27402970f2064f05d9440 (patch)
treeb73aba7bde9da77169ff108e9be54870a1fa83ee /lib/fosphor/cl.c
parentedfee19c617d88a4e465757a1223cb2e67ddebcc (diff)
fosphor: Initial import of the core code
Signed-off-by: Sylvain Munaut <tnt@246tNt.com>
Diffstat (limited to 'lib/fosphor/cl.c')
-rw-r--r--lib/fosphor/cl.c632
1 files changed, 632 insertions, 0 deletions
diff --git a/lib/fosphor/cl.c b/lib/fosphor/cl.c
new file mode 100644
index 0000000..22f0d6d
--- /dev/null
+++ b/lib/fosphor/cl.c
@@ -0,0 +1,632 @@
+/*
+ * cl.c
+ *
+ * OpenCL base routines
+ *
+ * Copyright (C) 2013 Sylvain Munaut
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*! \addtogroup cl
+ * @{
+ */
+
+/*! \file cl.c
+ * \brief OpenCL base routines
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+#include <CL/cl_gl.h>
+
+#include "cl_platform.h"
+
+#if defined(__APPLE__) || defined(MAXOSX)
+# include <OpenGL/OpenGL.h>
+# include <OpenGL/gl.h>
+#elif defined(_WIN32)
+# include <Windows.h>
+# include <Wingdi.h>
+#else
+# include <GL/glx.h>
+#endif
+
+#include "config.h"
+#include "cl.h"
+#include "gl.h"
+#include "resource.h"
+
+
+struct fosphor_cl_features
+{
+#define FLG_CL_GL_SHARING (1<<0)
+#define FLG_CL_NVIDIA_SM11 (1<<1)
+#define FLG_CL_OPENCL_11 (1<<2)
+
+ cl_device_type type;
+ int local_mem;
+ int flags;
+ int wg_size;
+ int wg_size_dim[2];
+};
+
+struct fosphor_cl_state
+{
+ cl_platform_id pl_id;
+ cl_device_id dev_id;
+ cl_context ctx;
+ cl_command_queue cq;
+
+ /* Features */
+ struct fosphor_cl_features feat;
+
+ /* FFT */
+ cl_mem mem_fft_in;
+ cl_mem mem_fft_out;
+
+ cl_program prog_fft;
+ cl_kernel kern_fft;
+
+ /* Display */
+ cl_mem mem_waterfall;
+ cl_mem mem_histogram;
+ cl_mem mem_spectrum;
+
+ cl_program prog_display;
+ cl_kernel kern_display;
+
+ /* Histogram range */
+ float histo_scale;
+ float histo_offset;
+
+ /* Waterfall position */
+ int waterfall_pos;
+};
+
+/* -------------------------------------------------------------------------- */
+/* Helpers / Internal API */
+/* -------------------------------------------------------------------------- */
+
+#define MAX_PLATFORMS 16
+#define MAX_DEVICES 16
+
+#define CL_ERR_CHECK(v, msg) \
+ if ((v) != CL_SUCCESS) { \
+ fprintf(stderr, "[!] CL Error (%d, %s:%d): %s\n", \
+ (v), __FILE__, __LINE__, msg); \
+ goto error; \
+ }
+
+
+static int
+cl_device_query(cl_device_id dev_id, struct fosphor_cl_features *feat)
+{
+ cl_ulong lmem;
+ char txt[2048];
+ cl_int err;
+ int has_nv_attr;
+
+ memset(feat, 0x00, sizeof(struct fosphor_cl_features));
+
+ /* Device type */
+ err = clGetDeviceInfo(dev_id, CL_DEVICE_TYPE, sizeof(cl_device_type), &feat->type, NULL);
+ if (err != CL_SUCCESS)
+ return -1;
+
+ /* Local memory size */
+ err = clGetDeviceInfo(dev_id, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &lmem, NULL);
+ if (err != CL_SUCCESS)
+ return -1;
+
+ feat->local_mem = lmem;
+
+ /* CL/GL extension */
+ err = clGetDeviceInfo(dev_id, CL_DEVICE_EXTENSIONS, sizeof(txt)-1, txt, NULL);
+ if (err != CL_SUCCESS)
+ return -1;
+
+ txt[sizeof(txt)-1] = 0;
+
+ /* Check for CL/GL sharing */
+ if (strstr(txt, "cl_khr_gl_sharing") || strstr(txt, "cl_APPLE_gl_sharing"))
+ feat->flags |= FLG_CL_GL_SHARING;
+
+ /* Check for NV attributes */
+ has_nv_attr = !!strstr(txt, "cl_nv_device_attribute_query");
+
+ /* Check OpenCL 1.1 compat */
+ err = clGetDeviceInfo(dev_id, CL_DEVICE_VERSION, sizeof(txt)-1, txt, NULL);
+ if (err != CL_SUCCESS)
+ return -1;
+
+ txt[sizeof(txt)-1] = 0;
+
+ if (!memcmp(txt, "OpenCL 1.", 9) && txt[9] >= '1')
+ feat->flags |= FLG_CL_OPENCL_11;
+
+ /* Check if a NVidia SM11 architecture */
+ if (has_nv_attr) {
+ cl_uint nv_maj, nv_min;
+
+ err = clGetDeviceInfo(dev_id, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
+ sizeof(cl_uint), &nv_maj, NULL);
+ if (err != CL_SUCCESS)
+ return -1;
+
+ err = clGetDeviceInfo(dev_id, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
+ sizeof(cl_uint), &nv_min, NULL);
+ if (err != CL_SUCCESS)
+ return -1;
+
+ if ((nv_maj == 1) && (nv_min == 1))
+ feat->flags |= FLG_CL_NVIDIA_SM11;
+ }
+
+ return 0;
+}
+
+static int
+cl_device_score(cl_device_id dev_id, struct fosphor_cl_features *feat)
+{
+ int rv, score = 0;
+
+ /* Query device */
+ rv = cl_device_query(dev_id, feat);
+ if (rv)
+ return rv;
+
+ /* Check compatibility */
+ if (!(feat->flags & FLG_CL_GL_SHARING))
+ return -1;
+
+ if (!(feat->flags & (FLG_CL_NVIDIA_SM11 | FLG_CL_OPENCL_11)))
+ return -1;
+
+ /* Prefer GPU */
+ if (feat->type == CL_DEVICE_TYPE_GPU)
+ score += 1000;
+
+ /* Bigger local mem */
+ score += (feat->local_mem >> 10);
+
+ return score;
+}
+
+static int
+cl_find_device(cl_platform_id *pl_id_p, cl_device_id *dev_id_p,
+ struct fosphor_cl_features *feat)
+{
+ cl_platform_id pl_list[MAX_PLATFORMS], pl_id;
+ cl_device_id dev_list[MAX_DEVICES], dev_id;
+ cl_uint pl_count, dev_count, i, j;
+ cl_int err;
+ int score = -1;
+
+ /* Scan each platforms */
+ err = clGetPlatformIDs(MAX_PLATFORMS, pl_list, &pl_count);
+ CL_ERR_CHECK(err, "Unable to fetch platform IDs");
+
+ for (i=0; i<pl_count; i++)
+ {
+ /* Scan all devices */
+ err = clGetDeviceIDs(pl_list[i], CL_DEVICE_TYPE_ALL, MAX_DEVICES, dev_list, &dev_count);
+ CL_ERR_CHECK(err, "Unable to fetch device IDs");
+
+ for (j=0; j<dev_count; j++)
+ {
+ struct fosphor_cl_features feat_cur;
+ int s = cl_device_score(dev_list[j], &feat_cur);
+ if (s > score) {
+ pl_id = pl_list[i];
+ dev_id = dev_list[j];
+ memcpy(feat, &feat_cur, sizeof(struct fosphor_cl_features));
+ score = s;
+ }
+ }
+ }
+
+ /* Did we get a good fit ? */
+ if (score >= 0) {
+ *pl_id_p = pl_id;
+ *dev_id_p = dev_id;
+ err = 0;
+ } else {
+ err = -ENODEV;
+ }
+
+error:
+ return err;
+}
+
+static cl_program
+cl_load_program(cl_device_id dev_id, cl_context ctx,
+ const char *resource_name, const char *opts)
+{
+ cl_program prog = NULL;
+ const char *src;
+ cl_int err;
+
+ /* Grab resource */
+ src = resource_get(resource_name, NULL);
+ if (!src) {
+ fprintf(stderr, "[!] Unable to load non-existent resource '%s'\n", resource_name);
+ goto error;
+ }
+
+ /* Create the program from sources */
+ prog = clCreateProgramWithSource(ctx, 1, (const char **)&src, NULL, &err);
+ CL_ERR_CHECK(err, "Failed to create program");
+
+ /* Build it */
+ err = clBuildProgram(prog, 0, NULL, opts, NULL, NULL);
+
+#ifndef DEBUG_CL
+ if (err != CL_SUCCESS)
+#endif
+ {
+ size_t len;
+ const int txt_buf_len = 1024 * 1024;
+ char *txt_buf;
+
+ txt_buf = malloc(txt_buf_len);
+ clGetProgramBuildInfo(prog, dev_id, CL_PROGRAM_BUILD_LOG, txt_buf_len, txt_buf, &len);
+ fprintf(stderr, "Build log for '%s':\n%s\n\n---\n", resource_name, txt_buf);
+ free(txt_buf);
+ }
+
+ CL_ERR_CHECK(err, "Failed to build program");
+
+ /* All good */
+ return prog;
+
+ /* Error path */
+error:
+ if (prog)
+ clReleaseProgram(prog);
+
+ return NULL;
+}
+
+static int
+cl_do_init(struct fosphor_cl_state *cl, struct fosphor_gl_state *gl)
+{
+ cl_context_properties ctx_props[7];
+ const char *disp_opts;
+ cl_int err;
+
+ /* Context */
+#if defined(__APPLE__) || defined(MAXOSX)
+
+ /* OSX variant */
+ ctx_props[0] = CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE;
+ ctx_props[1] = (cl_context_properties) CGLGetShareGroup(CGLGetCurrentContext());
+ ctx_props[2] = 0;
+
+#elif defined(__WIN32__)
+
+ /* Win 32 variant */
+ ctx_props[0] = CL_GL_CONTEXT_KHR;
+ ctx_props[1] = (cl_context_properties) wglGetCurrentContext();
+ ctx_props[2] = CL_WGL_HDC_KHR;
+ ctx_props[3] = (cl_context_properties) wglGetCurrentDC();
+ ctx_props[4] = CL_CONTEXT_PLATFORM;
+ ctx_props[5] = (cl_context_properties) cl->pl_id;
+ ctx_props[6] = 0;
+
+#else
+
+ /* Linux variant */
+ ctx_props[0] = CL_GL_CONTEXT_KHR;
+ ctx_props[1] = (cl_context_properties) glXGetCurrentContext();
+ ctx_props[2] = CL_GLX_DISPLAY_KHR;
+ ctx_props[3] = (cl_context_properties) glXGetCurrentDisplay();
+ ctx_props[4] = CL_CONTEXT_PLATFORM;
+ ctx_props[5] = (cl_context_properties) cl->pl_id;
+ ctx_props[6] = 0;
+
+#endif
+
+ cl->ctx = clCreateContext(ctx_props, 1, &cl->dev_id, NULL, NULL, &err);
+ CL_ERR_CHECK(err, "Unable to create context");
+
+ /* Command Queue */
+ cl->cq = clCreateCommandQueue(cl->ctx, cl->dev_id, 0, &err);
+ CL_ERR_CHECK(err, "Unable to create command queue");
+
+ /* FFT in/out buffers */
+ cl->mem_fft_in = clCreateBuffer(cl->ctx,
+ CL_MEM_READ_ONLY,
+ 2 * sizeof(cl_float) * FOSPHOR_FFT_LEN * FOSPHOR_FFT_MAX_BATCH,
+ NULL,
+ &err
+ );
+ CL_ERR_CHECK(err, "Unable to allocate FFT input buffer");
+
+ cl->mem_fft_out = clCreateBuffer(cl->ctx,
+ CL_MEM_READ_WRITE,
+ 2 * sizeof(cl_float) * FOSPHOR_FFT_LEN * FOSPHOR_FFT_MAX_BATCH,
+ NULL,
+ &err
+ );
+ CL_ERR_CHECK(err, "Unable to allocate FFT output buffer");
+
+ /* FFT program/kernels */
+ cl->prog_fft = cl_load_program(cl->dev_id, cl->ctx, "fft.cl", NULL);
+ if (!cl->prog_fft)
+ goto error;
+
+ cl->kern_fft = clCreateKernel(cl->prog_fft, "fft1D_1024", &err);
+ CL_ERR_CHECK(err, "Unable to create FFT kernel");
+
+ /* Configure static FFT kernel args */
+ err = clSetKernelArg(cl->kern_fft, 0, sizeof(cl_mem), &cl->mem_fft_in);
+ err |= clSetKernelArg(cl->kern_fft, 1, sizeof(cl_mem), &cl->mem_fft_out);
+
+ CL_ERR_CHECK(err, "Unable to configure FFT kernel");
+
+ /* GL shared objects */
+ /* Waterfall texture */
+ cl->mem_waterfall = clCreateFromGLTexture2D(cl->ctx,
+ CL_MEM_READ_WRITE, GL_TEXTURE_2D, 0,
+ fosphor_gl_get_shared_id(gl, GL_ID_TEX_WATERFALL),
+ &err
+ );
+ CL_ERR_CHECK(err, "Unable to share waterfall texture into OpenCL context");
+
+ /* Histogram texture */
+ cl->mem_histogram = clCreateFromGLTexture2D(cl->ctx,
+ CL_MEM_READ_WRITE, GL_TEXTURE_2D, 0,
+ fosphor_gl_get_shared_id(gl, GL_ID_TEX_HISTOGRAM),
+ &err
+ );
+ CL_ERR_CHECK(err, "Unable to share histogram texture into OpenCL context");
+
+ /* Spectrum VBO */
+ cl->mem_spectrum = clCreateFromGLBuffer(cl->ctx,
+ CL_MEM_WRITE_ONLY,
+ fosphor_gl_get_shared_id(gl, GL_ID_VBO_SPECTRUM),
+ &err
+ );
+ CL_ERR_CHECK(err, "Unable to share spectrum VBO into OpenCL context");
+
+ /* Display program/kernel */
+ disp_opts = (cl->feat.flags & FLG_CL_NVIDIA_SM11) ? "-DUSE_NV_SM11_ATOMICS" : NULL;
+ cl->prog_display = cl_load_program(cl->dev_id, cl->ctx, "display.cl", disp_opts);
+ if (!cl->prog_display)
+ goto error;
+
+ cl->kern_display = clCreateKernel(cl->prog_display, "display", &err);
+ CL_ERR_CHECK(err, "Unable to create display kernel");
+
+ /* Configure static display kernel args */
+ cl_uint fft_log2_len = 10;
+ cl_uint fft_batch = FOSPHOR_FFT_MAX_BATCH;
+ cl_float histo_t0r = 16.0f;
+ cl_float histo_t0d = 1024.0f;
+ cl_float live_alpha = 0.002f;
+
+ err = clSetKernelArg(cl->kern_display, 0, sizeof(cl_mem), &cl->mem_fft_out);
+ err |= clSetKernelArg(cl->kern_display, 1, sizeof(cl_int), &fft_log2_len);
+ err |= clSetKernelArg(cl->kern_display, 2, sizeof(cl_int), &fft_batch);
+
+ err |= clSetKernelArg(cl->kern_display, 3, sizeof(cl_mem), &cl->mem_waterfall);
+
+ err |= clSetKernelArg(cl->kern_display, 5, sizeof(cl_mem), &cl->mem_histogram);
+ err |= clSetKernelArg(cl->kern_display, 6, sizeof(cl_mem), &cl->mem_histogram);
+ err |= clSetKernelArg(cl->kern_display, 7, sizeof(cl_float), &histo_t0r);
+ err |= clSetKernelArg(cl->kern_display, 8, sizeof(cl_float), &histo_t0d);
+
+ err |= clSetKernelArg(cl->kern_display, 11, sizeof(cl_mem), &cl->mem_spectrum);
+ err |= clSetKernelArg(cl->kern_display, 12, sizeof(cl_float), &live_alpha);
+
+ CL_ERR_CHECK(err, "Unable to configure display kernel");
+
+ /* All done */
+ err = 0;
+
+error:
+ return err;
+}
+
+static void
+cl_do_release(struct fosphor_cl_state *cl)
+{
+ if (cl->kern_display)
+ clReleaseKernel(cl->kern_display);
+
+ if (cl->prog_display)
+ clReleaseProgram(cl->prog_display);
+
+ if (cl->mem_spectrum)
+ clReleaseMemObject(cl->mem_spectrum);
+
+ if (cl->mem_histogram)
+ clReleaseMemObject(cl->mem_histogram);
+
+ if (cl->mem_waterfall)
+ clReleaseMemObject(cl->mem_waterfall);
+
+ if (cl->kern_fft)
+ clReleaseKernel(cl->kern_fft);
+
+ if (cl->prog_fft)
+ clReleaseProgram(cl->prog_fft);
+
+ if (cl->mem_fft_out)
+ clReleaseMemObject(cl->mem_fft_out);
+
+ if (cl->mem_fft_in)
+ clReleaseMemObject(cl->mem_fft_in);
+
+ if (cl->cq)
+ clReleaseCommandQueue(cl->cq);
+
+ if (cl->ctx)
+ clReleaseContext(cl->ctx);
+}
+
+
+/* -------------------------------------------------------------------------- */
+/* Exposed API */
+/* -------------------------------------------------------------------------- */
+
+struct fosphor_cl_state *
+fosphor_cl_init(struct fosphor_gl_state *gl)
+{
+ struct fosphor_cl_state *cl;
+ char dev_name[128];
+ cl_int err;
+
+ /* Allocate structure */
+ cl = malloc(sizeof(struct fosphor_cl_state));
+ if (!cl)
+ return NULL;
+
+ memset(cl, 0, sizeof(struct fosphor_cl_state));
+
+ /* Find suitable device */
+ if (cl_find_device(&cl->pl_id, &cl->dev_id, &cl->feat)) {
+ fprintf(stderr, "[!] No suitable OpenCL device found\n");
+ goto error;
+ }
+
+ /* Report selected device */
+ err = clGetDeviceInfo(cl->dev_id, CL_DEVICE_NAME, sizeof(dev_name)-1, dev_name, NULL);
+ CL_ERR_CHECK(err, "Unable to fetch device name");
+
+ fprintf(stderr, "[+] Selected device: %s\n", dev_name);
+
+ /* Initialize selected platform / device */
+ err = cl_do_init(cl, gl);
+ if (err)
+ goto error;
+
+ /* Done */
+ return cl;
+
+ /* Error path */
+error:
+ fosphor_cl_release(cl);
+
+ return NULL;
+}
+
+void
+fosphor_cl_release(struct fosphor_cl_state *cl)
+{
+ /* Safety */
+ if (!cl)
+ return;
+
+ /* Release all allocated OpenCL resources */
+ cl_do_release(cl);
+
+ /* Release structure */
+ free(cl);
+}
+
+int
+fosphor_cl_process(struct fosphor_cl_state *cl,
+ void *samples, int len)
+{
+ cl_int err;
+ size_t local[2], global[2];
+ int n_spectra = len / FOSPHOR_FFT_LEN;
+ cl_mem objs[3];
+
+ /* Check length is multiple of FFT length */
+ if (len & (FOSPHOR_FFT_LEN-1))
+ return -EINVAL;
+
+ /* Copy samples data */
+ err = clEnqueueWriteBuffer(
+ cl->cq,
+ cl->mem_fft_in,
+ CL_FALSE,
+ 0, 2 * sizeof(cl_float) * len, samples,
+ 0, NULL, NULL
+ );
+ CL_ERR_CHECK(err, "Unable to copy data to FFT input buffer");
+
+ /* Execute FFT kernel */
+ global[0] = FOSPHOR_FFT_LEN / 8;
+ global[1] = n_spectra;
+
+ local[0] = global[0];
+ local[1] = 1;
+
+ err = clEnqueueNDRangeKernel(cl->cq, cl->kern_fft, 2, NULL, global, local, 0, NULL, NULL);
+ CL_ERR_CHECK(err, "Unable to queue FFT kernel execution");
+
+ /* GL Objects */
+ objs[0] = cl->mem_waterfall;
+ objs[2] = cl->mem_histogram;
+ objs[1] = cl->mem_spectrum;
+
+ /* Capture all GL objects */
+ err = clEnqueueAcquireGLObjects(cl->cq, 3, objs, 0, NULL, NULL);
+ CL_ERR_CHECK(err, "Unable to acquire GL objects");
+
+ /* Configure display kernel */
+ err = clSetKernelArg(cl->kern_display, 4, sizeof(cl_int), &cl->waterfall_pos);
+ err |= clSetKernelArg(cl->kern_display, 9, sizeof(cl_float), &cl->histo_scale);
+ err |= clSetKernelArg(cl->kern_display, 10, sizeof(cl_float), &cl->histo_offset);
+ CL_ERR_CHECK(err, "Unable to configure display kernel");
+
+ /* Execute display kernel */
+ global[0] = FOSPHOR_FFT_LEN;
+ global[1] = 16;
+ local[0] = 16;
+ local[1] = 16;
+
+ err = clEnqueueNDRangeKernel(cl->cq, cl->kern_display, 2, NULL, global, local, 0, NULL, NULL);
+ CL_ERR_CHECK(err, "Unable to queue display kernel execution");
+
+ /* Release all GL objects */
+ err = clEnqueueReleaseGLObjects(cl->cq, 3, objs, 0, NULL, NULL);
+ CL_ERR_CHECK(err, "Unable to release GL objects");
+
+ /* Ensure CL is done */
+ clFinish(cl->cq);
+
+ /* Advance waterfall */
+ cl->waterfall_pos = (cl->waterfall_pos + n_spectra) & 1023;
+
+ return 0;
+
+error:
+ return -EIO;
+}
+
+int
+fosphor_cl_get_waterfall_position(struct fosphor_cl_state *cl)
+{
+ return cl->waterfall_pos;
+}
+
+void
+fosphor_cl_set_histogram_range(struct fosphor_cl_state *cl,
+ float scale, float offset)
+{
+ cl->histo_scale = scale * 128.0f;
+ cl->histo_offset = offset;
+}
+
+/*! @} */