diff options
author | Sylvain Munaut <tnt@246tNt.com> | 2013-10-18 11:43:06 +0200 |
---|---|---|
committer | Sylvain Munaut <tnt@246tNt.com> | 2013-10-18 11:43:06 +0200 |
commit | 20fd7fac53d54607be9a5ea528689d84e0c6f0aa (patch) | |
tree | fee81720c62e00a5d403169443e202110de875c6 /lib/fosphor | |
parent | 72f71d5f0001a55373b48077754dfcd36134a521 (diff) |
fosphor/cl: Allow the use of cl_khr_local_int32_base_atomics extension
Apparently there are some cards that are neither Nvidia SM1.1, nor
OpenCL 1.1 but that do have cl_khr_local_int32_base_atomics extension.
This is enough for fosphor to implement the histogram so add the
required code to test for it and enable its use in the kernel if
required.
Signed-off-by: Sylvain Munaut <tnt@246tNt.com>
Diffstat (limited to 'lib/fosphor')
-rw-r--r-- | lib/fosphor/cl.c | 18 | ||||
-rw-r--r-- | lib/fosphor/display.cl | 12 |
2 files changed, 26 insertions, 4 deletions
diff --git a/lib/fosphor/cl.c b/lib/fosphor/cl.c index e7a2629..1769166 100644 --- a/lib/fosphor/cl.c +++ b/lib/fosphor/cl.c @@ -55,6 +55,7 @@ struct fosphor_cl_features #define FLG_CL_GL_SHARING (1<<0) #define FLG_CL_NVIDIA_SM11 (1<<1) #define FLG_CL_OPENCL_11 (1<<2) +#define FLG_CL_LOCAL_ATOMIC_EXT (1<<3) cl_device_type type; int local_mem; @@ -147,6 +148,10 @@ cl_device_query(cl_device_id dev_id, struct fosphor_cl_features *feat) /* Check for NV attributes */ has_nv_attr = !!strstr(txt, "cl_nv_device_attribute_query"); + /* Check for cl_khr_local_int32_base_atomics extension */ + if (strstr(txt, "cl_khr_local_int32_base_atomics")) + feat->flags |= FLG_CL_LOCAL_ATOMIC_EXT; + /* Check OpenCL 1.1 compat */ err = clGetDeviceInfo(dev_id, CL_DEVICE_VERSION, sizeof(txt)-1, txt, NULL); if (err != CL_SUCCESS) @@ -175,11 +180,12 @@ cl_device_query(cl_device_id dev_id, struct fosphor_cl_features *feat) feat->flags |= FLG_CL_NVIDIA_SM11; } #ifdef __APPLE__ - else if (!(feat->flags & FLG_CL_OPENCL_11)) + else if (!(feat->flags & (FLG_CL_OPENCL_11 | FLG_CL_LOCAL_ATOMIC_EXT))) { /* * OSX doesn't allow query of NV attributes even on NVidia * cards so we just assume any non-opencl 1.1 nvidia card + * without cl_khr_local_int32_base_atomics extension * that does OpenCL is a SM1.1 one */ err = clGetDeviceInfo(dev_id, CL_DEVICE_VENDOR, sizeof(txt)-1, txt, NULL); @@ -210,7 +216,7 @@ cl_device_score(cl_device_id dev_id, struct fosphor_cl_features *feat) if (!(feat->flags & FLG_CL_GL_SHARING)) return -1; - if (!(feat->flags & (FLG_CL_NVIDIA_SM11 | FLG_CL_OPENCL_11))) + if (!(feat->flags & (FLG_CL_NVIDIA_SM11 | FLG_CL_OPENCL_11 | FLG_CL_LOCAL_ATOMIC_EXT))) return -1; /* Prefer GPU */ @@ -421,7 +427,13 @@ cl_do_init(struct fosphor_cl_state *cl, struct fosphor_gl_state *gl) CL_ERR_CHECK(err, "Unable to share spectrum VBO into OpenCL context"); /* Display program/kernel */ - disp_opts = (cl->feat.flags & FLG_CL_NVIDIA_SM11) ? "-DUSE_NV_SM11_ATOMICS" : NULL; + if (cl->feat.flags & FLG_CL_NVIDIA_SM11) + disp_opts = "-DUSE_NV_SM11_ATOMICS"; + else if (!(cl->feat.flags & FLG_CL_OPENCL_11)) + disp_opts = "-DUSE_EXT_ATOMICS"; + else + disp_opts = NULL; + cl->prog_display = cl_load_program(cl->dev_id, cl->ctx, "display.cl", disp_opts); if (!cl->prog_display) goto error; diff --git a/lib/fosphor/display.cl b/lib/fosphor/display.cl index 8a1104b..b952d95 100644 --- a/lib/fosphor/display.cl +++ b/lib/fosphor/display.cl @@ -27,6 +27,14 @@ /* Enable or not use of NV SM11 histogram algo (set automatically) */ /* #define USE_NV_SM11_ATOMICS */ +/* Enable or not the use of cl_khr_local_int32_base_atomics to + * implement atomic add (set automatically) */ +/* #define USE_EXT_ATOMICS */ + +#ifdef USE_EXT_ATOMICS +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable +#endif + #define CLAMP //#define MAX_HOLD_LIVE @@ -160,8 +168,10 @@ __kernel void display( #endif /* Atomic Bin increment */ -#ifdef USE_NV_SM11_ATOMICS +#if defined(USE_NV_SM11_ATOMICS) nv_sm11_atomic_inc(&histo_buf[(bin << 4) + get_local_id(1)], tag); +#elif defined(USE_EXT_ATOMICS) + atom_inc(&histo_buf[(bin << 4) + get_local_id(0)]); #else atomic_inc(&histo_buf[(bin << 4) + get_local_id(0)]); #endif |