Skip to content

Conversation

@maleadt
Copy link
Member

@maleadt maleadt commented Jul 9, 2025

Fixes #294, making it impossible to clcall kernels with the wrong memory type.
This is slightly breaking, as you now have to correctly specify the pointer type when passing something to clcall.

@maleadt maleadt marked this pull request as draft July 9, 2025 13:37
@github-actions
Copy link
Contributor

github-actions bot commented Jul 9, 2025

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (git runic master) to apply these changes.

Click here to view the suggested changes.
diff --git a/lib/cl/kernel.jl b/lib/cl/kernel.jl
index c899481..0b4fed7 100644
--- a/lib/cl/kernel.jl
+++ b/lib/cl/kernel.jl
@@ -55,11 +55,11 @@ function Base.getproperty(k::Kernel, s::Symbol)
     elseif s == :context
         result = Ref{cl_context}()
         clGetKernelInfo(k, CL_KERNEL_CONTEXT, sizeof(cl_context), result, C_NULL)
-        return Context(result[], retain=true)
+        return Context(result[], retain = true)
     elseif s == :program
         result = Ref{cl_program}()
         clGetKernelInfo(k, CL_KERNEL_PROGRAM, sizeof(cl_program), result, C_NULL)
-        return Program(result[], retain=true)
+        return Program(result[], retain = true)
     elseif s == :attributes
         size = Ref{Csize_t}()
         err = unchecked_clGetKernelInfo(k, CL_KERNEL_ATTRIBUTES, 0, C_NULL, size)
@@ -91,7 +91,7 @@ function Base.getproperty(ki::KernelWorkGroupInfo, s::Symbol)
         return result[]
     end
 
-    if s == :size
+    return if s == :size
         Int(get(CL_KERNEL_WORK_GROUP_SIZE, Csize_t))
     elseif s == :compile_size
         Int.(get(CL_KERNEL_COMPILE_WORK_GROUP_SIZE, NTuple{3, Csize_t}))
@@ -167,8 +167,8 @@ function enqueue_kernel(k::Kernel, global_work_size, local_work_size=nothing;
     return Event(ret_event[], retain=false)
 end
 
-function enqueue_task(k::Kernel; wait_for=nothing)
-    n_evts  = 0
+function enqueue_task(k::Kernel; wait_for = nothing)
+    n_evts = 0
     evt_ids = C_NULL
     #TODO: this should be split out into its own function
     if wait_for !== nothing
@@ -263,17 +263,17 @@ function _to_tuple_type(t)
     else
         error("expected tuple type")
     end
-    t
+    return t
 end
 
-clcall(f::F, types::Tuple, args::Vararg{Any,N}; kwargs...) where {N,F} =
+clcall(f::F, types::Tuple, args::Vararg{Any, N}; kwargs...) where {N, F} =
     clcall(f, _to_tuple_type(types), args...; kwargs...)
 
-function clcall(k::Kernel, types::Type{T}, args::Vararg{Any,N}; kwargs...) where {T,N}
-    call_closure = function (converted_args::Vararg{Any,N})
-        call(k, converted_args...; kwargs...)
+function clcall(k::Kernel, types::Type{T}, args::Vararg{Any, N}; kwargs...) where {T, N}
+    call_closure = function (converted_args::Vararg{Any, N})
+        return call(k, converted_args...; kwargs...)
     end
-    convert_arguments(call_closure, types, args...)
+    return convert_arguments(call_closure, types, args...)
 end
 
 
@@ -317,15 +317,17 @@ function set_arg!(k::Kernel, idx::Integer, arg::T) where {T}
     tsize = sizeof(ref)
     err = unchecked_clSetKernelArg(k, idx - 1, tsize, ref)
     if err == CL_INVALID_ARG_SIZE
-        error("""Mismatch between Julia and OpenCL type for kernel argument $idx.
-
-                 Possible reasons:
-                 - OpenCL does not support empty types.
-                 - Vectors of length 3 (e.g., `float3`) are packed as 4-element vectors;
-                   consider padding your tuples.
-                 - The alignment of fields in your struct may not match the OpenCL layout.
-                   Make sure your Julia definition matches the OpenCL layout, e.g., by
-                   using `__attribute__((packed))` in your OpenCL struct definition.""")
+        error(
+            """Mismatch between Julia and OpenCL type for kernel argument $idx.
+
+            Possible reasons:
+            - OpenCL does not support empty types.
+            - Vectors of length 3 (e.g., `float3`) are packed as 4-element vectors;
+              consider padding your tuples.
+            - The alignment of fields in your struct may not match the OpenCL layout.
+              Make sure your Julia definition matches the OpenCL layout, e.g., by
+              using `__attribute__((packed))` in your OpenCL struct definition."""
+        )
     elseif err != CL_SUCCESS
         throw(CLError(err))
     end
@@ -334,7 +336,7 @@ end
 
 function set_arg!(k::Kernel, idx::Integer, arg::Nothing)
     @assert idx > 0
-    clSetKernelArg(k, idx - 1, sizeof(cl_mem), C_NULL)
+    return clSetKernelArg(k, idx - 1, sizeof(cl_mem), C_NULL)
 end
 
 
@@ -342,15 +344,17 @@ end
 
 # passing pointers directly requires the memory type to be specified
 set_arg!(k::Kernel, idx::Integer, arg::Union{Ptr, CLPtr}) =
-    error("""Cannot pass a pointer to a kernel without specifying the origin memory type.
-             Pass a memory object instead, or use the 4-arg version of `set_arg!` to indicate the memory type.""")
+    error(
+    """Cannot pass a pointer to a kernel without specifying the origin memory type.
+    Pass a memory object instead, or use the 4-arg version of `set_arg!` to indicate the memory type."""
+)
 function set_arg!(k::Kernel, idx::Integer, ptr::Union{Ptr, CLPtr}, typ::Type)
     # XXX: this assumes that the receiving argument is pointer-typed, which is not the case
     #      with Julia's `Ptr` ABI. Instead, one should reinterpret the pointer as a
     #      `Core.LLVMPtr`, which _is_ pointer-valued. We retain this handling for `Ptr` for
     #      users passing pointers to OpenCL C, and because `Ptr` is pointer-valued starting
     #      with Julia 1.12.
-    if typ == SharedVirtualMemory
+    return if typ == SharedVirtualMemory
         clSetKernelArgSVMPointer(k, idx - 1, ptr)
     elseif typ <: UnifiedMemory
         clSetKernelArgMemPointerINTEL(k, idx - 1, ptr)
@@ -367,16 +371,16 @@ end
 unsafe_clconvert(typ::Type{<:Union{Ptr, CLPtr}}, mem::AbstractMemoryObject) = mem
 function set_arg!(k::Kernel, idx::Integer, arg::AbstractMemoryObject)
     arg_boxed = Ref(arg.id)
-    clSetKernelArg(k, idx - 1, sizeof(cl_mem), arg_boxed)
+    return clSetKernelArg(k, idx - 1, sizeof(cl_mem), arg_boxed)
 end
 
 # raw memory: pass as a pointer, keeping track of the memory type
-struct TrackedPtr{T,M}
+struct TrackedPtr{T, M}
     ptr::Union{Ptr{T}, CLPtr{T}}
 end
 unsafe_clconvert(typ::Type{<:Union{Ptr{T}, CLPtr{T}}}, mem::AbstractPointerMemory) where {T} =
-    TrackedPtr{T,typeof(mem)}(Base.unsafe_convert(typ, mem))
-set_arg!(k::Kernel, idx::Integer, arg::TrackedPtr{<:Any,M}) where {M} =
+    TrackedPtr{T, typeof(mem)}(Base.unsafe_convert(typ, mem))
+set_arg!(k::Kernel, idx::Integer, arg::TrackedPtr{<:Any, M}) where {M} =
     set_arg!(k, idx, arg.ptr, M)
 set_arg!(k::Kernel, idx::Integer, arg::AbstractPointerMemory) =
     set_arg!(k, idx, pointer(arg), typeof(arg))
@@ -388,7 +392,7 @@ struct LocalMem{T}
     nbytes::Csize_t
 end
 
-function LocalMem(::Type{T}, len::Integer) where T
+function LocalMem(::Type{T}, len::Integer) where {T}
     @assert len > 0
     nbytes = sizeof(T) * len
     return LocalMem{T}(convert(Csize_t, nbytes))
@@ -403,5 +407,5 @@ Base.length(l::LocalMem{T}) where {T} = Int(l.nbytes ÷ sizeof(T))
 #      the problem is the size being passed to `clSetKernelArg`
 unsafe_clconvert(::Type{CLPtr{T}}, l::LocalMem{T}) where {T} = l
 function set_arg!(k::Kernel, idx::Integer, arg::LocalMem)
-    clSetKernelArg(k, idx - 1, arg.nbytes, C_NULL)
+    return clSetKernelArg(k, idx - 1, arg.nbytes, C_NULL)
 end
diff --git a/lib/cl/libopencl.jl b/lib/cl/libopencl.jl
index d8a47b8..abe6727 100644
--- a/lib/cl/libopencl.jl
+++ b/lib/cl/libopencl.jl
@@ -607,7 +607,8 @@ end
 
 @checked function clSetKernelArgSVMPointer(kernel, arg_index, arg_value)
     @ccall libopencl.clSetKernelArgSVMPointer(kernel::cl_kernel, arg_index::cl_uint,
-                                              arg_value::PtrOrCLPtr{Cvoid})::cl_int
+        arg_value::PtrOrCLPtr{Cvoid}
+    )::cl_int
 end
 
 @checked function clSetKernelExecInfo(kernel, param_name, param_value_size, param_value)
diff --git a/test/kernel.jl b/test/kernel.jl
index e7033c8..27d609d 100644
--- a/test/kernel.jl
+++ b/test/kernel.jl
@@ -97,9 +97,11 @@
         k = cl.Kernel(p, "test")
 
         # dimensions must be the same size
-        @test_throws ArgumentError clcall(k, Tuple{CLPtr{Float32}}, d_arr;
+        @test_throws ArgumentError clcall(
+            k, Tuple{CLPtr{Float32}}, d_arr;
                                           global_size=(1,), local_size=(1,1))
-        @test_throws ArgumentError clcall(k, Tuple{CLPtr{Float32}}, d_arr;
+        @test_throws ArgumentError clcall(
+            k, Tuple{CLPtr{Float32}}, d_arr;
                                           global_size=(1,1), local_size=(1,))
 
         # dimensions are bounded

@codecov
Copy link

codecov bot commented Jul 9, 2025

Codecov Report

All modified and coverable lines are covered by tests ✅

Project coverage is 79.07%. Comparing base (25dd395) to head (257fd03).
Report is 1 commits behind head on master.

Additional details and impacted files
@@           Coverage Diff           @@
##           master     #341   +/-   ##
=======================================
  Coverage   79.07%   79.07%           
=======================================
  Files          12       12           
  Lines         669      669           
=======================================
  Hits          529      529           
  Misses        140      140           

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

🚀 New features to boost your workflow:
  • ❄️ Test Analytics: Detect flaky tests, report on failures, and find test suite problems.

@maleadt maleadt marked this pull request as ready for review July 9, 2025 18:17
@maleadt maleadt merged commit d8c7167 into master Jul 9, 2025
45 checks passed
@maleadt maleadt deleted the tb/track-args branch July 9, 2025 18:18
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

Invalid device buffer to CPU pointer conversions are allowed

2 participants