11import CUDAnative
22
3- function maxpool2d_kernel (state, A:: AbstractArray{T} , out, Asize, pool, stride_ , outSize) where T
3+ function maxpool2d_kernel (state, A:: AbstractArray{T} , out, Asize, pool, stride , outSize) where T
44 ilin = linear_index (state)
55 idx = GPUArrays. gpu_ind2sub (Asize, ilin)
66 if (idx[1 ] > outSize[1 ] || idx[2 ] > outSize[2 ] || idx[3 ] > outSize[3 ] || idx[4 ] > outSize[4 ])
77 return
88 end
99
10- temp_max = A[((idx[1 ] - 1 ) * stride_ ) + Asize[1 ] * (idx[2 ] - 1 ) * stride_ + (Asize[1 ] * Asize[2 ]) * (idx[3 ] - 1 ) + (Asize[1 ] * Asize[2 ] * Asize[3 ]) * (idx[4 ] - 1 ) + 1 ]
11- max_pos = ((idx[1 ] - 1 ) * stride_ ) + Asize[1 ] * (idx[2 ] - 1 ) * stride_ + (Asize[1 ] * Asize[2 ]) * (idx[3 ] - 1 ) + (Asize[1 ] * Asize[2 ] * Asize[3 ]) * (idx[4 ] - 1 ) + 1
12- curr_pos = ((idx[1 ] - 1 ) * stride_ ) + Asize[1 ] * (idx[2 ] - 1 ) * stride_ + (Asize[1 ] * Asize[2 ]) * (idx[3 ] - 1 ) + (Asize[1 ] * Asize[2 ] * Asize[3 ]) * (idx[4 ] - 1 ) + 1
10+ temp_max = A[((idx[1 ] - 1 ) * stride ) + Asize[1 ] * (idx[2 ] - 1 ) * stride + (Asize[1 ] * Asize[2 ]) * (idx[3 ] - 1 ) + (Asize[1 ] * Asize[2 ] * Asize[3 ]) * (idx[4 ] - 1 ) + 1 ]
11+ max_pos = ((idx[1 ] - 1 ) * stride ) + Asize[1 ] * (idx[2 ] - 1 ) * stride + (Asize[1 ] * Asize[2 ]) * (idx[3 ] - 1 ) + (Asize[1 ] * Asize[2 ] * Asize[3 ]) * (idx[4 ] - 1 ) + 1
12+ curr_pos = ((idx[1 ] - 1 ) * stride ) + Asize[1 ] * (idx[2 ] - 1 ) * stride + (Asize[1 ] * Asize[2 ]) * (idx[3 ] - 1 ) + (Asize[1 ] * Asize[2 ] * Asize[3 ]) * (idx[4 ] - 1 ) + 1
1313
1414 for p in 1 : pool
1515 for p in 1 : pool
@@ -27,14 +27,16 @@ function maxpool2d_kernel(state, A::AbstractArray{T}, out, Asize, pool, stride_,
2727end
2828
2929
"""
    maxpool2d(a, pool; stride = 1, pad = 0)

Run `maxpool2d_kernel` over a zero-padded copy of the 4-dimensional array `a`
and return the pooled result.

# Arguments
- `a`: input array; indexed with four dimensions. Only the first two
  dimensions are padded and reduced in size, the last two are carried through.
- `pool`: edge length of the square pooling window (must be >= 1 and fit in
  the padded input).
- `stride`: step between consecutive windows along the first two dimensions
  (must be >= 1).
- `pad`: number of zero entries added on each side of the first two
  dimensions (must be >= 0).

# Returns
An array created with `similar` from the padded input, of size
`(div(H + 2pad - pool, stride) + 1, div(W + 2pad - pool, stride) + 1, size(a, 3), size(a, 4))`
where `H, W = size(a, 1), size(a, 2)`.

# Throws
- `ArgumentError` if `pad < 0`, `pool < 1`, `stride < 1`, or the pooling
  window is larger than the padded input.
"""
function maxpool2d(a, pool; stride = 1, pad = 0)
    pad >= 0 || throw(ArgumentError("pad must be non-negative, got $pad"))
    pool >= 1 || throw(ArgumentError("pool must be at least 1, got $pool"))
    stride >= 1 || throw(ArgumentError("stride must be at least 1, got $stride"))

    padded_h = size(a, 1) + 2 * pad
    padded_w = size(a, 2) + 2 * pad
    # The output-size formula below is evaluated in UInt32; a window larger
    # than the padded input would wrap around instead of failing.
    pool <= min(padded_h, padded_w) ||
        throw(ArgumentError("pool ($pool) exceeds padded input size ($padded_h, $padded_w)"))

    # Build the padded buffer from `a` itself so it has the same array kind and
    # element type, then zero it and copy the input into the interior.
    # NOTE(review): the border is filled with zeros; if inputs can be negative,
    # confirm that zero (rather than typemin) is the intended padding value.
    b = fill!(similar(a, eltype(a), padded_h, padded_w, size(a, 3), size(a, 4)), zero(eltype(a)))
    b[(pad + 1):(pad + size(a, 1)), (pad + 1):(pad + size(a, 2)), :, :] = a

    # The kernel receives sizes and window parameters as UInt32.
    Asize = UInt32.(size(b))
    pool = UInt32(pool)
    stride = UInt32(stride)

    # Allocate the output directly at its final shape instead of allocating a
    # full-size buffer and slicing it.
    out_h = Int(div(Asize[1] - pool, stride) + 1)
    out_w = Int(div(Asize[2] - pool, stride) + 1)
    out = similar(b, out_h, out_w, size(b, 3), size(b, 4))
    outSize = UInt32.(size(out))

    # The launch is configured from `b`; the kernel returns early for indices
    # that fall outside `outSize`.
    gpu_call(maxpool2d_kernel, b, (b, out, Asize, pool, stride, outSize))
    GPUArrays.synchronize(out)
    out
end
0 commit comments