0 N Tidk = threadIdx.x Tidk' = Tidk + blockDim.x Bl2k+2 Bl2k+3 Pid' Tid1 Tid2 Tid'2 Bl2k Bl2k+1 Shared: 0 blockDim.x 2*blockDim.x blockDim.x Read Write Read Write