-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcompute.py
More file actions
397 lines (378 loc) · 14.5 KB
/
compute.py
File metadata and controls
397 lines (378 loc) · 14.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
from numba import cuda
import numpy as np
import cupy as cp
import random as rd
import math
import datetime
def init(settings):
#Validation
if not cuda.is_available():
print("No CUDA device found.")
exit()
#######################################
# Constants
#######################################
global POPULATION, SPEED, WRAP_AROUND, SPOTLIGHT
global COHESION, ALIGNMENT, SEPARATION, NEIGHBOR_DIST, NEIGHBOR_DIST_SQUARED, SEPARATION_DIST_SQUARED
global WIDTH, HEIGHT, HALF_WIDTH, HALF_HEIGHT, SPEED, PARALLEL, GRID_CELL_SIZE, GRID_WIDTH, GRID_HEIGHT, GRID_SIZE
POPULATION = settings.POPULATION
SPEED = settings.SPEED
COHESION = settings.COHESION
ALIGNMENT = settings.ALIGNMENT
SEPARATION = settings.SEPARATION
NEIGHBOR_DIST = settings.NEIGHBOR_DIST
NEIGHBOR_DIST_SQUARED = NEIGHBOR_DIST ** 2 # Useful for evaluating distances
SEPARATION_DIST_SQUARED = (settings.NEIGHBOR_DIST * settings.SEPARATION_DIST) ** 2
WIDTH = settings.WIDTH
HEIGHT = settings.HEIGHT
HALF_WIDTH = WIDTH/2
HALF_HEIGHT = HEIGHT/2
GRID_CELL_SIZE = settings.NEIGHBOR_DIST
GRID_WIDTH = WIDTH//GRID_CELL_SIZE
GRID_HEIGHT = HEIGHT//GRID_CELL_SIZE
GRID_SIZE = GRID_WIDTH*GRID_HEIGHT
WRAP_AROUND = settings.WRAP_AROUND
SPOTLIGHT = settings.SPOTLIGHT
#######################################
# Arrays
#######################################
global renderData, boidData, lookUpTable, cellIndexTable, boidPositionTable
renderData = np.zeros((POPULATION, 3), dtype=np.float32)
tempBoidData = np.zeros((POPULATION, 32), dtype=np.float32)
for i in range(POPULATION):
tempBoidData[i,0] = 0.0
tempBoidData[i,1] = rd.uniform(0, WIDTH)
tempBoidData[i,2] = rd.uniform(0, HEIGHT)
tempBoidData[i,3] = rd.random()*2-1
tempBoidData[i,4] = rd.random()*2-1
boidData = cuda.to_device(tempBoidData)
# Tables
# Boid Table - List of every boid and their current cell, sorted by cell
tempBoidTable = np.zeros((POPULATION,2), dtype=np.int32)
for index, cell in enumerate(tempBoidTable):
cell[0] = index
# Sends array to GPU with CuPy instead of numba, to use CuPy methods
boidPositionTable = cp.asarray(tempBoidTable)
# Tables
# Cell Index Table (gives start index of cell on sorted Boid Table)
cellIndexTable = np.zeros(GRID_SIZE, dtype=np.int32)
# Tables
# Look-Up table (for neighbor cells - read only)
tempLookUpTable = np.zeros((GRID_SIZE,9), dtype=np.int32)
cellOffset = {-1, 0, 1}
for index, cell in enumerate(tempLookUpTable):
x = index % GRID_WIDTH
y = index //GRID_WIDTH
col = 0
for i in cellOffset:
for j in cellOffset:
cell[col] = (x + i)%GRID_WIDTH + (y + j)%GRID_HEIGHT * GRID_WIDTH
col += 1
lookUpTable = cuda.to_device(tempLookUpTable)
#######################################
# Update
#######################################
def update(params):
updateParams(params)
start_time = datetime.datetime.now()
global renderData, boidData, lookUpTable, boidPositionTable, cellIndexTable
# Kernels are set to default stream and are executed sequentially
# 512 threads per block, as many blocks as we need
nthreads = 512
nblocks = (POPULATION + (nthreads - 1)) // nthreads
fillBoidPositionTable[nblocks, nthreads](boidData, boidPositionTable, renderData)
end_time = datetime.datetime.now()
time_diff = (end_time - start_time)
print("positions:",time_diff.total_seconds() * 1000)
start_time = datetime.datetime.now()
# CuPy sort, a bit faster than on CPU
boidPositionTable = boidPositionTable[cp.argsort(boidPositionTable[:,1])]
end_time = datetime.datetime.now()
time_diff = (end_time - start_time)
print("sort:",time_diff.total_seconds() * 1000)
start_time = datetime.datetime.now()
# New index table according to data from sorted boid table
cellIndexTable.fill(-1)
end_time = datetime.datetime.now()
time_diff = (end_time - start_time)
print("prefill:",time_diff.total_seconds() * 1000)
start_time = datetime.datetime.now()
fillCellIndexTable[nblocks, nthreads](boidPositionTable, cellIndexTable)
end_time = datetime.datetime.now()
time_diff = (end_time - start_time)
print("index:",time_diff.total_seconds() * 1000)
start_time = datetime.datetime.now()
# 2D kernel. X axis is boids, Y axis is each of their 9 respective neighbor cells
neighborSearch[(nblocks,9), (nthreads,1)](boidData, lookUpTable, cellIndexTable, boidPositionTable, renderData, SPOTLIGHT, WRAP_AROUND, COHESION, ALIGNMENT, SEPARATION, SEPARATION_DIST_SQUARED)
end_time = datetime.datetime.now()
time_diff = (end_time - start_time)
print("read:",time_diff.total_seconds() * 1000)
start_time = datetime.datetime.now()
# Update positions and write to render buffer
writeData[nblocks, nthreads](boidData, renderData, WRAP_AROUND, SPEED)
end_time = datetime.datetime.now()
time_diff = (end_time - start_time)
print("write:",time_diff.total_seconds() * 1000)
# Update parameters with GUI input
def updateParams(params):
global POPULATION, SPEED, WRAP_AROUND, SPOTLIGHT
global COHESION, ALIGNMENT, SEPARATION, NEIGHBOR_DIST, NEIGHBOR_DIST_SQUARED, SEPARATION_DIST_SQUARED
POPULATION = params.POPULATION
SPEED = params.SPEED
COHESION = params.COHESION
ALIGNMENT = params.ALIGNMENT
SEPARATION = params.SEPARATION
SEPARATION_DIST_SQUARED = (params.NEIGHBOR_DIST * params.SEPARATION_DIST) ** 2
WRAP_AROUND = params.WRAP_AROUND
SPOTLIGHT = params.SPOTLIGHT
#######################################
# Kernels
#######################################
# Update Boid Table with new cell for every agent
@cuda.jit
def fillBoidPositionTable(boidData, boidPositionTable, renderData):
index = cuda.grid(1)
if index >= POPULATION:
return
x = int(boidData[index,1]//GRID_CELL_SIZE) % GRID_WIDTH
y = int(boidData[index,2]//GRID_CELL_SIZE) % GRID_HEIGHT
gridIndex = x + y * GRID_WIDTH
boidPositionTable[index,0] = index
boidPositionTable[index,1] = gridIndex
renderData[index, 2] = 0.0
# Update Index Table to give start index for every cell on boidPositionTable
@cuda.jit
def fillCellIndexTable(boidPositionTable, cellIndexTable):
index = cuda.grid(1)
if index >= POPULATION:
return
gridIndex = boidPositionTable[index,1]
if index > 0:
previousGridIndex = boidPositionTable[index-1,1]
if previousGridIndex == gridIndex:
return
cellIndexTable[gridIndex] = index
# For a certain agent and a certain cell in the agent's neighboring cells
# Get neighbor data and compute new direction vector with weight
# boidData -> boidData
@cuda.jit
def neighborSearch(boidData, lookUpTable, cellIndexTable, boidPositionTable, renderData, SPOTLIGHT, WRAP_AROUND, COHESION, ALIGNMENT, SEPARATION, SEPARATION_DIST_SQUARED):
lookUpIndex = cuda.blockIdx.y
index = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
if index >= POPULATION:
return
# Current position
x, y = boidData[index,1], boidData[index,2]
dx, dy = boidData[index,3],boidData[index,4]
# Boid direction
resX, resY = 0.0, 0.0
numNeighbors = 0
alignmentX, alignmentY = 0.0, 0.0
cohesionX, cohesionY = 0.0, 0.0
separationX, separationY = 0.0, 0.0
numSeparation = 0
#Check if close to edge (for distanche check)
if WRAP_AROUND and (x < NEIGHBOR_DIST or y < NEIGHBOR_DIST or WIDTH - x < NEIGHBOR_DIST or HEIGHT - y < NEIGHBOR_DIST):
onEdge = True
else:
onEdge = False
# Get cell neighbors
gridIndex = int(x//GRID_CELL_SIZE)%GRID_WIDTH + int(y//GRID_CELL_SIZE)%GRID_HEIGHT * GRID_WIDTH
cell = lookUpTable[gridIndex, lookUpIndex]
startIndex = cellIndexTable[cell]
if startIndex != -1: # True if there's at least one agent on the current cell
# Condition to check if there are more agents to observe on the cell
cond = cell
while cond == cell and startIndex < POPULATION:# and numNeighbors < 20:
agent = boidPositionTable[startIndex,0]
startIndex += 1
# Check if at the end of table
if startIndex >= POPULATION:
cond = -1
# Or set condition to check if all agents on the cell have been observed
else:
cond = boidPositionTable[startIndex,1]
# Get agent data and check if it's a neighbor
if agent != index:
ax = boidData[agent,1]
ay = boidData[agent,2]
adx = boidData[agent,3]
ady = boidData[agent,4]
if onEdge:
(ax, ay) = minimalToroidalDistance(x,y,ax,ay)
distX = x - ax
distY = y - ay
distLengthSquared = distX**2 + distY**2
isTooClose = (distLengthSquared < SEPARATION_DIST_SQUARED)
isNeighbor = (distLengthSquared < NEIGHBOR_DIST_SQUARED) and isInFOV(dx,dy,distX,distY,distLengthSquared)
# SPOTLIGHT
if SPOTLIGHT and index == 0:
# Agent spotlight
renderData[index, 2] = 1.0
if isTooClose:
renderData[agent, 2] = 2.0
elif isNeighbor:
renderData[agent, 2] = 3.0
else:
renderData[agent, 2] = 4.0
# Collect agent data
if isNeighbor:
numNeighbors += 1
alignmentX += adx
alignmentY += ady
cohesionX += ax
cohesionY += ay
if isTooClose:
# distX /= distLengthSquared
# distY /= distLengthSquared
separationX += distX
separationY += distY
numSeparation += 1
if numNeighbors > 0:
# Apply boid rules
# Alignment
# Normalize alignment vector
l = math.sqrt(alignmentX**2 + alignmentY**2)
alignmentX, alignmentY = alignmentX / l, alignmentY / l
# Add alignment vector to direction
resX += alignmentX * ALIGNMENT
resY += alignmentY * ALIGNMENT
# Cohesion
# Get vector towards center of mass
cohesionX /= numNeighbors
cohesionY /= numNeighbors
cohesionX -= x
cohesionY -= y
# Normalize cohesion vector
l = math.sqrt(cohesionX**2 + cohesionY**2)
cohesionX, cohesionY = cohesionX / l, cohesionY / l
# Add cohesion vector to direction
resX += cohesionX * COHESION
resY += cohesionY * COHESION
# Separation
if numSeparation > 0:
# Normalize separation vector
l = math.sqrt(separationX**2 + separationY**2)
separationX, separationY = separationX / l, separationY / l
# Add separation vector to direction
resX += separationX * SEPARATION
resY += separationY * SEPARATION
# Write to direction buffer at respective boid and cell index
boidData[index, lookUpIndex*3+5] = resX
boidData[index, lookUpIndex*3+6] = resY
boidData[index, lookUpIndex*3+7] = numNeighbors
# Reads direction vectors and weights for all neighbor cell calculations
# Gets final direction and position and writes to memory
# boidData -> renderData
@cuda.jit
def writeData(boidData, renderData, WRAP_AROUND, SPEED):
index = cuda.grid(1)
if index >= POPULATION:
return
x, y = boidData[index,1], boidData[index,2]
# Previous direction
pdx, pdy = boidData[index,3], boidData[index,4]
dx = 0.0
dy = 0.0
total = 0
for i in range(9):
weight = boidData[index, i*3+7]
total += weight
dx += boidData[index, i*3+5] * weight
dy += boidData[index, i*3+6] * weight
if total > 0:
dx /= total
dy /= total
# If Edge Wrapping is off, avoid walls
MARGIN = 200
if not WRAP_AROUND:
wallX, wallY = 0.0, 0.0
x2 = WIDTH - x
y2 = HEIGHT - y
if x < MARGIN:
wallX = MARGIN - x
wallX /= MARGIN
wallX = wallX ** 2
elif x2 < MARGIN:
wallX = MARGIN - x2
wallX /= MARGIN
wallX = wallX ** 2
wallX *= -1
if y < MARGIN:
wallY = MARGIN - y
wallY /= MARGIN
wallY = wallY ** 2
elif y2 < MARGIN:
wallY = MARGIN - y2
wallY /= MARGIN
wallY = wallY ** 2
wallY *= -1
dx += wallX / 10
dy += wallY / 10
# Update directions
dx = pdx + dx * 3 # Weight w, ratio of 1:w with old/new direction
dy = pdy + dy * 3
# Clamp speed
l = math.sqrt(dx**2 + dy**2)
if l > 1.0:
dx, dy = dx / l, dy / l
# Move position
x += dx * SPEED
y += dy * SPEED
# Edge wrap
if WRAP_AROUND:
if x >= WIDTH:
x = 0
elif x < 0 :
x = WIDTH
if y >= HEIGHT:
y = 0
elif y < 0 :
y = HEIGHT
# Render coordinates
rx = (x - HALF_WIDTH) / HALF_WIDTH
ry = (y - HALF_HEIGHT) / HALF_HEIGHT
# WRITE TO MEMORY
# Write screen position in [-1,1]
renderData[index,0] = rx
renderData[index,1] = ry
# renderData[index,2] = ry
# Store data for next calculation
boidData[index,1] = x
boidData[index,2] = y
boidData[index,3] = dx
boidData[index,4] = dy
# If agent is near an edge and WrapAround is set to True
# Checks if other agent is a neighbor for all neighboring cell configurations (9)
# Looks for minimal toroidal distance and returns perceived position
# Given what is provided by Numba we have to get the minimum manually
@cuda.jit(device=True)
def minimalToroidalDistance(x,y,ax,ay):
# X axis
ax1 = ax - WIDTH
ax2 = ax + WIDTH
mix = abs(x - ax)
mix1 = abs(x - ax1)
mix2 = abs(x - ax2)
if mix1 < mix:
mix = mix1
ax = ax1
if mix2 < mix:
mix = mix2
ax = ax2
# Y axis
ay1 = ay - HEIGHT
ay2 = ay + HEIGHT
miy = abs(y - ay)
miy1 = abs(y - ay1)
miy2 = abs(y - ay2)
if miy1 < miy:
miy = miy1
ay = ay1
if miy2 < miy:
miy = miy2
ay = ay2
return (ax,ay)
@cuda.jit(device=True)
def isInFOV(dx,dy,adx,ady,neighborVector):
return ( (dx * adx + dy * ady) / (math.sqrt(dx**2 + dy**2) * math.sqrt(neighborVector)) ) < 0.8