parallel_autotuning.py
113 lines (87 loc) · 3.44 KB
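"""Parallel autotuning driver for grudge's loopy DG kernels.

Distributes candidate kernel-transformation parameter sets across GPUs with
charm4py, timing each candidate on a profiling-enabled PyOpenCL queue bound to
one GPU per PE.
"""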
from charm4py import charm, Chare, Array, Reducer, Future
import pyopencl as cl
import numpy as np
import grudge.loopy_dg_kernels as dgk
#from grudge.execution import diff_prg, elwise_linear


class AutotuneTask(Chare):
    """Chare that binds to one GPU and runs a single set of transformation parameters."""

    def __init__(self, platform_id, params):
        self.platform_id = platform_id
        self.params = params

    def get_queue(self):
        # Assign this PE to one of the platform's GPUs, round-robin by PE number
        platforms = cl.get_platforms()
        gpu_devices = platforms[self.platform_id].get_devices(device_type=cl.device_type.GPU)
        n_gpus = len(gpu_devices)
        ctx = cl.Context(devices=[gpu_devices[charm.myPe() % n_gpus]])
        profiling = cl.command_queue_properties.PROFILING_ENABLE
        queue = cl.CommandQueue(ctx, properties=profiling)
        return queue

    def run(self):
        # Placeholder: report the parameters this chare was given
        print([self.params, np.random.rand()])


class Test(Chare):
    """Small test chare for checking cross-PE messaging and reductions."""

    def start(self):
        print('I am element', self.thisIndex, 'on PE', charm.myPe(),
              'sending a msg to element 1')
        self.thisProxy[1].sayHi()

    #@coro
    def sayHi(self, future=None):
        rn = np.random.rand()
        print('Hello from element', self.thisIndex, 'on PE', charm.myPe(), 'random', rn)
        if future is not None:
            # Contribute this element's value to a max-reduction that completes the future
            self.reduce(future, rn, Reducer.max)


def get_queue(pe_num, platform_num=0):
    """Return a profiling-enabled PyOpenCL queue, mapping PEs to GPUs round-robin."""
    platforms = cl.get_platforms()
    gpu_devices = platforms[platform_num].get_devices(device_type=cl.device_type.GPU)
    ctx = cl.Context(devices=[gpu_devices[pe_num % len(gpu_devices)]])
    queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
    return queue
    #return gpu_devices[pe_num % len(gpu_devices)].int_ptr


def do_work(args):
    # args is a (params, kernel) pair handed out by charm.pool.map
    params = args[0]
    knl = args[1]
    queue = get_queue(charm.myPe())
    print("PE: ", charm.myPe())
    # Apply the candidate transformations to the kernel and time the result
    avg_time, transform_list = dgk.run_tests.apply_transformations_and_run_test(
        queue, knl, dgk.run_tests.generic_test, params)
    return avg_time, params


def square(x):
    return x**2


def main(args):
    # Create queue, assume all GPUs on the machine are the same
    """
    platforms = cl.get_platforms()
    platform_id = 0
    gpu_devices = platforms[platform_id].get_devices(device_type=cl.device_type.GPU)
    n_gpus = len(gpu_devices)
    ctx = cl.Context(devices=[gpu_devices[charm.myPe() % n_gpus]])
    profiling = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(ctx, properties=profiling)

    assert charm.numPes() > 1
    assert charm.numPes() - 1 <= charm.numHosts()*len(gpu_devices)
    # Check that it can assign one PE to each GPU
    # The first PE is used for scheduling
    # Not certain how this will work with multiple nodes

    from grudge.execution import diff_prg, elwise_linear_prg
    knl = diff_prg(3, 1000000, 10, np.float64)
    params = dgk.run_tests.gen_autotune_list(queue, knl)
    args = [[param, knl] for param in params]

    # May help to balance workload
    from random import shuffle
    shuffle(args)

    #a = Array(AutotuneTask, dims=(len(args)), args=args[0])
    #a.get_queue()

    result = charm.pool.map(do_work, args)
    sort_key = lambda entry: entry[0]
    result.sort(key=sort_key)
    for r in result:
        print(r)

    #knl = diff_prg(3, 100000, 56, np.float64)
    #autotune_list = gen_autotune_list(queue, knl)
    #print(autotune_list)
    """
    print(charm.numHosts(), charm.numPes())

    f = Future()
    #a = Array(Test, charm.numPes())
    #a.sayHi(f)
    #result = f.get()
    #print(result)

    print("All finished")
    charm.exit()

charm.start(main)
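
# Example launch (an assumption, not part of this repo: a standard charm4py install
# with charmrun available; the PE count below is illustrative). charm.pool reserves
# PE 0 for scheduling, so request at least 2 PEs:
#
#   python3 -m charmrun.start +p4 parallel_autotuning.py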