-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinv-test1.py
125 lines (94 loc) · 3.06 KB
/
inv-test1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#%%
import math
import time
import numba
from numba import cuda
import ZMCIntegral
import linearalgcuda as la
import numpy as np
N = 2
#I = np.eye(N, dtype=np.complex64)
#B = numba.cuda.to_device(I)
#%%
# user defined function
@numba.cuda.jit(device=True)
def my_func(x):
# Declare empty arrays to be filled with the values passed into the function
# and the inverse of A into B
A = numba.cuda.shared.array((N,N), dtype=numba.types.complex64)
B = numba.cuda.shared.array((N,N), dtype=numba.types.complex64)
#tmp = numba.cuda.shared.array((N,N), dtype=numba.types.complex64)
# Assign the values in the array
A[0, 0] = 1 / complex(x[0], x[1])
A[0, 1] = complex(0, 0)
A[1, 0] = complex(0, 0)
A[1, 1] = 1 / complex(x[2], x[3])
for i in range(N):
for j in range(N):
if i == j:
B[i,j] = complex(1,0)
else:
B[i,j] = complex(0,0)
# B = la.myInvSZ(A, B, N)
for k in range(N-1):
if (A[k,k].real != 1) or (A[k,k].imag != 0):
scale = A[k,k]
for j in range(N):
B[k,j] = B[k,j] / scale
A[k,j] = A[k,j] / scale
for i in range(k+1, N):
if (A[i,k].real != 0) or (A[i,k].imag != 0):
ratio = A[i,k]
for j in range(N):
A[i,j] = A[i,j] - ratio * A[k,j]
B[i,j] = B[i,j] - ratio * B[k,j]
if (A[N-1,N-1].real != 1) or (A[N-1,N-1].imag != 0):
for j in range(N):
B[N-1,j] = B[N-1,j] / A[N-1,N-1]
A[N-1,N-1] = complex(1,0)
# # ELIMINATE UPPER TRIANGLE
for k in range(1, N):
for i in range(k):
ratio = A[i,k]
for j in range(N):
A[i,j] = A[i,j] - ratio * A[k,j]
B[i,j] = B[i,j] - ratio * B[k,j]
tr = 0 + 0j
tr = la.trace(B, N, tr)
return (tr * tr.conjugate()).real
#%%
@numba.cuda.jit()
def kernelfunc(points, results, samples):
tid = cuda.threadIdx.x
blkid = cuda.blockIdx.x
blkdim = cuda.blockDim.x
i = tid + blkid * blkdim
if(i < samples):
results[i] = my_func(points[i])
#%%
samples = 10000000
a1 = 1; b1 = 2
x = (b1 - a1) * np.random.random_sample((samples,1)) + a1
a2 = 2; b2 = 3
y = (b2 - a2) * np.random.random_sample((samples,1)) + a2
a3 = 3; b3 = 4
z = (b3 - a3) * np.random.random_sample((samples,1)) + a3
a4 = 4; b4 = 5
q = (b4 - a4) * np.random.random_sample((samples,1)) + a4
matrix = np.concatenate([x, y, z, q], axis=1)
results = np.zeros(samples)
threadsperblock = 32
blockspergrid = (samples + (threadsperblock - 1)) // threadsperblock
tic = time.time()
kernelfunc[blockspergrid, threadsperblock](matrix, results, samples)
norm = (b1-a1) * (b2-a2) * (b3-a3) * (b4-a4)
Q_N = norm / samples * np.sum(results)
Q_err = np.var(results) / samples
toc = time.time()
#%%
# print the formatted result
print('result = {:.5E}, error?= {:.3E}'.format(Q_N, Q_err))
print('Time to calculate: %5.4f s' % (toc-tic))
#%%
result = 7.43323E+01, error?= 4.939E-06
Time to calculate: 1.1564 s