
Commit bb2255e

vertical softmax + version bump
1 parent 8f7c251 commit bb2255e

File tree

5 files changed: +180 -22 lines changed

build.gradle
src/main/java/com/github/TannerLow/JavaMatrixMath/Matrix.java
src/main/resources/kernels/Matrices.cl
src/test/java/com/github/TannerLow/JavaMatrixMath/CpuTest.java
src/test/java/com/github/TannerLow/JavaMatrixMath/GpuTest.java

build.gradle

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ plugins {
 }
 
 group 'com.github.TannerLow'
-version '0.4'
+version '0.5'
 description 'Matrix Library with GPU compatibility.'
 
 repositories {

src/main/java/com/github/TannerLow/JavaMatrixMath/Matrix.java

Lines changed: 91 additions & 8 deletions
@@ -122,7 +122,7 @@ public Matrix vectorizedReluDerivative() {
         return result;
     }
 
-    public Matrix softmax() {
+    public Matrix horizontalSoftmax() {
         Matrix result = new Matrix(rows, cols);
 
         float[] buffer = new float[rows];
@@ -154,6 +154,37 @@ public Matrix softmax() {
         return result;
     }
 
+    public Matrix verticalSoftmax() {
+        Matrix result = new Matrix(rows, cols);
+
+        float[] buffer = new float[cols];
+        for(int col = 0; col < cols; col++) {
+            // calculate the max values
+            buffer[col] = -Float.MAX_VALUE;
+            for(int i = 0; i < rows; i++) {
+                float value = data[col + i * cols];
+                if(value > buffer[col]) {
+                    buffer[col] = value;
+                }
+            }
+
+            // calculate the sums
+            float sum = 0;
+            float max = buffer[col];
+            for(int i = 0; i < rows; i++) {
+                sum += Math.exp(data[col + i * cols] - max);
+            }
+
+            // calculate the softmax vectors
+            for(int i = 0; i < rows; i++) {
+                int index = col + i * cols;
+                result.data[index] = (float) (Math.exp(data[index] - max) / sum);
+            }
+        }
+
+        return result;
+    }
+
     // public Matrix fastBatchSoftmaxDerivative(Matrix output) {
     //     Matrix partialDerivatives = new Matrix(cols, cols);
     //
@@ -187,20 +218,25 @@ public static boolean isCompatibleWithGPU(GPU gpu) {
         return gpu.isInitialized() &&
                 gpu.getKernel("Matrices::matrixMultiply") != null &&
                 gpu.getKernel("Matrices::addRowToRows") != null &&
-                gpu.getKernel("Matrices::relu") != null;
+                gpu.getKernel("Matrices::addColToCols") != null &&
+                gpu.getKernel("Matrices::relu") != null &&
+                gpu.getKernel("Matrices::horizontalSoftmax") != null &&
+                gpu.getKernel("Matrices::verticalSoftmax") != null;
     }
 
     public Matrix multiply(GPU gpu, Matrix other) {
         if(cols != other.rows) {
-            return null;
+            final int[] dimensionsA = {rows, cols};
+            final int[] dimensionsB = {other.rows, other.cols};
+            throw new DimensionsMismatchException(dimensionsA, dimensionsB);
         }
 
         cl_context context = gpu.getContext();
         cl_command_queue commandQueue = gpu.getCommandQueue();
         cl_kernel kernel = gpu.getKernel("Matrices::matrixMultiply");
 
         if(kernel == null) {
-            return null;
+            throw new NullPointerException("Matrices::matrixMultiply not found to be loaded in GPU");
         }
 
         Matrix result = new Matrix(rows, other.cols);
@@ -369,7 +405,7 @@ public Matrix relu(GPU gpu) {
         cl_kernel kernel = gpu.getKernel("Matrices::relu");
 
         if(kernel == null) {
-            return null;
+            throw new NullPointerException("Matrices::relu not found to be loaded in GPU");
         }
 
         Matrix result = new Matrix(rows, cols);
@@ -409,13 +445,13 @@ public Matrix relu(GPU gpu) {
         return result;
     }
 
-    public Matrix softmax(GPU gpu) {
+    public Matrix horizontalSoftmax(GPU gpu) {
         cl_context context = gpu.getContext();
         cl_command_queue commandQueue = gpu.getCommandQueue();
-        cl_kernel kernel = gpu.getKernel("Matrices::softmax");
+        cl_kernel kernel = gpu.getKernel("Matrices::horizontalSoftmax");
 
         if(kernel == null) {
-            return null;
+            throw new NullPointerException("Matrices::horizontalSoftmax not found to be loaded in GPU");
         }
 
         Matrix result = new Matrix(rows, cols);
@@ -454,4 +490,51 @@ public Matrix softmax(GPU gpu) {
 
         return result;
     }
+
+    public Matrix verticalSoftmax(GPU gpu) {
+        cl_context context = gpu.getContext();
+        cl_command_queue commandQueue = gpu.getCommandQueue();
+        cl_kernel kernel = gpu.getKernel("Matrices::verticalSoftmax");
+
+        if(kernel == null) {
+            throw new NullPointerException("Matrices::verticalSoftmax not found to be loaded in GPU");
+        }
+
+        Matrix result = new Matrix(rows, cols);
+
+        Pointer pointerIn = Pointer.to(data);
+        Pointer pointerOut = Pointer.to(result.data);
+
+        // Allocate the memory objects for the input- and output data
+        cl_mem memoryIn = clCreateBuffer(context,
+                CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                Sizeof.cl_float * data.length, pointerIn, null);
+        cl_mem memoryOut = clCreateBuffer(context,
+                CL_MEM_READ_WRITE,
+                Sizeof.cl_float * result.data.length, null, null);
+
+        // Set the arguments for the kernel
+        int argNum = 0;
+        clSetKernelArg(kernel, argNum++, Sizeof.cl_mem, Pointer.to(memoryOut));
+        clSetKernelArg(kernel, argNum++, Sizeof.cl_mem, Pointer.to(memoryIn));
+        clSetKernelArg(kernel, argNum++, Sizeof.cl_uint, Pointer.to(new int[]{cols}));
+        clSetKernelArg(kernel, argNum++, Sizeof.cl_uint, Pointer.to(new int[]{rows}));
+
+        // Set the work-item dimensions
+        long local_work_sizes[] = new long[]{1};
+        long global_work_sizes[] = new long[]{cols};
+
+        // Execute the kernel
+        clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
+                global_work_sizes, local_work_sizes, 0, null, null);
+
+        // Read the output data
+        clEnqueueReadBuffer(commandQueue, memoryOut, CL_TRUE, 0,
+                result.data.length * Sizeof.cl_float, pointerOut, 0, null, null);
+
+        clReleaseMemObject(memoryIn);
+        clReleaseMemObject(memoryOut);
+
+        return result;
+    }
 }
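
As a usage sketch of the new CPU methods (hedged: it relies only on the Matrix(int rows, int cols, float[] data) constructor and the public rows/cols/data members already exercised in the tests below; the sample numbers are made up):

    float[] values = {1.1f, 2.2f, 0.2f,
                      -1.7f, 0.5f, 3.0f};
    Matrix m = new Matrix(2, 3, values);     // 2 rows x 3 cols, stored row-major

    Matrix byRow = m.horizontalSoftmax();    // each of the 2 rows now sums to 1
    Matrix byCol = m.verticalSoftmax();      // each of the 3 columns now sums to 1

    // Column 0 of byCol is softmax([1.1f, -1.7f]), read from data[0] and data[0 + 1 * 3].

The GPU overloads, m.horizontalSoftmax(gpu) and m.verticalSoftmax(gpu), return the same shape but now throw NullPointerException instead of returning null when the corresponding kernel has not been loaded.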

src/main/resources/kernels/Matrices.cl

Lines changed: 36 additions & 4 deletions
@@ -106,10 +106,10 @@ relu(__global float* output,
     }
 }
 
-// Add row to rows: C = exp(A[i]) for all rows i.
-__kernel void softmax(__global float* output,
-                      __global float* input,
-                      const int rowSize)
+// Softmax each element of each row with all elements of that row
+__kernel void horizontalSoftmax(__global float* output,
+                                __global float* input,
+                                const int rowSize)
 {
     int globalRow = get_global_id(0);
 
@@ -138,3 +138,35 @@ __kernel void softmax(__global float* output,
         output[index] = exp(input[index] - max) / sum;
     }
 }
+
+// Softmax each element of each column with all elements of that column
+__kernel void verticalSoftmax(__global float* output,
+                              __global float* input,
+                              const int rowSize,
+                              const int colSize)
+{
+    int globalCol = get_global_id(0);
+
+    // get the max value of the column
+    float max = -3.4028235E37f;
+    float value;
+    for (int i = 0; i < colSize; i++) {
+        value = input[globalCol + i * rowSize];
+        if(value > max) {
+            max = value;
+        }
+    }
+
+    // Calculate sum of exponentials of input elements
+    float sum = 0.0f;
+    for (int i = 0; i < colSize; i++) {
+        sum += exp(input[globalCol + i * rowSize] - max);
+    }
+
+    // Calculate softmax for each element
+    int index;
+    for (int i = 0; i < colSize; i++) {
+        index = globalCol + i * rowSize;
+        output[index] = exp(input[index] - max) / sum;
+    }
+}
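
Both kernels compute the numerically stable form softmax(x_i) = exp(x_i - max(x)) / Σ_j exp(x_j - max(x)); subtracting the row or column maximum before exponentiating keeps exp() from overflowing and cancels out of the final ratio. In verticalSoftmax one work-item owns one whole column of the row-major buffer, stepping by rowSize between rows. A minimal Java sketch of that stride, with hypothetical values purely for illustration:

    // Illustration of the row-major stride shared by Matrix.java and Matrices.cl:
    // element (row i, col j) of a rows x cols matrix lives at data[j + i * cols].
    public class ColumnStrideDemo {
        public static void main(String[] args) {
            int rows = 3, cols = 2;                      // hypothetical shape
            float[] data = {1f, 2f, 3f, 4f, 5f, 6f};     // row-major rows: {1,2}, {3,4}, {5,6}
            int j = 1;                                   // walk column 1
            for (int i = 0; i < rows; i++) {
                // same stride the kernel uses: globalCol + i * rowSize
                System.out.println(data[j + i * cols]);  // prints 2, 4, 6
            }
        }
    }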

src/test/java/com/github/TannerLow/JavaMatrixMath/CpuTest.java

Lines changed: 23 additions & 3 deletions
@@ -10,7 +10,8 @@ public static void testAll() {
         testAddColToCols();
         testRelu();
         testVectorizedReluDerivative();
-        testSoftmax();
+        testHorizontalSoftmax();
+        testVerticalSoftmax();
     }
 
     private static void testMultiply() {
@@ -114,13 +115,32 @@ private static void testVectorizedReluDerivative() {
         }
     }
 
-    private static void testSoftmax() {
+    private static void testHorizontalSoftmax() {
         float[] data = {1.1f,2.2f,0.2f,-1.7f};
         float[] expected = {0.223636f,0.671841f,0.090923f,0.013599f};
 
         Matrix m = new Matrix(1, 4, data);
 
-        Matrix result = m.softmax();
+        Matrix result = m.horizontalSoftmax();
+
+        if(result.rows != m.rows || result.cols != m.cols) {
+            throw new TestFailedException();
+        }
+
+        for(int i = 0; i < result.data.length; i++) {
+            if(!TestMath.withinMariginOfError(expected[i], result.data[i], 0.0005f)) {
+                throw new TestFailedException();
+            }
+        }
+    }
+
+    private static void testVerticalSoftmax() {
+        float[] data = {1.1f,2.2f,0.2f,-1.7f};
+        float[] expected = {0.223636f,0.671841f,0.090923f,0.013599f};
+
+        Matrix m = new Matrix(4, 1, data);
+
+        Matrix result = m.verticalSoftmax();
 
         if(result.rows != m.rows || result.cols != m.cols) {
            throw new TestFailedException();
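
The expected values in both new tests are simply the softmax of {1.1, 2.2, 0.2, -1.7}; the same vector is laid out as a 1x4 row for the horizontal case and a 4x1 column for the vertical case, so both directions must reproduce the same four numbers. A standalone sketch to regenerate them (hedged: SoftmaxExpected is a hypothetical helper, not part of the test harness):

    public class SoftmaxExpected {
        public static void main(String[] args) {
            float[] x = {1.1f, 2.2f, 0.2f, -1.7f};
            double max = 2.2;                 // max of x, subtracted for numerical stability
            double sum = 0;
            for (float v : x) {
                sum += Math.exp(v - max);
            }
            for (float v : x) {
                System.out.printf("%.6f%n", Math.exp(v - max) / sum);
            }
            // prints approximately 0.223636, 0.671841, 0.090923, 0.013599
        }
    }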

src/test/java/com/github/TannerLow/JavaMatrixMath/GpuTest.java

Lines changed: 29 additions & 6 deletions
@@ -19,7 +19,8 @@ public static void testAll() throws IOException {
         testAddRowToRows();
         testAddColToCols();
         testRelu();
-        testSoftmax();
+        testHorizontalSoftmax();
+        testVerticalSoftmax();
         }
     }
 
@@ -35,10 +36,12 @@ private static void setup() throws IOException {
         int programId = gpu.loadProgram(matricesKernelCode);
         gpu.loadKernel(programId, "Matrices", "matrixMultiply");
         gpu.loadKernel(programId, "Matrices", "addRowToRows");
+        gpu.loadKernel(programId, "Matrices", "addColToCols");
         gpu.loadKernel(programId, "Matrices", "relu");
-        gpu.loadKernel(programId, "Matrices", "softmax");
+        gpu.loadKernel(programId, "Matrices", "horizontalSoftmax");
+        gpu.loadKernel(programId, "Matrices", "verticalSoftmax");
 
-        if(!gpu.isInitialized()) {
+        if(!gpu.isInitialized() || !Matrix.isCompatibleWithGPU(gpu)) {
            throw new IllegalStateException("GPU in unexpected state.");
        }
     }
@@ -93,7 +96,7 @@ private static void testAddColToCols() {
         Matrix a = new Matrix(3,2, aData);
         Matrix b = new Matrix(3,1, bData);
 
-        Matrix result = a.addColToCols(b);
+        Matrix result = a.addColToCols(gpu, b);
 
         if(result.rows != a.rows || result.cols != a.cols) {
            throw new TestFailedException();
@@ -125,13 +128,33 @@ private static void testRelu() {
         }
     }
 
-    private static void testSoftmax() {
+    private static void testHorizontalSoftmax() {
         float[] data = {1.1f,2.2f,0.2f,-1.7f};
         float[] expected = {0.223636f,0.671841f,0.090923f,0.013599f};
 
         Matrix m = new Matrix(1, 4, data);
 
-        Matrix result = m.softmax(gpu);
+        Matrix result = m.horizontalSoftmax(gpu);
+
+        if(result.rows != m.rows || result.cols != m.cols) {
+            throw new TestFailedException();
+        }
+
+        for(int i = 0; i < result.data.length; i++) {
+            if(!TestMath.withinMariginOfError(expected[i], result.data[i], 0.0005f)) {
+                System.out.println(expected[i] + " vs. " + result.data[i]);
+                throw new TestFailedException();
+            }
+        }
+    }
+
+    private static void testVerticalSoftmax() {
+        float[] data = {1.1f,2.2f,0.2f,-1.7f};
+        float[] expected = {0.223636f,0.671841f,0.090923f,0.013599f};
+
+        Matrix m = new Matrix(4, 1, data);
+
+        Matrix result = m.verticalSoftmax(gpu);
 
         if(result.rows != m.rows || result.cols != m.cols) {
            throw new TestFailedException();
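
Because isCompatibleWithGPU now also checks addColToCols and both softmax kernels, the stricter setup() guard makes the suite fail fast when any loadKernel call did not succeed. A hedged sketch of the same guard used defensively by a caller (the gpu variable is assumed to be an already-initialized instance with the Matrices program loaded, as in setup() above; only calls visible in this diff are used):

    // Defensive pre-check before using the GPU-backed paths.
    if (!gpu.isInitialized() || !Matrix.isCompatibleWithGPU(gpu)) {
        throw new IllegalStateException("GPU in unexpected state.");
    }

    Matrix m = new Matrix(4, 1, new float[]{1.1f, 2.2f, 0.2f, -1.7f});
    Matrix probabilities = m.verticalSoftmax(gpu);   // kernel is guaranteed loaded by the check above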
