From 7b2ea0587a76db7b075e2586427ac3ead96316d0 Mon Sep 17 00:00:00 2001 From: JulioJerez Date: Sun, 5 Nov 2023 16:17:26 -0800 Subject: [PATCH] using tanh as activation and making a better pooling algorithm (wip) --- .../ndSandbox/toolbox/ndTestDeepBrain.cpp | 6 +- .../ndBrainLayerConvolutionalMaxPooling.cpp | 204 +++++++++++------- 2 files changed, 132 insertions(+), 78 deletions(-) diff --git a/newton-4.00/applications/ndSandbox/toolbox/ndTestDeepBrain.cpp b/newton-4.00/applications/ndSandbox/toolbox/ndTestDeepBrain.cpp index 64f52afa80..39f7575104 100644 --- a/newton-4.00/applications/ndSandbox/toolbox/ndTestDeepBrain.cpp +++ b/newton-4.00/applications/ndSandbox/toolbox/ndTestDeepBrain.cpp @@ -529,21 +529,21 @@ static void MnistTrainingSet() //layers.PushBack(new ndBrainLayerConvolutional(width, height, 1, 5, 16)); layers.PushBack(new ndBrainLayerConvolutional(width, height, 1, 5, 3)); - layers.PushBack(new ndBrainLayerReluActivation(layers[layers.GetCount() - 1]->GetOutputSize())); + layers.PushBack(new ndBrainLayerApproximateTanhActivation(layers[layers.GetCount() - 1]->GetOutputSize())); conv = (ndBrainLayerConvolutional*)(layers[layers.GetCount() - 2]); layers.PushBack(new ndBrainLayerConvolutionalMaxPooling(conv->GetOutputWidth(), conv->GetOutputHeight(), conv->GetOutputChannels())); pooling = (ndBrainLayerConvolutionalMaxPooling*)(layers[layers.GetCount() - 1]); //layers.PushBack(new ndBrainLayerConvolutional(pooling->GetOutputWidth(), pooling->GetOutputHeight(), pooling->GetOutputChannels(), 3, 16)); layers.PushBack(new ndBrainLayerConvolutional(pooling->GetOutputWidth(), pooling->GetOutputHeight(), pooling->GetOutputChannels(), 3, 3)); - layers.PushBack(new ndBrainLayerReluActivation(layers[layers.GetCount() - 1]->GetOutputSize())); + layers.PushBack(new ndBrainLayerApproximateTanhActivation(layers[layers.GetCount() - 1]->GetOutputSize())); conv = (ndBrainLayerConvolutional*)(layers[layers.GetCount() - 2]); layers.PushBack(new 
ndBrainLayerConvolutionalMaxPooling(conv->GetOutputWidth(), conv->GetOutputHeight(), conv->GetOutputChannels())); pooling = (ndBrainLayerConvolutionalMaxPooling*)(layers[layers.GetCount() - 1]); //layers.PushBack(new ndBrainLayerConvolutional(pooling->GetOutputWidth(), pooling->GetOutputHeight(), pooling->GetOutputChannels(), 3, 32)); layers.PushBack(new ndBrainLayerConvolutional(pooling->GetOutputWidth(), pooling->GetOutputHeight(), pooling->GetOutputChannels(), 3, 2)); - layers.PushBack(new ndBrainLayerReluActivation(layers[layers.GetCount() - 1]->GetOutputSize())); + layers.PushBack(new ndBrainLayerApproximateTanhActivation(layers[layers.GetCount() - 1]->GetOutputSize())); conv = (ndBrainLayerConvolutional*)(layers[layers.GetCount() - 2]); layers.PushBack(new ndBrainLayerConvolutionalMaxPooling(conv->GetOutputWidth(), conv->GetOutputHeight(), conv->GetOutputChannels())); pooling = (ndBrainLayerConvolutionalMaxPooling*)(layers[layers.GetCount() - 1]); diff --git a/newton-4.00/sdk/dBrain/ndBrainLayerConvolutionalMaxPooling.cpp b/newton-4.00/sdk/dBrain/ndBrainLayerConvolutionalMaxPooling.cpp index 05acdf4981..a33e45ef79 100644 --- a/newton-4.00/sdk/dBrain/ndBrainLayerConvolutionalMaxPooling.cpp +++ b/newton-4.00/sdk/dBrain/ndBrainLayerConvolutionalMaxPooling.cpp @@ -102,98 +102,152 @@ ndBrainLayer* ndBrainLayerConvolutionalMaxPooling::Load(const ndBrainLoad* const //return layer; } +void ndBrainLayerConvolutionalMaxPooling::InputDerivative(const ndBrainVector& output, const ndBrainVector& outputDerivative, ndBrainVector& inputDerivative) const +{ + ndAssert(output.GetCount() == outputDerivative.GetCount()); + ndAssert(m_index.GetCount() == outputDerivative.GetCount()); + + inputDerivative.Set(ndBrainFloat(0.0f)); + for (ndInt32 i = m_index.GetCount() - 1; i >= 0; --i) + { + ndInt32 index = m_index[i]; + inputDerivative[index] = outputDerivative[i]; + } +} + + void ndBrainLayerConvolutionalMaxPooling::MakePrediction(const ndBrainVector& input, ndBrainVector& output) 
const { ndAssert(input.GetCount() == GetInputSize()); ndAssert(output.GetCount() == GetOutputSize()); - ndInt32 baseOut = 0; + //ndInt32 baseOut___ = 0; + //for (ndInt32 k = 0; k < m_channels; ++k) + //{ + // const ndInt32 base = k * m_height * m_width; + // const ndBrainMemVector in(&input[base], m_height * m_width); + // + // ndInt32 baseIn = 0; + // for (ndInt32 i = 0; i < (m_height & -2); i += 2) + // { + // for (ndInt32 j = 0; j < (m_width & -2); j += 2) + // { + // ndInt32 index = baseIn + j; + // ndBrainFloat maxValue = in[index]; + // if (in[baseIn + j + 1] > maxValue) + // { + // index = baseIn + j + 1; + // maxValue = in[index]; + // } + // if (in[baseIn + m_width + j] > maxValue) + // { + // index = baseIn + m_width + j; + // maxValue = in[index]; + // } + // if (in[baseIn + m_width + j + 1] > maxValue) + // { + // index = baseIn + m_width + j + 1; + // maxValue = in[index]; + // } + // output[baseOut___ + (j >> 1)] = maxValue; + // m_index[baseOut___ + (j >> 1)] = base + index; + // } + // + // if (m_width & 1) + // { + // ndInt32 index = baseIn + m_width - 1; + // ndBrainFloat maxValue = in[index]; + // if (in[baseIn + m_width + m_width - 1] > maxValue) + // { + // index = baseIn + m_width + m_width - 1; + // maxValue = in[index]; + // } + // output[baseOut___ + (m_width >> 1)] = maxValue; + // m_index[baseOut___ + (m_width >> 1)] = base + index; + // } + // + // baseIn += m_width * 2; + // baseOut___ += (m_width + 1) >> 1; + // } + // + // if (m_height & 1) + // { + // for (ndInt32 j = 0; j < (m_width & -2); j += 2) + // { + // ndInt32 index = baseIn + j; + // ndBrainFloat maxValue = in[index]; + // if (in[baseIn + j + 1] > maxValue) + // { + // index = baseIn + j + 1; + // maxValue = in[index]; + // } + // output[baseOut___ + (j >> 1)] = maxValue; + // m_index[baseOut___ + (j >> 1)] = base + index; + // } + // + // if (m_width & 1) + // { + // ndInt32 index = baseIn + m_width - 1; + // ndBrainFloat maxValue = in[index]; + // output[baseOut___ + 
(m_width >> 1)] = maxValue; + // m_index[baseOut___ + (m_width >> 1)] = base + index; + // } + // + // baseIn += m_width * 2; + // baseOut___ += (m_width + 1) >> 1; + // } + //} + + ndBrainFloat block[4]; + const ndBrainFloat minValue = ndBrainFloat(-1.0e20f); + const ndInt32 inputSize = m_height * m_width; + + ndInt32 offsetOut = 0; + ndInt32 inputOffset = 0; for (ndInt32 k = 0; k < m_channels; ++k) { - const ndInt32 base = k * m_height * m_width; - const ndBrainMemVector in(&input[base], m_height * m_width); - - ndInt32 baseIn = 0; - for (ndInt32 i = 0; i < (m_height & -2); i += 2) + ndInt32 inputStride = 0; + const ndBrainMemVector in(&input[inputOffset], inputSize); + for (ndInt32 y = 0; y < m_height; y += 2) { - for (ndInt32 j = 0; j < (m_width & -2); j += 2) + ndInt32 yMask = (y + 1) < m_width; + for (ndInt32 x = 0; x < m_width; x += 2) { - ndInt32 index = baseIn + j; - ndBrainFloat maxValue = in[index]; - if (in[baseIn + j + 1] > maxValue) - { - index = baseIn + j + 1; - maxValue = in[index]; - } - if (in[baseIn + m_width + j] > maxValue) - { - index = baseIn + m_width + j; - maxValue = in[index]; - } - if (in[baseIn + m_width + j + 1] > maxValue) + ndInt32 xMask = (x + 1) < m_width; + + ndInt32 x0 = inputStride + x; + ndInt32 x1 = x0 + 1; + ndInt32 x2 = x0 + m_width; + ndInt32 x3 = x2 + 1; + block[0] = in[x0]; + block[1] = xMask ? in[x1] : minValue; + block[2] = yMask ? in[x2] : minValue; + block[3] = (xMask & yMask) ? 
in[x3] : minValue; + ndInt32 index = x0; + ndBrainFloat maxValue = block[0]; + if (block[1] > maxValue) { - index = baseIn + m_width + j + 1; - maxValue = in[index]; + index = x1; + maxValue = block[1]; } - output[baseOut + (j >> 1)] = maxValue; - m_index[baseOut + (j >> 1)] = base + index; - } - - if (m_width & 1) - { - ndInt32 index = baseIn + m_width - 1; - ndBrainFloat maxValue = in[index]; - if (in[baseIn + m_width + m_width - 1] > maxValue) + if (block[2] > maxValue) { - index = baseIn + m_width + m_width - 1; - maxValue = in[index]; + index = x2; + maxValue = block[2]; } - output[baseOut + (m_width >> 1)] = maxValue; - m_index[baseOut + (m_width >> 1)] = base + index; - } - - baseIn += m_width * 2; - baseOut += (m_width + 1) >> 1; - } - - if (m_height & 1) - { - for (ndInt32 j = 0; j < (m_width & -2); j += 2) - { - ndInt32 index = baseIn + j; - ndBrainFloat maxValue = in[index]; - if (in[baseIn + j + 1] > maxValue) + if (block[3] > maxValue) { - index = baseIn + j + 1; - maxValue = in[index]; + index = x3; + maxValue = block[3]; } - output[baseOut + (j >> 1)] = maxValue; - m_index[baseOut + (j >> 1)] = base + index; - } - - if (m_width & 1) - { - ndInt32 index = baseIn + m_width - 1; - ndBrainFloat maxValue = in[index]; - output[baseOut + (m_width >> 1)] = maxValue; - m_index[baseOut + (m_width >> 1)] = base + index; + output[offsetOut + (x >> 1)] = maxValue; + m_index[offsetOut + (x >> 1)] = inputOffset + index; } - baseIn += m_width * 2; - baseOut += (m_width + 1) >> 1; + inputStride += m_width * 2; + offsetOut += (m_width + 1) >> 1; } - } -} - -void ndBrainLayerConvolutionalMaxPooling::InputDerivative(const ndBrainVector& output, const ndBrainVector& outputDerivative, ndBrainVector& inputDerivative) const -{ - ndAssert(output.GetCount() == outputDerivative.GetCount()); - ndAssert(m_index.GetCount() == outputDerivative.GetCount()); - inputDerivative.Set(ndBrainFloat(0.0f)); - for (ndInt32 i = m_index.GetCount() - 1; i >= 0; --i) - { - ndInt32 index = 
m_index[i]; - inputDerivative[index] = outputDerivative[i]; + inputOffset += inputSize; } -} +} \ No newline at end of file