Skip to content

Commit

Permalink
first controller that actually follows the animation (wip)
Browse files Browse the repository at this point in the history
not very robust, but it seems a starting point
  • Loading branch information
JulioJerez committed Oct 22, 2023
1 parent 327a525 commit 48a6184
Show file tree
Hide file tree
Showing 6 changed files with 348 additions and 36 deletions.
260 changes: 260 additions & 0 deletions newton-4.00/applications/media/ndQuadruped_1VPG.dnn

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@

namespace ndQuadruped_1
{
#define ND_TRAIN_MODEL
//#define ND_TRAIN_MODEL

#define CONTROLLER_NAME "ndQuadruped_1-VPG.dnn"
#define CONTROLLER_NAME "ndQuadruped_1VPG.dnn"

#define D_MAX_SWING_DIST_X ndReal(0.10f)
#define D_MAX_SWING_DIST_Z ndReal(0.15f)
Expand Down
37 changes: 16 additions & 21 deletions newton-4.00/applications/ndSandbox/demos/ndUnicycle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,36 +185,31 @@ namespace ndUnicycle
return m_model->GetReward();
}

#ifdef D_USE_VANILLA_POLICY_GRAD
virtual void ApplyActions(ndBrainFloat* const actions) const
{
m_model->ApplyActions(actions);
}
#else
virtual void ApplyActions(ndBrainFloat* const actions) const
{
if (GetEpisodeFrames() >= 15000)
{
for (ndInt32 i = 0; i < m_actionsSize; ++i)
#ifndef D_USE_VANILLA_POLICY_GRAD
if (GetEpisodeFrames() >= 15000)
{
ndReal gaussianNoise = ndReal(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(2.0f)));
ndReal clippiedNoisyAction = ndClamp(gaussianNoise, ndReal(-1.0f), ndReal(1.0f));
actions[i] = clippiedNoisyAction;
for (ndInt32 i = 0; i < m_actionsSize; ++i)
{
ndReal gaussianNoise = ndReal(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(2.0f)));
ndReal clippiedNoisyAction = ndClamp(gaussianNoise, ndReal(-1.0f), ndReal(1.0f));
actions[i] = clippiedNoisyAction;
}
}
}
else if (GetEpisodeFrames() >= 10000)
{
for (ndInt32 i = 0; i < m_actionsSize; ++i)
else if (GetEpisodeFrames() >= 10000)
{
ndReal gaussianNoise = ndReal(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(1.0f)));
ndReal clippiedNoisyAction = ndClamp(gaussianNoise, ndReal(-1.0f), ndReal(1.0f));
actions[i] = clippiedNoisyAction;
for (ndInt32 i = 0; i < m_actionsSize; ++i)
{
ndReal gaussianNoise = ndReal(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(1.0f)));
ndReal clippiedNoisyAction = ndClamp(gaussianNoise, ndReal(-1.0f), ndReal(1.0f));
actions[i] = clippiedNoisyAction;
}
}
}
#endif

m_model->ApplyActions(actions);
}
#endif

void GetObservation(ndBrainFloat* const observation)
{
Expand Down
5 changes: 4 additions & 1 deletion newton-4.00/sdk/dBrain/ndBrainAgentContinueVPG_Trainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class ndBrainAgentContinueVPG_Trainer : public ndBrainAgent, public ndBrainThrea
m_extraTrajectorySteps = 1024 * 2;

m_hiddenLayersNumberOfNeurons = 64;
//m_hiddenLayersNumberOfNeurons = 64 + 3;

m_sigma = ndBrainFloat(0.5f);
m_learnRate = ndBrainFloat(0.0005f);
Expand Down Expand Up @@ -171,13 +172,15 @@ ndBrainAgentContinueVPG_Trainer<statesDim, actionDim>::ndBrainAgentContinueVPG_T
{
// build neural net
ndFixSizeArray<ndBrainLayer*, 32> layers;

layers.PushBack(new ndBrainLayerLinear(statesDim, hyperParameters.m_hiddenLayersNumberOfNeurons));
layers.PushBack(new ndBrainLayerTanhActivation(layers[layers.GetCount() - 1]->GetOutputSize()));
for (ndInt32 i = 1; i < hyperParameters.m_numberOfHiddenLayers; ++i)
{
ndAssert(layers[layers.GetCount() - 1]->GetOutputSize() == hyperParameters.m_hiddenLayersNumberOfNeurons);
layers.PushBack(new ndBrainLayerLinear(hyperParameters.m_hiddenLayersNumberOfNeurons, hyperParameters.m_hiddenLayersNumberOfNeurons));
layers.PushBack(new ndBrainLayerTanhActivation(hyperParameters.m_hiddenLayersNumberOfNeurons));
//layers.PushBack(new ndBrainLayerTanhActivation(hyperParameters.m_hiddenLayersNumberOfNeurons));
layers.PushBack(new ndBrainLayerApproximateTanhActivation(hyperParameters.m_hiddenLayersNumberOfNeurons));
}
layers.PushBack(new ndBrainLayerLinear(hyperParameters.m_hiddenLayersNumberOfNeurons, m_actionsSize));
//layers.PushBack(new ndBrainLayerSoftmaxActivation(m_actionsSize));
Expand Down
76 changes: 65 additions & 11 deletions newton-4.00/sdk/dBrain/ndBrainLayerTanhActivation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,59 @@ void ndBrainLayerApproximateTanhActivation::MakePrediction(const ndBrainVector&
void ndBrainLayerApproximateTanhActivation::MakePrediction(const ndBrainVector& input, ndBrainVector& output) const
{
// rational approximation of tanh, approximately 4 times faster than the standard tanh.
// error bound lower than 1-e6 for the entire rage[-10, 10]
// error bound lower than 2.0e-6 for the entire range [-10, 10]
// the only problem is that it is not exactly zero for input zero,
// however this can be very good for dense hidden layers,
// in fact it seems to produce better or equal results than the standard tanh

#if 0
auto ScalarTanhApproximation = [](ndBrainFloat in)
{
const ndBrainFloat c1 = ndReal(0.03138777f);
const ndBrainFloat c2 = ndReal(0.276281267f);
const ndBrainFloat log2f = ndReal(1.442695022f);

ndBrainFloat v = log2f * ndClamp(in, ndBrainFloat(-10.0f), ndBrainFloat(10.0f));
ndBrainFloat floatIntPart = ndBrainFloat(ndFloor(v));
ndBrainFloat x = v - floatIntPart;
ndBrainFloat xx = x * x;
ndBrainFloat v1 = log2f + c2 * xx;
ndBrainFloat v2 = x + xx * c1 * x;
ndBrainFloat v3 = v2 + v1;
ndBrainFloat v4 = v2 - v1;
#ifdef D_BRAIN_USES_REAL
* ((ndInt32*)&v3) += ndInt32(floatIntPart) << 24;
#else
* ((ndInt64*)&v3) += ndInt64(floatIntPart) << 53;
#endif
return (v3 + v4) / (v3 - v4);
};

auto VectorTanhApproximation = [](const ndBrainVector4& in)
{
ndBrainVector4 v(m_log2f * in.GetMin(m_max).GetMax(m_min));
ndBrainVector4 intPart(v.GetInt());
ndBrainVector4 x(v - v.Floor());
ndBrainVector4 xx(x * x);
ndBrainVector4 v1(m_log2f + m_c2 * xx);
ndBrainVector4 v2(x + xx * m_c1 * x);
ndBrainVector4 v3(v2 + v1);
for (ndInt32 i = 0; i < 4; ++i)
{
#ifdef D_BRAIN_USES_REAL
v3.m_i[i] += intPart.m_i[i] << 24;
#else
v3.m_i[i] += intPart.m_i[i] << 53;
#endif
}
ndBrainVector4 v4(v2 - v1);
ndBrainVector4 num(v3 + v4);
ndBrainVector4 den(v3 - v4);
return num.Divide(den);
};

#else

const ndBrainVector4 c1(m_c1);
const ndBrainVector4 c2(m_c2);
const ndBrainVector4 min(m_min);
Expand All @@ -139,10 +187,6 @@ void ndBrainLayerApproximateTanhActivation::MakePrediction(const ndBrainVector&

auto ScalarTanhApproximation = [c1, c2, min, max, log2f](ndBrainFloat in)
{
//const ndBrainFloat c1 = ndReal(0.03138777f);
//const ndBrainFloat c2 = ndReal(0.276281267f);
//const ndBrainFloat log2f = ndReal(1.442695022f);

ndBrainFloat v = log2f[0] * ndClamp(in, min[0], max[0]);
ndBrainFloat floatIntPart = ndBrainFloat(ndFloor(v));
ndBrainFloat x = v - floatIntPart;
Expand Down Expand Up @@ -181,12 +225,7 @@ void ndBrainLayerApproximateTanhActivation::MakePrediction(const ndBrainVector&
ndBrainVector4 den(v3 - v4);
return num.Divide(den);
};

// check accuracy, expected error less that 1e-6 of all cases.
//ndVector xxx0(-1.3f, 0.0f, 1.25f, 6.0f);
//ndVector xxx1(ndTanh(xxx0[0]), ndTanh(xxx0[1]), ndTanh(xxx0[2]), ndTanh(xxx0[3]));
//ndVector xxx2(ScalarTanhApproximation(xxx0[0]), ScalarTanhApproximation(xxx0[1]), ScalarTanhApproximation(xxx0[2]), ScalarTanhApproximation(xxx0[3]));
//ndVector xxx3(VectorTanhApproximation(xxx0));
#endif

const ndInt32 count = input.GetCount() / 4;
ndBrainVector4* const vectorOutput = (ndBrainVector4*)&output[0];
Expand All @@ -195,10 +234,25 @@ void ndBrainLayerApproximateTanhActivation::MakePrediction(const ndBrainVector&
for (ndInt32 i = count - 1; i >= 0 ; --i)
{
vectorOutput[i] = VectorTanhApproximation(vectorInput[i]);
#if _DEBUG
// check accuracy, expected error less than 2.0e-6 in all cases.
ndBrainVector4 xxx0(vectorOutput[i]);
ndBrainVector4 xxx1 (ndTanh(vectorInput[i].m_x), ndTanh(vectorInput[i].m_y), ndTanh(vectorInput[i].m_z), ndTanh(vectorInput[i].m_w));
ndAssert(ndAbs(xxx0.m_x - xxx1.m_x) < ndFloat32(2.0e-6f));
ndAssert(ndAbs(xxx0.m_y - xxx1.m_y) < ndFloat32(2.0e-6f));
ndAssert(ndAbs(xxx0.m_z - xxx1.m_z) < ndFloat32(2.0e-6f));
ndAssert(ndAbs(xxx0.m_w - xxx1.m_w) < ndFloat32(2.0e-6f));
#endif
}
for (ndInt32 i = input.GetCount() - 1; i >= count * 4; --i)
{
output[i] = ndBrainFloat(ScalarTanhApproximation(input[i]));
#if _DEBUG
// check accuracy, expected error less than 2.0e-6 in all cases.
ndFloat32 xxx0 = output[i];
ndFloat32 xxx1 = ndTanh(input[i]);
ndAssert(ndAbs(xxx1 - xxx0) < ndFloat32(2.0e-6f));
#endif
}

output.FlushToZero();
Expand Down
2 changes: 1 addition & 1 deletion newton-4.00/sdk/dBrain/ndBrainLayerTanhActivation.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ class ndBrainLayerApproximateTanhActivation : public ndBrainLayerTanhActivation

static ndBrainVector4 m_c1;
static ndBrainVector4 m_c2;
static ndBrainVector4 m_log2f;
static ndBrainVector4 m_max;
static ndBrainVector4 m_min;
static ndBrainVector4 m_log2f;
};

#endif
Expand Down

0 comments on commit 48a6184

Please sign in to comment.