Skip to content

Commit

Permalink
first controller that actually follows the animation (wip)
Browse files Browse the repository at this point in the history
not very robust, but it seems a starting point
  • Loading branch information
JulioJerez committed Oct 22, 2023
1 parent 327a525 commit 48a6184
Show file tree
Hide file tree
Showing 6 changed files with 348 additions and 36 deletions.
260 changes: 260 additions & 0 deletions newton-4.00/applications/media/ndQuadruped_1VPG.dnn

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@

namespace ndQuadruped_1
{
#define ND_TRAIN_MODEL
//#define ND_TRAIN_MODEL

#define CONTROLLER_NAME "ndQuadruped_1-VPG.dnn"
#define CONTROLLER_NAME "ndQuadruped_1VPG.dnn"

#define D_MAX_SWING_DIST_X ndReal(0.10f)
#define D_MAX_SWING_DIST_Z ndReal(0.15f)
Expand Down
37 changes: 16 additions & 21 deletions newton-4.00/applications/ndSandbox/demos/ndUnicycle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,36 +185,31 @@ namespace ndUnicycle
return m_model->GetReward();
}

#ifdef D_USE_VANILLA_POLICY_GRAD
virtual void ApplyActions(ndBrainFloat* const actions) const
{
m_model->ApplyActions(actions);
}
#else
virtual void ApplyActions(ndBrainFloat* const actions) const
{
if (GetEpisodeFrames() >= 15000)
{
for (ndInt32 i = 0; i < m_actionsSize; ++i)
#ifndef D_USE_VANILLA_POLICY_GRAD
if (GetEpisodeFrames() >= 15000)
{
ndReal gaussianNoise = ndReal(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(2.0f)));
ndReal clippiedNoisyAction = ndClamp(gaussianNoise, ndReal(-1.0f), ndReal(1.0f));
actions[i] = clippiedNoisyAction;
for (ndInt32 i = 0; i < m_actionsSize; ++i)
{
ndReal gaussianNoise = ndReal(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(2.0f)));
ndReal clippiedNoisyAction = ndClamp(gaussianNoise, ndReal(-1.0f), ndReal(1.0f));
actions[i] = clippiedNoisyAction;
}
}
}
else if (GetEpisodeFrames() >= 10000)
{
for (ndInt32 i = 0; i < m_actionsSize; ++i)
else if (GetEpisodeFrames() >= 10000)
{
ndReal gaussianNoise = ndReal(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(1.0f)));
ndReal clippiedNoisyAction = ndClamp(gaussianNoise, ndReal(-1.0f), ndReal(1.0f));
actions[i] = clippiedNoisyAction;
for (ndInt32 i = 0; i < m_actionsSize; ++i)
{
ndReal gaussianNoise = ndReal(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(1.0f)));
ndReal clippiedNoisyAction = ndClamp(gaussianNoise, ndReal(-1.0f), ndReal(1.0f));
actions[i] = clippiedNoisyAction;
}
}
}
#endif

m_model->ApplyActions(actions);
}
#endif

void GetObservation(ndBrainFloat* const observation)
{
Expand Down
5 changes: 4 additions & 1 deletion newton-4.00/sdk/dBrain/ndBrainAgentContinueVPG_Trainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class ndBrainAgentContinueVPG_Trainer : public ndBrainAgent, public ndBrainThrea
m_extraTrajectorySteps = 1024 * 2;

m_hiddenLayersNumberOfNeurons = 64;
//m_hiddenLayersNumberOfNeurons = 64 + 3;

m_sigma = ndBrainFloat(0.5f);
m_learnRate = ndBrainFloat(0.0005f);
Expand Down Expand Up @@ -171,13 +172,15 @@ ndBrainAgentContinueVPG_Trainer<statesDim, actionDim>::ndBrainAgentContinueVPG_T
{
// build neural net
ndFixSizeArray<ndBrainLayer*, 32> layers;

layers.PushBack(new ndBrainLayerLinear(statesDim, hyperParameters.m_hiddenLayersNumberOfNeurons));
layers.PushBack(new ndBrainLayerTanhActivation(layers[layers.GetCount() - 1]->GetOutputSize()));
for (ndInt32 i = 1; i < hyperParameters.m_numberOfHiddenLayers; ++i)
{
ndAssert(layers[layers.GetCount() - 1]->GetOutputSize() == hyperParameters.m_hiddenLayersNumberOfNeurons);
layers.PushBack(new ndBrainLayerLinear(hyperParameters.m_hiddenLayersNumberOfNeurons, hyperParameters.m_hiddenLayersNumberOfNeurons));
layers.PushBack(new ndBrainLayerTanhActivation(hyperParameters.m_hiddenLayersNumberOfNeurons));
//layers.PushBack(new ndBrainLayerTanhActivation(hyperParameters.m_hiddenLayersNumberOfNeurons));
layers.PushBack(new ndBrainLayerApproximateTanhActivation(hyperParameters.m_hiddenLayersNumberOfNeurons));
}
layers.PushBack(new ndBrainLayerLinear(hyperParameters.m_hiddenLayersNumberOfNeurons, m_actionsSize));
//layers.PushBack(new ndBrainLayerSoftmaxActivation(m_actionsSize));
Expand Down
76 changes: 65 additions & 11 deletions newton-4.00/sdk/dBrain/ndBrainLayerTanhActivation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,59 @@ void ndBrainLayerApproximateTanhActivation::MakePrediction(const ndBrainVector&
void ndBrainLayerApproximateTanhActivation::MakePrediction(const ndBrainVector& input, ndBrainVector& output) const
{
// rational approximation of tanh, approximately 4 times faster than the standard tanh.
// error bound lower than 1-e6 for the entire rage[-10, 10]
// error bound lower than 2.0e-6 for the entire range [-10, 10]
// the only problem is that it is not exactly zero for input zero,
// however this can be very good for dense hidden layers,
// in fact it seems to produce better or equal results than the standard tanh

#if 0
auto ScalarTanhApproximation = [](ndBrainFloat in)
{
const ndBrainFloat c1 = ndReal(0.03138777f);
const ndBrainFloat c2 = ndReal(0.276281267f);
const ndBrainFloat log2f = ndReal(1.442695022f);

ndBrainFloat v = log2f * ndClamp(in, ndBrainFloat(-10.0f), ndBrainFloat(10.0f));
ndBrainFloat floatIntPart = ndBrainFloat(ndFloor(v));
ndBrainFloat x = v - floatIntPart;
ndBrainFloat xx = x * x;
ndBrainFloat v1 = log2f + c2 * xx;
ndBrainFloat v2 = x + xx * c1 * x;
ndBrainFloat v3 = v2 + v1;
ndBrainFloat v4 = v2 - v1;
#ifdef D_BRAIN_USES_REAL
* ((ndInt32*)&v3) += ndInt32(floatIntPart) << 24;
#else
* ((ndInt64*)&v3) += ndInt64(floatIntPart) << 53;
#endif
return (v3 + v4) / (v3 - v4);
};

auto VectorTanhApproximation = [](const ndBrainVector4& in)
{
ndBrainVector4 v(m_log2f * in.GetMin(m_max).GetMax(m_min));
ndBrainVector4 intPart(v.GetInt());
ndBrainVector4 x(v - v.Floor());
ndBrainVector4 xx(x * x);
ndBrainVector4 v1(m_log2f + m_c2 * xx);
ndBrainVector4 v2(x + xx * m_c1 * x);
ndBrainVector4 v3(v2 + v1);
for (ndInt32 i = 0; i < 4; ++i)
{
#ifdef D_BRAIN_USES_REAL
v3.m_i[i] += intPart.m_i[i] << 24;
#else
v3.m_i[i] += intPart.m_i[i] << 53;
#endif
}
ndBrainVector4 v4(v2 - v1);
ndBrainVector4 num(v3 + v4);
ndBrainVector4 den(v3 - v4);
return num.Divide(den);
};

#else

const ndBrainVector4 c1(m_c1);
const ndBrainVector4 c2(m_c2);
const ndBrainVector4 min(m_min);
Expand All @@ -139,10 +187,6 @@ void ndBrainLayerApproximateTanhActivation::MakePrediction(const ndBrainVector&

auto ScalarTanhApproximation = [c1, c2, min, max, log2f](ndBrainFloat in)
{
//const ndBrainFloat c1 = ndReal(0.03138777f);
//const ndBrainFloat c2 = ndReal(0.276281267f);
//const ndBrainFloat log2f = ndReal(1.442695022f);

ndBrainFloat v = log2f[0] * ndClamp(in, min[0], max[0]);
ndBrainFloat floatIntPart = ndBrainFloat(ndFloor(v));
ndBrainFloat x = v - floatIntPart;
Expand Down Expand Up @@ -181,12 +225,7 @@ void ndBrainLayerApproximateTanhActivation::MakePrediction(const ndBrainVector&
ndBrainVector4 den(v3 - v4);
return num.Divide(den);
};

// check accuracy, expected error less that 1e-6 of all cases.
//ndVector xxx0(-1.3f, 0.0f, 1.25f, 6.0f);
//ndVector xxx1(ndTanh(xxx0[0]), ndTanh(xxx0[1]), ndTanh(xxx0[2]), ndTanh(xxx0[3]));
//ndVector xxx2(ScalarTanhApproximation(xxx0[0]), ScalarTanhApproximation(xxx0[1]), ScalarTanhApproximation(xxx0[2]), ScalarTanhApproximation(xxx0[3]));
//ndVector xxx3(VectorTanhApproximation(xxx0));
#endif

const ndInt32 count = input.GetCount() / 4;
ndBrainVector4* const vectorOutput = (ndBrainVector4*)&output[0];
Expand All @@ -195,10 +234,25 @@ void ndBrainLayerApproximateTanhActivation::MakePrediction(const ndBrainVector&
for (ndInt32 i = count - 1; i >= 0 ; --i)
{
vectorOutput[i] = VectorTanhApproximation(vectorInput[i]);
#if _DEBUG
// check accuracy, expected error less than 2.0e-6 in all cases.
ndBrainVector4 xxx0(vectorOutput[i]);
ndBrainVector4 xxx1 (ndTanh(vectorInput[i].m_x), ndTanh(vectorInput[i].m_y), ndTanh(vectorInput[i].m_z), ndTanh(vectorInput[i].m_w));
ndAssert(ndAbs(xxx0.m_x - xxx1.m_x) < ndFloat32(2.0e-6f));
ndAssert(ndAbs(xxx0.m_y - xxx1.m_y) < ndFloat32(2.0e-6f));
ndAssert(ndAbs(xxx0.m_z - xxx1.m_z) < ndFloat32(2.0e-6f));
ndAssert(ndAbs(xxx0.m_w - xxx1.m_w) < ndFloat32(2.0e-6f));
#endif
}
for (ndInt32 i = input.GetCount() - 1; i >= count * 4; --i)
{
output[i] = ndBrainFloat(ScalarTanhApproximation(input[i]));
#if _DEBUG
// check accuracy, expected error less than 2.0e-6 in all cases.
ndFloat32 xxx0 = output[i];
ndFloat32 xxx1 = ndTanh(input[i]);
ndAssert(ndAbs(xxx1 - xxx0) < ndFloat32(2.0e-6f));
#endif
}

output.FlushToZero();
Expand Down
2 changes: 1 addition & 1 deletion newton-4.00/sdk/dBrain/ndBrainLayerTanhActivation.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ class ndBrainLayerApproximateTanhActivation : public ndBrainLayerTanhActivation

static ndBrainVector4 m_c1;
static ndBrainVector4 m_c2;
static ndBrainVector4 m_log2f;
static ndBrainVector4 m_max;
static ndBrainVector4 m_min;
static ndBrainVector4 m_log2f;
};

#endif
Expand Down

0 comments on commit 48a6184

Please sign in to comment.