Skip to content

Commit

Permalink
training quad robot to stand on three legs (wip)
Browse files Browse the repository at this point in the history
for the first time the agent is successful, but the agent is too fragile at 20 million steps.
trying 50 million training steps.
  • Loading branch information
JulioJerez committed Feb 25, 2024
1 parent 0443a6b commit bd5570e
Show file tree
Hide file tree
Showing 17 changed files with 119 additions and 130 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ namespace ndCarpole_1
m_model->GetObservation(observation);
}

virtual void ApplyActions(ndBrainFloat* const actions) const
virtual void ApplyActions(ndBrainFloat* const actions)
{
m_model->ApplyActions(actions);
}
Expand Down Expand Up @@ -133,12 +133,12 @@ namespace ndCarpole_1
}
}

ndBrainFloat GetReward() const
ndBrainFloat CalculateReward()
{
return m_model->GetReward();
}

virtual void ApplyActions(ndBrainFloat* const actions) const
virtual void ApplyActions(ndBrainFloat* const actions)
{
if (GetEpisodeFrames() >= 15000)
{
Expand Down Expand Up @@ -275,7 +275,7 @@ namespace ndCarpole_1
return ndReal(reward);
}

void ApplyActions(ndBrainFloat* const actions) const
void ApplyActions(ndBrainFloat* const actions)
{
ndVector force(m_cart->GetForce());
ndBrainFloat action = actions[0];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ namespace ndCarpole_0
m_model->GetObservation(observation);
}

virtual void ApplyActions(ndBrainFloat* const actions) const
virtual void ApplyActions(ndBrainFloat* const actions)
{
m_model->ApplyActions(actions);
}
Expand Down Expand Up @@ -138,12 +138,12 @@ namespace ndCarpole_0
}
}

ndBrainFloat GetReward() const
ndBrainFloat CalculateReward()
{
return m_model->GetReward();
}

virtual void ApplyActions(ndBrainFloat* const actions) const
virtual void ApplyActions(ndBrainFloat* const actions)
{
if (GetEpisodeFrames() >= 10000)
{
Expand Down Expand Up @@ -269,7 +269,7 @@ namespace ndCarpole_0
return ndReal(reward);
}

void ApplyActions(ndBrainFloat* const actions) const
void ApplyActions(ndBrainFloat* const actions)
{
ndVector force(m_cart->GetForce());
ndInt32 action = ndInt32(actions[0]);
Expand Down
57 changes: 29 additions & 28 deletions newton-4.00/applications/ndSandbox/demos/ndQuadrupedTest_1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ namespace ndQuadruped_1
ndMemCpy(&m_currentTransition[0], observation, m_observationsSize);
}

virtual void ApplyActions(ndBrainFloat* const actions) const
virtual void ApplyActions(ndBrainFloat* const actions)
{
m_model->ApplyActions(actions, &m_currentTransition[0]);
}
Expand Down Expand Up @@ -432,7 +432,7 @@ namespace ndQuadruped_1
return reward;
}

virtual void ApplyActions(ndBrainFloat* const actions) const
virtual void ApplyActions(ndBrainFloat* const actions)
{
m_model->ApplyActions(actions, &m_currentTransition[0]);
}
Expand Down Expand Up @@ -1490,6 +1490,20 @@ namespace ndQuadruped_1
}
#else

#define ND_TRAIN_MODEL

#define CONTROLLER_NAME "ndQuadruped_1VPG.dnn"

#define D_MAX_SWING_DIST_X ndReal(0.10f)
#define D_MAX_SWING_DIST_Z ndReal(0.15f)
#define D_POSE_REST_POSITION_Y ndReal (-0.3f)
#define D_MIN_REWARD_ANGLE ndReal(ndFloat32 (30.0f) * ndDegreeToRad)

#define D_SWING_STEP ndReal(0.005f)
#define D_EFFECTOR_STEP ndReal(0.1f)
#define ND_AGENT_INPUTSIZE (4 * m_legObservationsSize + m_actionsSize)


enum ndActionSpace
{
m_move_x,
Expand All @@ -1510,19 +1524,6 @@ namespace ndQuadruped_1
m_legObservationsSize
};

#define ND_TRAIN_MODEL

#define CONTROLLER_NAME "ndQuadruped_1VPG.dnn"

#define D_MAX_SWING_DIST_X ndReal(0.10f)
#define D_MAX_SWING_DIST_Z ndReal(0.15f)
#define D_POSE_REST_POSITION_Y ndReal (-0.3f)
#define D_MIN_REWARD_ANGLE ndReal(ndFloat32 (30.0f) * ndDegreeToRad)

#define D_SWING_STEP ndReal(0.005f)
#define D_EFFECTOR_STEP ndReal(0.1f)
#define ND_AGENT_INPUTSIZE (4 * m_legObservationsSize + m_actionsSize)

class ndRobot : public ndModelArticulation
{
public:
Expand Down Expand Up @@ -1693,7 +1694,7 @@ namespace ndQuadruped_1
m_model->GetObservation(observation);
}

virtual void ApplyActions(ndBrainFloat* const actions) const
virtual void ApplyActions(ndBrainFloat* const actions)
{
m_model->ApplyActions(actions);
}
Expand Down Expand Up @@ -1744,11 +1745,10 @@ namespace ndQuadruped_1
,m_model(nullptr)
,m_timer(ndGetTimeInMicroseconds())
,m_maxGain(-1.0e10f)
,m_maxFrames(3000)
,m_maxFrames(5000)
,m_killCounter(0)
,m_startTraining(0)
//,m_stopTraining(5000000)
,m_stopTraining(1000000)
,m_stopTraining(50000000)
,m_modelIsTrained(false)
{
SetName(CONTROLLER_NAME);
Expand All @@ -1775,7 +1775,7 @@ namespace ndQuadruped_1
}
}

ndBrainFloat GetReward() const
ndBrainFloat CalculateReward()
{
ndBrainFloat reward = m_model->CalculateReward();
if (reward > ndBrainFloat(0.4f))
Expand All @@ -1791,7 +1791,7 @@ namespace ndQuadruped_1
return reward;
}

virtual void ApplyActions(ndBrainFloat* const actions) const
virtual void ApplyActions(ndBrainFloat* const actions)
{
m_model->ApplyActions(actions);
}
Expand Down Expand Up @@ -1888,12 +1888,13 @@ namespace ndQuadruped_1
}
}

if (stopTraining == m_stopTraining)
if (stopTraining >= m_stopTraining)
{
char fileName[1024];
m_modelIsTrained = true;
m_actor.CopyFrom(m_bestActor);
ndGetWorkingFileName(GetName().GetStr(), fileName);
//ndGetWorkingFileName(GetName().GetStr(), fileName);
ndGetWorkingFileName("ndQuadruped_1VPG_.dnn", fileName);
SaveToFile(fileName);
ndExpandTraceMessage("saving to file: %s\n", fileName);
ndExpandTraceMessage("training complete\n");
Expand All @@ -1911,7 +1912,7 @@ namespace ndQuadruped_1
ndUnsigned64 m_timer;
ndFloat32 m_maxGain;
ndInt32 m_maxFrames;
mutable ndInt32 m_killCounter;
ndInt32 m_killCounter;
ndInt32 m_startTraining;
ndInt32 m_stopTraining;
bool m_modelIsTrained;
Expand Down Expand Up @@ -2176,7 +2177,7 @@ namespace ndQuadruped_1
input[m_move_z] = m_control->m_z;
}

ndBrainFloat CalculateReward() const
ndBrainFloat CalculateReward()
{
ndFixSizeArray<ndBigVector, 4> desiredSupportPoint;
for (ndInt32 i = 0; i < m_animPose.GetCount(); ++i)
Expand Down Expand Up @@ -2210,8 +2211,8 @@ namespace ndQuadruped_1
//ndFloat32 dist = ndFloat32(1.0f) - ndFloat32 (ndSqrt (error.DotProduct(error).GetScalar()));
//ndFloat32 dist = ndFloat32(1.0f) - ndFloat32 (error.DotProduct(error).GetScalar());
//reward = (dist2 < ndBrainFloat(1.0e-5f)) ? ndBrainFloat(1.0f) : ndBrainFloat(0.0f);
reward = ndBrainFloat(ndExp(-ndBrainFloat(100.0f) * dist2));
ndTrace(("d2(% f) r(% f)\n", dist2, reward));
reward = ndBrainFloat(ndExp(-ndBrainFloat(10000.0f) * dist2));
//ndTrace(("d2(% f) r(% f)\n", dist2, reward));
}
else
{
Expand Down Expand Up @@ -2319,7 +2320,7 @@ namespace ndQuadruped_1
ndBrainAgentContinueVPG_Trainer<ND_AGENT_INPUTSIZE, m_actionsSize>::HyperParameters hyperParameters;
//hyperParameters.m_sigma = ndReal(0.25f);
hyperParameters.m_discountFactor = ndReal(0.99f);
hyperParameters.m_extraTrajectorySteps = 6000;
hyperParameters.m_maxTrajectorySteps = 6000;
ndSharedPtr<ndBrainAgent> agent(new ndRobot::ndControllerAgent_trainer(hyperParameters));
#else
char fileName[1024];
Expand Down
8 changes: 4 additions & 4 deletions newton-4.00/applications/ndSandbox/demos/ndUnicycle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ namespace ndUnicycle
m_model->GetObservation(observation);
}

virtual void ApplyActions(ndBrainFloat* const actions) const
virtual void ApplyActions(ndBrainFloat* const actions)
{
m_model->ApplyActions(actions);
}
Expand Down Expand Up @@ -184,7 +184,7 @@ namespace ndUnicycle
return fail;
}

ndBrainFloat GetReward() const
ndBrainFloat CalculateReward()
{
if (IsTerminal())
{
Expand All @@ -211,7 +211,7 @@ namespace ndUnicycle
}
}

virtual void ApplyActions(ndBrainFloat* const actions) const
virtual void ApplyActions(ndBrainFloat* const actions)
{
#ifndef D_USE_VANILLA_POLICY_GRAD
if (GetEpisodeFrames() >= 15000)
Expand Down Expand Up @@ -378,7 +378,7 @@ namespace ndUnicycle
}
}

void ApplyActions(ndBrainFloat* const actions) const
void ApplyActions(ndBrainFloat* const actions)
{
ndFloat32 legAngle = ndFloat32(actions[m_softLegControl]) * ND_MAX_LEG_ANGLE_STEP + m_legJoint->GetAngle();
legAngle = ndClamp (legAngle, -ND_MAX_LEG_JOINT_ANGLE, ND_MAX_LEG_JOINT_ANGLE);
Expand Down
4 changes: 2 additions & 2 deletions newton-4.00/applications/ndSandbox/ndDemoEntityManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@
//#define DEFAULT_SCENE 17 // rag doll
//#define DEFAULT_SCENE 18 // cart pole discrete controller
//#define DEFAULT_SCENE 19 // cart pole continue controller
#define DEFAULT_SCENE 20 // unit cycle controller
//#define DEFAULT_SCENE 21 // quadruped test 1
//#define DEFAULT_SCENE 20 // unit cycle controller
#define DEFAULT_SCENE 21 // quadruped test 1
//#define DEFAULT_SCENE 22 // quadruped test 2
//#define DEFAULT_SCENE 23 // quadruped test 3
//#define DEFAULT_SCENE 24 // biped test 1
Expand Down
2 changes: 1 addition & 1 deletion newton-4.00/sdk/dBrain/ndBrainAgent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ ndBrainAgent::~ndBrainAgent()
{
}

void ndBrainAgent::SaveToFile(const char* const pathFilename) const
void ndBrainAgent::SaveToFile(const char* const pathFilename)
{
class SaveAgent: public ndBrainSave
{
Expand Down
8 changes: 4 additions & 4 deletions newton-4.00/sdk/dBrain/ndBrainAgent.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class ndBrainAgent: public ndClassAlloc

const ndString& GetName() const;
void SetName(const ndString& name);
void SaveToFile(const char* const filename) const;
void SaveToFile(const char* const filename);

virtual void InitWeights() = 0;
virtual bool IsTrainer() const = 0;
Expand All @@ -46,10 +46,10 @@ class ndBrainAgent: public ndClassAlloc
protected:
virtual void ResetModel() = 0;
virtual bool IsTerminal() const = 0;
virtual ndBrainFloat GetReward() const = 0;
virtual ndBrainFloat CalculateReward() = 0;
virtual ndInt32 GetEpisodeFrames() const = 0;
virtual void Save(ndBrainSave* const loadSave) const = 0;
virtual void ApplyActions(ndBrainFloat* const actions) const = 0;
virtual void Save(ndBrainSave* const loadSave) = 0;
virtual void ApplyActions(ndBrainFloat* const actions)= 0;
virtual void GetObservation(ndBrainFloat* const observation) = 0;

ndString m_name;
Expand Down
8 changes: 4 additions & 4 deletions newton-4.00/sdk/dBrain/ndBrainAgentContinueVPG.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ class ndBrainAgentContinueVPG: public ndBrainAgent
void OptimizeStep();
bool IsTrainer() const;
bool IsTerminal() const;
ndBrainFloat GetReward() const;
ndBrainFloat CalculateReward();
ndInt32 GetEpisodeFrames() const;
void Save(ndBrainSave* const loadSave) const;
void Save(ndBrainSave* const loadSave);

void InitWeights();
void InitWeights(ndBrainFloat weighVariance, ndBrainFloat biasVariance);
Expand Down Expand Up @@ -90,7 +90,7 @@ bool ndBrainAgentContinueVPG<statesDim, actionDim>::IsTerminal() const
}

template<ndInt32 statesDim, ndInt32 actionDim>
ndBrainFloat ndBrainAgentContinueVPG<statesDim, actionDim>::GetReward() const
ndBrainFloat ndBrainAgentContinueVPG<statesDim, actionDim>::CalculateReward()
{
ndAssert(0);
return ndBrainFloat(0.0f);
Expand All @@ -110,7 +110,7 @@ ndInt32 ndBrainAgentContinueVPG<statesDim, actionDim>::GetEpisodeFrames() const
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentContinueVPG<statesDim, actionDim>::Save(ndBrainSave* const) const
void ndBrainAgentContinueVPG<statesDim, actionDim>::Save(ndBrainSave* const)
{
ndAssert(0);
}
Expand Down
Loading

0 comments on commit bd5570e

Please sign in to comment.