diff --git a/Assets/Brains/CozmoLearningBrain.asset b/Assets/Brains/CozmoLearningBrain.asset index a78b7b5..d9861a5 100644 --- a/Assets/Brains/CozmoLearningBrain.asset +++ b/Assets/Brains/CozmoLearningBrain.asset @@ -23,5 +23,5 @@ MonoBehaviour: vectorActionDescriptions: - vectorActionSpaceType: 0 - model: {fileID: 11400000, guid: 24baaa8c605c1a74785880885cf04847, type: 3} + model: {fileID: 11400000, guid: 7f8f0f4568060e84bb933886fd122e7e, type: 3} inferenceDevice: 0 diff --git a/Assets/Brains/CozmoLearningBrain_cozmo_training-0.nn b/Assets/Brains/CozmoLearningBrain_cozmo_training-0.nn new file mode 100644 index 0000000..0a4c626 Binary files /dev/null and b/Assets/Brains/CozmoLearningBrain_cozmo_training-0.nn differ diff --git a/Assets/Brains/CozmoLearningBrain_cozmo_training-0.nn.meta b/Assets/Brains/CozmoLearningBrain_cozmo_training-0.nn.meta new file mode 100644 index 0000000..6a76c82 --- /dev/null +++ b/Assets/Brains/CozmoLearningBrain_cozmo_training-0.nn.meta @@ -0,0 +1,10 @@ +fileFormatVersion: 2 +guid: 7f8f0f4568060e84bb933886fd122e7e +ScriptedImporter: + fileIDToRecycleName: + 11400000: Assets/Brains/CozmoLearningBrain_cozmo_training-0.nn + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: + script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3} diff --git a/Assets/Scenes/CozmoTraining.unity b/Assets/Scenes/CozmoTraining.unity index ef7f3ae..af17fef 100644 --- a/Assets/Scenes/CozmoTraining.unity +++ b/Assets/Scenes/CozmoTraining.unity @@ -942,6 +942,7 @@ MonoBehaviour: - {fileID: 11400000, guid: 0d9d5ce547064a648b466ad7f208cf3d, type: 2} _brainsToControl: - {fileID: 0} + - {fileID: 11400000, guid: 0d9d5ce547064a648b466ad7f208cf3d, type: 2} maxSteps: 1500 trainingConfiguration: width: 80 @@ -1631,6 +1632,11 @@ PrefabInstance: propertyPath: timeBetweenDecisionsAtInference value: 0.04 objectReference: {fileID: 0} + - target: {fileID: 7570006596986120125, guid: 0f97dac5215d69a4795763340d82925d, + type: 3} + propertyPath: maxStoredMovementStates + value: 5 + objectReference: {fileID: 0} - target: {fileID: 891713638, guid: 0f97dac5215d69a4795763340d82925d, type: 3} propertyPath: m_TagString value: Untagged @@ -1660,6 +1666,16 @@ PrefabInstance: propertyPath: m_Enabled value: 1 objectReference: {fileID: 0} + - target: {fileID: 7570006596986120128, guid: 0f97dac5215d69a4795763340d82925d, + type: 3} + propertyPath: m_TurnSpeed + value: 35 + objectReference: {fileID: 0} + - target: {fileID: 7570006596986120128, guid: 0f97dac5215d69a4795763340d82925d, + type: 3} + propertyPath: m_Speed + value: 0.0535 + objectReference: {fileID: 0} m_RemovedComponents: [] m_SourcePrefab: {fileID: 100100000, guid: 0f97dac5215d69a4795763340d82925d, type: 3} --- !u!1 &7570006596986120123 stripped diff --git a/Assets/Scripts/ML Cozmo/CozmoAgent.cs b/Assets/Scripts/ML Cozmo/CozmoAgent.cs index fbbae1a..094a585 100644 --- a/Assets/Scripts/ML Cozmo/CozmoAgent.cs +++ b/Assets/Scripts/ML Cozmo/CozmoAgent.cs @@ -11,6 +11,7 @@ using MLAgents; using OpenCvSharp; using System; +using System.Collections.Generic; using UnityEngine; namespace Cozmo @@ -31,6 +32,7 @@ namespace Cozmo [Tooltip("Reference to the CozmoMovement script")] public CozmoMovementController movementController; public float timeBetweenDecisionsAtInference; + public int maxStoredMovementStates = 1; private Academy academy; // CozmoAcademy private float timeSinceDecision; // time since last decision @@ -39,6 +41,8 @@ namespace Cozmo private int centerOfImageX = 0; // Middle of the image in x direction private MovementState lastChosenMovement = MovementState.Stop; // The last action/movement that was executed + private Queue lastActions = new Queue(); + private double startTime = Time.time; private void Start() @@ -64,29 +68,48 @@ namespace Cozmo // Set ActionMask for training private void SetMask() { - switch (lastChosenMovement) + // Do not allow stop decision after a stop + if (lastChosenMovement == MovementState.Stop) { - // Do not allow stop decision after a stop - case (MovementState.Stop): - SetActionMask(STOP); - break; - // Do not allow stop after forward - case (MovementState.Forward): - SetActionMask(STOP); - break; - // Do not allow stop & left after right - case (MovementState.Right): - SetActionMask(STOP); - SetActionMask(LEFT); - break; - // Do not allow stop & right after left - case (MovementState.Left): - SetActionMask(STOP); - SetActionMask(RIGHT); - break; - default: - throw new ArgumentException("Invalid MovementState."); + SetActionMask(STOP); } + + // Do not allow left decision if right was in the last actions + if (lastActions.Contains(MovementState.Right)) + { + SetActionMask(LEFT); + } + + // Do not allow right decision if left was in the last actions + if (lastActions.Contains(MovementState.Left)) + { + SetActionMask(RIGHT); + } + + //switch (lastChosenMovement) + //{ + // // Do not allow stop decision after a stop + // case (MovementState.Stop): + // SetActionMask(STOP); + // break; + // // Do not allow stop after forward + // case (MovementState.Forward): + // //SetActionMask(STOP); + // break; + // // Do not allow stop & left after right + // case (MovementState.Right): + // //SetActionMask(STOP); + // if (lastActions.Contains(MovementState.)) + // SetActionMask(LEFT); + // break; + // // Do not allow stop & right after left + // case (MovementState.Left): + // //SetActionMask(STOP); + // SetActionMask(RIGHT); + // break; + // default: + // throw new ArgumentException("Invalid MovementState."); + //} } // to be implemented by the developer @@ -107,31 +130,33 @@ namespace Cozmo movementController.currentMovementState = MovementState.Stop; lastChosenMovement = MovementState.Stop; //Test - SetReward(-0.1f); + SetReward(-0.02f); break; case FORWARD: movementController.currentMovementState = MovementState.Forward; lastChosenMovement = MovementState.Forward; //Test - SetReward(0.01f); + SetReward(0.02f); break; case RIGHT: movementController.currentMovementState = MovementState.Right; lastChosenMovement = MovementState.Right; //Test - SetReward(-0.02f); + SetReward(0.01f); break; case LEFT: movementController.currentMovementState = MovementState.Left; lastChosenMovement = MovementState.Left; //Test - SetReward(-0.02f); + SetReward(0.01f); break; default: //movement.Move(0); throw new ArgumentException("Invalid action value. Stop movement."); } + CollectLastMovementStates(lastChosenMovement); + // Render new image after movement in order to update the centerOfGravity if (renderCamera != null) { @@ -188,6 +213,26 @@ namespace Cozmo SetReward(reward); } + // Store the last movementStates in a Queue + private void CollectLastMovementStates(MovementState movementState) + { + // Check if Queue exists and values should be stored + if ((lastActions != null) && (maxStoredMovementStates > 0)) + { + // maxStoredMovementStates is reached + if (lastActions.Count >= maxStoredMovementStates) + { + // deque first value(s) when maxStoredMovementStates is reached + for (int i = 0; i <= (lastActions.Count - maxStoredMovementStates); i++) + { + lastActions.Dequeue(); + } + } + // add last action to queue + lastActions.Enqueue(movementState); + } + } + // to be implemented by the developer public override void AgentReset() {