Change CozmoAgent to give it a custom memory size and another model brain

2019-08-24 16:24:40 +02:00 · 2019-08-24 16:24:40 +02:00 · d96e23e8c2
commit d96e23e8c2
parent 021662632e
5 changed files with 97 additions and 26 deletions
--- a/Assets/Brains/CozmoLearningBrain.asset
+++ b/Assets/Brains/CozmoLearningBrain.asset
@ -23,5 +23,5 @@ MonoBehaviour:
    vectorActionDescriptions:
    - 
    vectorActionSpaceType: 0
-  model: {fileID: 11400000, guid: 24baaa8c605c1a74785880885cf04847, type: 3}
+  model: {fileID: 11400000, guid: 7f8f0f4568060e84bb933886fd122e7e, type: 3}
  inferenceDevice: 0
--- a/Assets/Brains/CozmoLearningBrain_cozmo_training-0.nn
+++ b/Assets/Brains/CozmoLearningBrain_cozmo_training-0.nn
--- a/Assets/Brains/CozmoLearningBrain_cozmo_training-0.nn.meta
+++ b/Assets/Brains/CozmoLearningBrain_cozmo_training-0.nn.meta
@ -0,0 +1,10 @@
+fileFormatVersion: 2
+guid: 7f8f0f4568060e84bb933886fd122e7e
+ScriptedImporter:
+  fileIDToRecycleName:
+    11400000: Assets/Brains/CozmoLearningBrain_cozmo_training-0.nn
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
+  script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
--- a/Assets/Scenes/CozmoTraining.unity
+++ b/Assets/Scenes/CozmoTraining.unity
@ -942,6 +942,7 @@ MonoBehaviour:
    - {fileID: 11400000, guid: 0d9d5ce547064a648b466ad7f208cf3d, type: 2}
    _brainsToControl:
    - {fileID: 0}
+    - {fileID: 11400000, guid: 0d9d5ce547064a648b466ad7f208cf3d, type: 2}
  maxSteps: 1500
  trainingConfiguration:
    width: 80
@ -1631,6 +1632,11 @@ PrefabInstance:
      propertyPath: timeBetweenDecisionsAtInference
      value: 0.04
      objectReference: {fileID: 0}
+    - target: {fileID: 7570006596986120125, guid: 0f97dac5215d69a4795763340d82925d,
+        type: 3}
+      propertyPath: maxStoredMovementStates
+      value: 5
+      objectReference: {fileID: 0}
    - target: {fileID: 891713638, guid: 0f97dac5215d69a4795763340d82925d, type: 3}
      propertyPath: m_TagString
      value: Untagged
@ -1660,6 +1666,16 @@ PrefabInstance:
      propertyPath: m_Enabled
      value: 1
      objectReference: {fileID: 0}
+    - target: {fileID: 7570006596986120128, guid: 0f97dac5215d69a4795763340d82925d,
+        type: 3}
+      propertyPath: m_TurnSpeed
+      value: 35
+      objectReference: {fileID: 0}
+    - target: {fileID: 7570006596986120128, guid: 0f97dac5215d69a4795763340d82925d,
+        type: 3}
+      propertyPath: m_Speed
+      value: 0.0535
+      objectReference: {fileID: 0}
    m_RemovedComponents: []
  m_SourcePrefab: {fileID: 100100000, guid: 0f97dac5215d69a4795763340d82925d, type: 3}
 --- !u!1 &7570006596986120123 stripped
--- a/Cozmo/CozmoAgent.cs
+++ b/Cozmo/CozmoAgent.cs
@ -11,6 +11,7 @@
 using MLAgents;
 using OpenCvSharp;
 using System;
+using System.Collections.Generic;
 using UnityEngine;

 namespace Cozmo
@ -31,6 +32,7 @@ namespace Cozmo
        [Tooltip("Reference to the CozmoMovement script")]
        public CozmoMovementController movementController;
        public float timeBetweenDecisionsAtInference;
+        public int maxStoredMovementStates = 1;

        private Academy academy;                        // CozmoAcademy
        private float timeSinceDecision;                // time since last decision
@ -39,6 +41,8 @@ namespace Cozmo
        private int centerOfImageX = 0;                 // Middle of the image in x direction
        private MovementState lastChosenMovement = MovementState.Stop;  // The last action/movement that was executed 

+        private Queue<MovementState> lastActions = new Queue<MovementState>();
+
        private double startTime = Time.time;

        private void Start()
@ -64,29 +68,48 @@ namespace Cozmo
        // Set ActionMask for training
        private void SetMask()
        {
-            switch (lastChosenMovement)
+            // Do not allow two stop decisions in a row (only the immediately preceding action is checked)
+            if (lastChosenMovement == MovementState.Stop)
            {
-                // Do not allow stop decision after a stop 
-                case (MovementState.Stop):
-                    SetActionMask(STOP);
-                    break;
-                // Do not allow stop after forward
-                case (MovementState.Forward):
-                    SetActionMask(STOP);
-                    break;
-                // Do not allow stop & left after right
-                case (MovementState.Right):
-                    SetActionMask(STOP);
-                    SetActionMask(LEFT);
-                    break;
-                // Do not allow stop & right after left
-                case (MovementState.Left):
-                    SetActionMask(STOP);
-                    SetActionMask(RIGHT);
-                    break;
-                default:
-                    throw new ArgumentException("Invalid MovementState.");
+                SetActionMask(STOP);
            }
+
+            // Do not allow a left decision while a right turn is still stored in the lastActions history queue
+            if (lastActions.Contains(MovementState.Right))
+            {
+                SetActionMask(LEFT);
+            }
+
+            // Do not allow a right decision while a left turn is still stored in the lastActions history queue
+            if (lastActions.Contains(MovementState.Left))
+            {
+                SetActionMask(RIGHT);
+            }
+
+            //switch (lastChosenMovement)
+            //{
+            //    // Do not allow stop decision after a stop 
+            //    case (MovementState.Stop):
+            //        SetActionMask(STOP);
+            //        break;
+            //    // Do not allow stop after forward
+            //    case (MovementState.Forward):
+            //        //SetActionMask(STOP);
+            //        break;
+            //    // Do not allow stop & left after right
+            //    case (MovementState.Right):
+            //        //SetActionMask(STOP);
+            //        if (lastActions.Contains(MovementState.))
+            //            SetActionMask(LEFT);
+            //        break;
+            //    // Do not allow stop & right after left
+            //    case (MovementState.Left):
+            //        //SetActionMask(STOP);
+            //        SetActionMask(RIGHT);
+            //        break;
+            //    default:
+            //        throw new ArgumentException("Invalid MovementState.");
+            //}
        }

        // to be implemented by the developer
@ -107,31 +130,33 @@ namespace Cozmo
                    movementController.currentMovementState = MovementState.Stop;
                    lastChosenMovement = MovementState.Stop;
                    //Test
-                    SetReward(-0.1f);
+                    SetReward(-0.02f);
                    break;
                case FORWARD:
                    movementController.currentMovementState = MovementState.Forward;
                    lastChosenMovement = MovementState.Forward;
                    //Test
-                    SetReward(0.01f);
+                    SetReward(0.02f);
                    break;
                case RIGHT:
                    movementController.currentMovementState = MovementState.Right;
                    lastChosenMovement = MovementState.Right;
                    //Test
-                    SetReward(-0.02f);
+                    SetReward(0.01f);
                    break;
                case LEFT:
                    movementController.currentMovementState = MovementState.Left;
                    lastChosenMovement = MovementState.Left;
                    //Test
-                    SetReward(-0.02f);
+                    SetReward(0.01f);
                    break;
                default:
                    //movement.Move(0);
                    throw new ArgumentException("Invalid action value. Stop movement.");
            }

+            CollectLastMovementStates(lastChosenMovement);
+
            // Render new image after movement in order to update the centerOfGravity
            if (renderCamera != null)
            {
@ -188,6 +213,26 @@ namespace Cozmo
            SetReward(reward);
        }

+        // Store the last movementStates in a Queue (oldest first), keeping at most
+        // maxStoredMovementStates entries. Does nothing when maxStoredMovementStates <= 0.
+        private void CollectLastMovementStates(MovementState movementState)
+        {
+            // Check if Queue exists and values should be stored
+            if ((lastActions != null) && (maxStoredMovementStates > 0))
+            {
+                // Drop the oldest entries until there is room for the new one.
+                // Count is re-read on every pass, so the queue is also trimmed correctly
+                // when maxStoredMovementStates was lowered while more entries were stored
+                // (an index compared against the shrinking Count would stop too early).
+                while (lastActions.Count >= maxStoredMovementStates)
+                {
+                    lastActions.Dequeue();
+                }
+                // add last action to queue
+                lastActions.Enqueue(movementState);
+            }
+        }
+
        // to be implemented by the developer
        public override void AgentReset()
        {