285 lines
11 KiB
C#
Raw Normal View History

2019-07-29 16:29:21 +02:00
///-----------------------------------------------------------------
/// Namespace: <Cozmo>
/// Class: <CozmoAgent>
/// Description: <The actual agent in the scene. Collects observations and executes the actions.
/// Also rewards the agent and sets an actionmask.>
/// Author: <Tobias Hassel> Date: <29.07.2019>
/// Notes: <>
///-----------------------------------------------------------------
///
using MLAgents;
using OpenCvSharp;
using System;
using System.Collections.Generic;
2019-04-10 13:39:36 +02:00
using UnityEngine;
2019-07-29 16:29:21 +02:00
namespace Cozmo
2019-04-10 13:39:36 +02:00
{
2019-07-29 16:29:21 +02:00
public class CozmoAgent : Agent
{
2019-07-29 16:29:21 +02:00
// Action indices as read from vectorAction[0] in AgentAction and passed to SetActionMask in SetMask.
private const int STOP = 0;
private const int FORWARD = 1;
private const int RIGHT = 2;
private const int LEFT = 3;

// Fraction of half the image width that is treated as the "near the center" area;
// Start() turns this into a pixel distance (nearAreaLimit).
private const float NEAR_AREA_PERCENTAGE_OFFSET = 0.3f;

[Tooltip("The virtual Cozmo camera")]
public Camera renderCamera;

[Tooltip("Reference to the CozmoMovement script")]
public CozmoMovementController movementController;

[Tooltip("The time between decisions at inference")]
public float timeBetweenDecisionsAtInference;

[Tooltip("The amount of actions that should be remembered by the agent.")]
public int maxStoredMovementStates = 1;

[Tooltip("If activated the maxStoredMovementStates has no impact on the training.")]
public bool useOriginalActionMasking = true;

// The CozmoAcademy found in the scene by Start().
private Academy academy;

// Time accumulated since the last decision was requested (only advanced in inference mode).
private float timeSinceDecision;

// ImageProcessor component taken from renderCamera; supplies the center of gravity.
private ImageProcessor imageProcessor;

// Pixel distance from the image center (in x) up to which a position counts as "near".
private int nearAreaLimit = 0;

// X coordinate of the middle of the rendered image.
private int centerOfImageX = 0;

// The last action/movement that was executed by AgentAction.
private MovementState lastChosenMovement = MovementState.Stop;

// History of the most recently chosen movements; only filled when useOriginalActionMasking is false.
// Never reassigned, hence readonly.
private readonly Queue<MovementState> lastActions = new Queue<MovementState>();

// Time.time captured in Start() and refreshed at the beginning of every AgentAction call.
private double startTime = 0;
2019-07-29 16:29:21 +02:00
/// <summary>
/// Records the start time, looks up the academy and the camera's ImageProcessor, and derives
/// the horizontal image center and the near-area width from the camera's target texture.
/// </summary>
private void Start()
{
    startTime = Time.time;

    academy = FindObjectOfType<CozmoAcademy>();
    imageProcessor = renderCamera.GetComponent<ImageProcessor>();

    // Integer half width of the render target; doubles as the image center in x.
    int halfWidth = renderCamera.targetTexture.width / 2;
    nearAreaLimit = (int)(halfWidth * NEAR_AREA_PERCENTAGE_OFFSET);
    centerOfImageX = halfWidth;
}
2019-07-29 16:29:21 +02:00
/// <summary>
/// Runs the decision scheduling in WaitTimeInference on every call.
/// </summary>
public void FixedUpdate() => WaitTimeInference();
2019-07-29 16:29:21 +02:00
/// <summary>
/// Adds no vector observations itself; it only applies the action mask via SetMask.
/// </summary>
public override void CollectObservations() => SetMask();
2019-07-29 16:29:21 +02:00
/// <summary>
/// Masks the actions that must not be chosen next. Called from CollectObservations().
/// With useOriginalActionMasking the mask depends only on the last chosen movement;
/// otherwise it also looks at the stored movement history in lastActions.
/// </summary>
/// <exception cref="ArgumentException">
/// Thrown in original-masking mode when lastChosenMovement is not Stop, Forward, Right or Left.
/// </exception>
private void SetMask()
{
    if (!useOriginalActionMasking)
    {
        // History-based masking: no stop directly after a stop ...
        if (lastChosenMovement == MovementState.Stop)
        {
            SetActionMask(STOP);
        }

        // ... no left turn while a right turn is still remembered ...
        if (lastActions.Contains(MovementState.Right))
        {
            SetActionMask(LEFT);
        }

        // ... and no right turn while a left turn is still remembered.
        if (lastActions.Contains(MovementState.Left))
        {
            SetActionMask(RIGHT);
        }

        return;
    }

    // Original masking: stop is masked in every case, and a turn additionally masks the opposite turn.
    switch (lastChosenMovement)
    {
        case MovementState.Stop:
        case MovementState.Forward:
            SetActionMask(STOP);
            break;

        case MovementState.Right:
            SetActionMask(STOP);
            SetActionMask(LEFT);
            break;

        case MovementState.Left:
            SetActionMask(STOP);
            SetActionMask(RIGHT);
            break;

        default:
            throw new ArgumentException("Invalid MovementState.");
    }
}
2019-07-29 16:29:21 +02:00
/// <summary>
/// Executes the chosen action: forwards the matching movement state to the movement controller,
/// remembers it as the last chosen movement, re-renders the camera and finally rewards the agent
/// through RewardAgent().
/// </summary>
/// <param name="vectorAction">Action vector; only element 0 is read and floored to an action index.</param>
/// <param name="textAction">Not used by this agent.</param>
/// <exception cref="ArgumentException">Thrown when the action index is not STOP, FORWARD, RIGHT or LEFT.</exception>
public override void AgentAction(float[] vectorAction, string textAction)
{
    // Remember when this action was executed.
    startTime = Time.time;

    int action = Mathf.FloorToInt(vectorAction[0]);

    AddReward(-0.01f);

    // Translate the action index into a movement state and its per-action reward.
    MovementState chosenMovement;
    float actionReward;
    switch (action)
    {
        case STOP:
            chosenMovement = MovementState.Stop;
            actionReward = -0.02f;
            break;
        case FORWARD:
            chosenMovement = MovementState.Forward;
            actionReward = 0.02f;
            break;
        case RIGHT:
            chosenMovement = MovementState.Right;
            actionReward = 0.01f;
            break;
        case LEFT:
            chosenMovement = MovementState.Left;
            actionReward = 0.01f;
            break;
        default:
            throw new ArgumentException("Invalid action value. Stop movement.");
    }

    movementController.currentMovementState = chosenMovement;
    lastChosenMovement = chosenMovement;

    // NOTE(review): RewardAgent() below always finishes with SetReward, which presumably replaces
    // the step reward set here and by AddReward above - confirm against the ML-Agents Agent API
    // whether these per-action rewards are meant to survive.
    SetReward(actionReward);

    // The movement history is only needed for the history-based action masking.
    if (!useOriginalActionMasking)
    {
        CollectLastMovementStates(lastChosenMovement);
    }

    // Render new image after movement in order to update the center of gravity
    if (renderCamera != null)
    {
        renderCamera.Render();
    }

    RewardAgent();
}
2019-07-29 16:29:21 +02:00
/// <summary>
/// Sets the reward for the agent based on how far the x coordinate of the center of gravity is
/// from the center of the image. Inside the near area the reward grows from 0 at the area's
/// border to 1 at the image center; in the far areas it falls from 0 at the near border to -0.5
/// at the image edge. If the center of gravity matches none of the areas the agent gets -1 and
/// is reset.
/// </summary>
private void RewardAgent()
{
    float centerOfGravityX = imageProcessor.CenterOfGravity.X;
    int leftNearBorder = centerOfImageX - nearAreaLimit;
    int rightNearBorder = centerOfImageX + nearAreaLimit;
    float reward;

    // Center of gravity is far away from the center (left)
    if (centerOfGravityX <= leftNearBorder && centerOfGravityX >= 0)
    {
        float range = leftNearBorder;
        reward = -(1 - (centerOfGravityX / range));
        // Clamp the reward to [-1, 0] and divide it by 2
        reward = Mathf.Clamp(reward, -1, 0) / 2;
    }
    // Center of gravity is near left of the center
    else if ((centerOfGravityX <= centerOfImageX) && (centerOfGravityX >= leftNearBorder))
    {
        float range = centerOfImageX - leftNearBorder;
        float distanceToLeftFarBorder = centerOfGravityX - leftNearBorder;
        reward = (distanceToLeftFarBorder / range);
    }
    // Center of gravity is far away from the center (right)
    else if ((centerOfGravityX >= rightNearBorder) && (centerOfGravityX <= renderCamera.targetTexture.width))
    {
        float range = renderCamera.targetTexture.width - rightNearBorder;
        reward = -((centerOfGravityX - rightNearBorder) / range);
        // Clamp the reward to [-1, 0] and divide it by 2
        reward = Mathf.Clamp(reward, -1, 0) / 2;
    }
    // Center of gravity is near right of the center
    else if ((centerOfGravityX >= centerOfImageX) && (centerOfGravityX <= rightNearBorder))
    {
        float range = rightNearBorder - centerOfImageX;
        float distanceToCenterOfImage = centerOfGravityX - centerOfImageX;
        reward = (1 - distanceToCenterOfImage / range);
    }
    else
    {
        SetReward(-1);
        AgentReset();
        Debug.Log("Out of image range");
        // Return here so that the penalty is not replaced by the SetReward call below,
        // which would otherwise run with a reward of 0.
        return;
    }

    Debug.Log("Reward: " + reward);
    SetReward(reward);
}
/// <summary>
/// Stores the given movement state in the lastActions queue and drops the oldest entries so
/// that the queue never holds more than maxStoredMovementStates items. Does nothing when the
/// queue is missing or maxStoredMovementStates is not positive.
/// </summary>
/// <param name="movementState">The movement state to remember.</param>
private void CollectLastMovementStates(MovementState movementState)
{
    // Check if Queue exists and values should be stored
    if ((lastActions == null) || (maxStoredMovementStates <= 0))
    {
        return;
    }

    // Make room for the new entry. The condition is re-checked against the current Count after
    // every Dequeue, so this also trims correctly when the queue holds more than the limit
    // (e.g. after maxStoredMovementStates was lowered).
    while (lastActions.Count >= maxStoredMovementStates)
    {
        lastActions.Dequeue();
    }

    // add last action to queue
    lastActions.Enqueue(movementState);
}
2019-07-29 16:29:21 +02:00
/// <summary>
/// Resets the agent by calling AcademyReset on the academy.
/// </summary>
public override void AgentReset() => academy.AcademyReset();
2019-07-29 16:29:21 +02:00
/// <summary>
/// Marks the agent as done when it enters a trigger whose transform is tagged "Goal".
/// </summary>
/// <param name="other">The collider whose trigger was entered.</param>
private void OnTriggerEnter(Collider other)
{
    bool reachedGoal = other.transform.CompareTag("Goal");
    if (!reachedGoal)
    {
        return;
    }

    Done();
}
2019-07-29 16:29:21 +02:00
/// <summary>
/// Renders the camera (if one is assigned) and decides whether to request a decision.
/// When the academy reports that it is not in inference mode a decision is requested on every
/// call; otherwise one is requested only once the accumulated fixed delta time has reached
/// timeBetweenDecisionsAtInference.
/// </summary>
private void WaitTimeInference()
{
    if (renderCamera != null)
    {
        renderCamera.Render();
    }

    // Not in inference mode: request a decision on every call.
    if (!academy.GetIsInference())
    {
        RequestDecision();
        return;
    }

    bool decisionDue = timeSinceDecision >= timeBetweenDecisionsAtInference;
    if (!decisionDue)
    {
        // Keep waiting and accumulate the elapsed fixed time step.
        timeSinceDecision += Time.fixedDeltaTime;
        return;
    }

    timeSinceDecision = 0f;
    RequestDecision();
}
2019-07-29 16:29:21 +02:00
}
2019-04-10 13:39:36 +02:00
}