You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

GridAgent.cs 4.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. using System;
  2. using UnityEngine;
  3. using System.Linq;
  4. using MLAgents;
  /// <summary>
  /// Agent for the GridWorld environment. Each decision moves the agent at most one
  /// cell along the X or Z axis; moves into a collider tagged "wall" are rejected, and
  /// landing on a collider tagged "goal" or "pit" sets the reward to +1 or -1.
  /// </summary>
  public class GridAgent : Agent
  {
      // Resolved in InitializeAgent(); provides gridSize and the inference flag.
      private GridAcademy academy;

      // The header is attached to the first public field so it is attached to something
      // the Inspector actually draws (the private academy field is not serialized).
      [Header("Specific to GridWorld")]
      public float timeBetweenDecisionsAtInference;

      // Time accumulated (in fixed-update steps) since the last decision was requested
      // while running in inference mode.
      private float timeSinceDecision;

      [Tooltip("Because we want an observation right before making a decision, we can force " +
               "a camera to render before making a decision. Place the agentCam here if using " +
               "RenderTexture as observations.")]
      public Camera renderCamera;

      [Tooltip("Selecting will turn on action masking. Note that a model trained with action " +
               "masking turned on may not behave optimally when action masking is turned off.")]
      public bool maskActions = true;

      // Discrete action values read from vectorAction[0] in AgentAction().
      private const int NoAction = 0; // do nothing!
      private const int Up = 1;
      private const int Down = 2;
      private const int Left = 3;
      private const int Right = 4;

      /// <summary>
      /// Looks up the scene's <see cref="GridAcademy"/> and caches it for later use.
      /// </summary>
      public override void InitializeAgent()
      {
          academy = FindObjectOfType<GridAcademy>();
          if (academy == null)
          {
              // Without an academy, SetMask/AgentReset/WaitTimeInference would fail with an
              // unexplained NullReferenceException; report the real cause up front.
              Debug.LogError("GridAgent could not find a GridAcademy in the scene.");
          }
      }

      /// <summary>
      /// Collects no numeric observations (this environment uses visual observations);
      /// only applies the action mask when <see cref="maskActions"/> is enabled.
      /// </summary>
      public override void CollectObservations()
      {
          if (maskActions)
          {
              SetMask();
          }
      }

      /// <summary>
      /// Masks the movement actions that would take the agent past the edge of the grid,
      /// treating valid cell coordinates as 0 .. gridSize - 1 on both X and Z.
      /// </summary>
      private void SetMask()
      {
          var positionX = (int)transform.position.x;
          var positionZ = (int)transform.position.z;
          var maxPosition = academy.gridSize - 1;

          if (positionX == 0)
          {
              SetActionMask(Left);
          }

          if (positionX == maxPosition)
          {
              SetActionMask(Right);
          }

          if (positionZ == 0)
          {
              SetActionMask(Down);
          }

          if (positionZ == maxPosition)
          {
              SetActionMask(Up);
          }
      }

      /// <summary>
      /// Applies one discrete action: adds a -0.01 step reward, computes the target cell,
      /// and moves there unless a collider tagged "wall" overlaps it.
      /// </summary>
      /// <param name="vectorAction">Action vector; element 0 is floored to the action id.</param>
      /// <param name="textAction">Unused by this agent.</param>
      /// <exception cref="ArgumentException">The action id is not one of the known actions.</exception>
      public override void AgentAction(float[] vectorAction, string textAction)
      {
          AddReward(-0.01f);
          int action = Mathf.FloorToInt(vectorAction[0]);

          Vector3 targetPos = transform.position;
          switch (action)
          {
              case NoAction:
                  // do nothing
                  break;
              case Right:
                  targetPos += Vector3.right;
                  break;
              case Left:
                  targetPos += Vector3.left;
                  break;
              case Up:
                  targetPos += Vector3.forward;
                  break;
              case Down:
                  targetPos += Vector3.back;
                  break;
              default:
                  throw new ArgumentException("Invalid action value");
          }

          // Probe the target cell with a box of half-extent 0.3 on each axis.
          Collider[] blockTest = Physics.OverlapBox(targetPos, new Vector3(0.3f, 0.3f, 0.3f));
          if (CountWithTag(blockTest, "wall") != 0)
          {
              // Blocked by a wall: stay in place, keeping only the step reward.
              return;
          }

          transform.position = targetPos;

          // NOTE(review): no Done() is called when the goal or pit is reached, so the
          // episode is not ended here - confirm termination is handled elsewhere.
          if (CountWithTag(blockTest, "goal") == 1)
          {
              SetReward(1f);
          }

          if (CountWithTag(blockTest, "pit") == 1)
          {
              SetReward(-1f);
          }
      }

      /// <summary>
      /// Counts the colliders whose GameObject carries the given tag.
      /// </summary>
      private static int CountWithTag(Collider[] colliders, string tag)
      {
          return colliders.Count(col => col.gameObject.CompareTag(tag));
      }

      /// <summary>
      /// Resets the agent by delegating to the academy's reset.
      /// </summary>
      public override void AgentReset()
      {
          academy.AcademyReset();
      }

      /// <summary>
      /// Unity fixed-update hook; drives decision requests.
      /// </summary>
      public void FixedUpdate()
      {
          WaitTimeInference();
      }

      /// <summary>
      /// Renders the optional camera, then requests a decision: every fixed update when
      /// the academy is not in inference mode, otherwise only once at least
      /// <see cref="timeBetweenDecisionsAtInference"/> has accumulated.
      /// </summary>
      private void WaitTimeInference()
      {
          if (renderCamera != null)
          {
              renderCamera.Render();
          }

          if (!academy.GetIsInference())
          {
              RequestDecision();
              return;
          }

          if (timeSinceDecision >= timeBetweenDecisionsAtInference)
          {
              timeSinceDecision = 0f;
              RequestDecision();
          }
          else
          {
              timeSinceDecision += Time.fixedDeltaTime;
          }
      }
  }