// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "ray/gcs/gcs_actor_scheduler.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "ray/common/asio/asio_util.h"
#include "ray/common/ray_config.h"
#include "ray/util/time.h"

namespace ray {
namespace gcs {

GcsActorScheduler::GcsActorScheduler(
    instrumented_io_context &io_context,
    GcsActorTable &gcs_actor_table,
    const GcsNodeManager &gcs_node_manager,
    ClusterLeaseManager &cluster_lease_manager,
    GcsActorSchedulerFailureCallback schedule_failure_handler,
    GcsActorSchedulerSuccessCallback schedule_success_handler,
    rpc::RayletClientPool &raylet_client_pool,
    rpc::CoreWorkerClientPool &worker_client_pool,
    ray::observability::MetricInterface &scheduler_placement_time_ms_histogram,
    std::function<void(const NodeID &, const rpc::ResourcesData &)>
        normal_task_resources_changed_callback)
    : io_context_(io_context),
      gcs_actor_table_(gcs_actor_table),
      gcs_node_manager_(gcs_node_manager),
      cluster_lease_manager_(cluster_lease_manager),
      schedule_failure_handler_(std::move(schedule_failure_handler)),
      schedule_success_handler_(std::move(schedule_success_handler)),
      raylet_client_pool_(raylet_client_pool),
      worker_client_pool_(worker_client_pool),
      scheduler_placement_time_ms_histogram_(scheduler_placement_time_ms_histogram),
      normal_task_resources_changed_callback_(
          std::move(normal_task_resources_changed_callback)) {
  RAY_CHECK(schedule_failure_handler_ != nullptr && schedule_success_handler_ != nullptr);
}

void GcsActorScheduler::Schedule(std::shared_ptr<GcsActor> actor) {
  RAY_CHECK(actor->GetNodeID().IsNil() && actor->GetWorkerID().IsNil());

  if (RayConfig::instance().gcs_actor_scheduling_enabled() &&
      !actor->GetLeaseSpecification().GetRequiredResources().IsEmpty()) {
    ScheduleByGcs(actor);
  } else {
    ScheduleByRaylet(actor);
  }
}

void GcsActorScheduler::ScheduleByGcs(std::shared_ptr<GcsActor> actor) {
  auto reply = std::make_shared<rpc::RequestWorkerLeaseReply>();
  auto send_reply_callback = [this, actor, reply](Status status,
                                                  std::function<void()> success,
                                                  std::function<void()> failure) {
    if (reply->canceled()) {
      HandleRequestWorkerLeaseCanceled(
          actor,
          NodeID::Nil(),
          reply->failure_type(),
          /*scheduling_failure_message*/ reply->scheduling_failure_message());
      return;
    }
    const auto &retry_at_raylet_address = reply->retry_at_raylet_address();
    RAY_CHECK(!retry_at_raylet_address.node_id().empty());
    auto node_id = NodeID::FromBinary(retry_at_raylet_address.node_id());
    auto node = gcs_node_manager_.GetAliveNode(node_id);
    RAY_CHECK(node.has_value());

    // Update the address of the actor as it is tied to a node.
    rpc::Address address;
    address.set_node_id(node.value()->node_id());
    actor->UpdateAddress(address);

    RAY_CHECK(node_to_actors_when_leasing_[actor->GetNodeID()]
                  .emplace(actor->GetActorID())
                  .second);

    actor->SetAcquiredResources(ResourceMapToResourceRequest(
        actor->GetLeaseSpecification().GetRequiredResources().GetResourceMap(), false));
    // Lease worker directly from the node.
    actor->SetGrantOrReject(true);
    LeaseWorkerFromNode(actor, node.value());
  };

  // Queue and schedule the actor locally (gcs).
  const auto &owner_node = gcs_node_manager_.GetAliveNode(actor->GetOwnerNodeID());
  RayLease lease(
      actor->GetLeaseSpecification(),
      owner_node.has_value() ? actor->GetOwnerNodeID().Binary() : std::string());
  cluster_lease_manager_.QueueAndScheduleLease(
      std::move(lease),
      /*grant_or_reject=*/false,
      /*is_selected_based_on_locality=*/false,
      {ray::raylet::internal::ReplyCallback(std::move(send_reply_callback),
                                            reply.get())});
}

void GcsActorScheduler::ScheduleByRaylet(std::shared_ptr<GcsActor> actor) {
  // Select a node to where the actor is forwarded.
  auto node_id = SelectForwardingNode(actor);

  auto node = gcs_node_manager_.GetAliveNode(node_id);
  if (!node.has_value()) {
    // There are no available nodes to schedule the actor, so just trigger the failed
    // handler.
    schedule_failure_handler_(std::move(actor),
                              rpc::RequestWorkerLeaseReply::SCHEDULING_FAILED,
                              "No available nodes to schedule the actor");
    return;
  }

  // Update the address of the actor as it is tied to a node.
  rpc::Address address;
  address.set_node_id(node.value()->node_id());
  actor->UpdateAddress(address);

  RAY_CHECK(node_to_actors_when_leasing_[actor->GetNodeID()]
                .emplace(actor->GetActorID())
                .second);

  // Lease worker directly from the node.
  actor->SetGrantOrReject(false);
  LeaseWorkerFromNode(actor, node.value());
}

NodeID GcsActorScheduler::SelectForwardingNode(std::shared_ptr<GcsActor> actor) {
  // Select a node to lease worker for the actor.
  std::shared_ptr<const rpc::GcsNodeInfo> node;

  // If an actor has resource requirements, we will try to schedule it on the same node as
  // the owner if possible.
  const auto &lease_spec = actor->GetLeaseSpecification();
  if (!lease_spec.GetRequiredResources().IsEmpty()) {
    auto maybe_node = gcs_node_manager_.GetAliveNode(actor->GetOwnerNodeID());
    node = maybe_node.has_value() ? maybe_node.value()
                                  : gcs_node_manager_.SelectRandomAliveNode();
  } else {
    node = gcs_node_manager_.SelectRandomAliveNode();
  }

  return node ? NodeID::FromBinary(node->node_id()) : NodeID::Nil();
}

void GcsActorScheduler::Reschedule(std::shared_ptr<GcsActor> actor) {
  if (!actor->GetWorkerID().IsNil()) {
    RAY_LOG(INFO) << "Actor " << actor->GetActorID()
                  << " is already tied to a leased worker. Create actor directly on "
                     "worker. Job id = "
                  << actor->GetActorID().JobId();
    auto leased_worker = std::make_shared<GcsLeasedWorker>(
        actor->GetAddress(),
        VectorFromProtobuf(actor->GetMutableActorTableData()->resource_mapping()),
        actor->GetActorID());
    auto iter_node = node_to_workers_when_creating_.find(actor->GetNodeID());
    if (iter_node != node_to_workers_when_creating_.end()) {
      if (0 == iter_node->second.count(leased_worker->GetWorkerID())) {
        iter_node->second.emplace(leased_worker->GetWorkerID(), leased_worker);
      }
    } else {
      node_to_workers_when_creating_[actor->GetNodeID()].emplace(
          leased_worker->GetWorkerID(), leased_worker);
    }
    CreateActorOnWorker(actor, leased_worker);
  } else {
    Schedule(actor);
  }
}

std::vector<ActorID> GcsActorScheduler::CancelOnNode(const NodeID &node_id) {
  // Remove all the actors from the map associated with this node, and return them as they
  // will be reconstructed later.
  std::vector<ActorID> actor_ids;

  // Remove all actors in phase of leasing.
  {
    auto iter = node_to_actors_when_leasing_.find(node_id);
    if (iter != node_to_actors_when_leasing_.end()) {
      actor_ids.insert(actor_ids.end(), iter->second.begin(), iter->second.end());
      node_to_actors_when_leasing_.erase(iter);
    }
  }

  // Remove all actors in phase of creating.
  {
    auto iter = node_to_workers_when_creating_.find(node_id);
    if (iter != node_to_workers_when_creating_.end()) {
      for (auto &entry : iter->second) {
        actor_ids.emplace_back(entry.second->GetAssignedActorID());
      }
      node_to_workers_when_creating_.erase(iter);
    }
  }

  return actor_ids;
}

void GcsActorScheduler::CancelOnLeasing(const NodeID &node_id,
                                        const ActorID &actor_id,
                                        const LeaseID &lease_id) {
  // NOTE: This method will cancel the outstanding lease request and remove leasing
  // information from the internal state.
  RAY_LOG(DEBUG) << "Canceling worker lease request " << lease_id;
  auto node_it = node_to_actors_when_leasing_.find(node_id);
  RAY_CHECK(node_it != node_to_actors_when_leasing_.end());
  node_it->second.erase(actor_id);
  if (node_it->second.empty()) {
    node_to_actors_when_leasing_.erase(node_it);
  }

  const auto alive_nodes = gcs_node_manager_.GetAllAliveNodes();
  const auto &iter = alive_nodes.find(node_id);
  if (iter != alive_nodes.end()) {
    const auto &node_info = iter->second;
    rpc::Address address;
    address.set_node_id(node_info->node_id());
    address.set_ip_address(node_info->node_manager_address());
    address.set_port(node_info->node_manager_port());
    auto raylet_client = raylet_client_pool_.GetOrConnectByAddress(address);
    raylet_client->CancelWorkerLease(
        lease_id, [](const Status &status, const rpc::CancelWorkerLeaseReply &reply) {});
  }
}

ActorID GcsActorScheduler::CancelOnWorker(const NodeID &node_id,
                                          const WorkerID &worker_id) {
  // Remove the worker from creating map and return ID of the actor associated with the
  // removed worker if exist, else return NilID.
  ActorID assigned_actor_id;
  auto iter = node_to_workers_when_creating_.find(node_id);
  if (iter != node_to_workers_when_creating_.end()) {
    auto actor_iter = iter->second.find(worker_id);
    if (actor_iter != iter->second.end()) {
      assigned_actor_id = actor_iter->second->GetAssignedActorID();
      iter->second.erase(actor_iter);
      if (iter->second.empty()) {
        node_to_workers_when_creating_.erase(iter);
      }
    }
  }
  return assigned_actor_id;
}

void GcsActorScheduler::ReleaseUnusedActorWorkers(
    const absl::flat_hash_map<NodeID, std::vector<WorkerID>> &node_to_workers) {
  // The purpose of this function is to release leased workers that may be leaked.
  // When GCS restarts, it doesn't know which workers it has leased in the previous
  // lifecycle. In this case, GCS will send a list of worker ids that are still needed.
  // And Raylet will release other leased workers.
  // If the node is dead, there is no need to send the request of release unused
  // workers.
  const auto alive_nodes = gcs_node_manager_.GetAllAliveNodes();
  for (const auto &alive_node : alive_nodes) {
    const auto &node_id = alive_node.first;
    nodes_of_releasing_unused_workers_.insert(node_id);

    rpc::Address address;
    address.set_node_id(alive_node.second->node_id());
    address.set_ip_address(alive_node.second->node_manager_address());
    address.set_port(alive_node.second->node_manager_port());
    auto raylet_client = raylet_client_pool_.GetOrConnectByAddress(address);
    auto release_unused_workers_callback =
        [this, node_id](const Status &status,
                        const rpc::ReleaseUnusedActorWorkersReply &reply) {
          nodes_of_releasing_unused_workers_.erase(node_id);
        };
    auto iter = node_to_workers.find(alive_node.first);

    // When GCS restarts, the reply of RequestWorkerLease may not be processed, so some
    // nodes do not have leased workers. In this case, GCS will send an empty list.
    auto workers_in_use =
        iter != node_to_workers.end() ? iter->second : std::vector<WorkerID>{};
    raylet_client->ReleaseUnusedActorWorkers(workers_in_use,
                                             release_unused_workers_callback);
  }
}

void GcsActorScheduler::LeaseWorkerFromNode(
    std::shared_ptr<GcsActor> actor, std::shared_ptr<const rpc::GcsNodeInfo> node) {
  RAY_CHECK(actor && node);

  auto node_id = NodeID::FromBinary(node->node_id());
  RAY_LOG(INFO)
          .WithField(actor->GetActorID())
          .WithField(actor->GetActorID().JobId())
          .WithField(node_id)
      << "Leasing worker for actor.";

  // We need to ensure that the RequestWorkerLease won't be sent before the reply of
  // ReleaseUnusedActorWorkers is returned.
  if (nodes_of_releasing_unused_workers_.contains(node_id)) {
    RetryLeasingWorkerFromNode(actor, node);
    return;
  }

  rpc::Address remote_address;
  remote_address.set_node_id(node->node_id());
  remote_address.set_ip_address(node->node_manager_address());
  remote_address.set_port(node->node_manager_port());
  auto raylet_client = raylet_client_pool_.GetOrConnectByAddress(remote_address);
  // Actor leases should be sent to the raylet immediately, so we should never build up a
  // backlog in GCS.
  // Counter for generating unique lease IDs.
  static uint32_t lease_id_counter = 0;
  actor->GetMutableLeaseSpec()->set_lease_id(
      LeaseID::FromWorker(WorkerID::FromRandom(), lease_id_counter++).Binary());
  raylet_client->RequestWorkerLease(
      actor->GetLeaseSpecification().GetMessage(),
      actor->GetGrantOrReject(),
      [this, actor, node](const Status &status,
                          const rpc::RequestWorkerLeaseReply &reply) {
        HandleWorkerLeaseReply(actor, node, status, reply);
      },
      0);
}

void GcsActorScheduler::RetryLeasingWorkerFromNode(
    std::shared_ptr<GcsActor> actor, std::shared_ptr<const rpc::GcsNodeInfo> node) {
  RAY_UNUSED(execute_after(
      io_context_,
      [this, node, actor] { DoRetryLeasingWorkerFromNode(actor, node); },
      std::chrono::milliseconds(
          RayConfig::instance().gcs_lease_worker_retry_interval_ms())));
}

void GcsActorScheduler::DoRetryLeasingWorkerFromNode(
    std::shared_ptr<GcsActor> actor, std::shared_ptr<const rpc::GcsNodeInfo> node) {
  auto iter = node_to_actors_when_leasing_.find(actor->GetNodeID());
  if (iter != node_to_actors_when_leasing_.end()) {
    // If the node is still available, the actor must be still in the
    // leasing map as it is erased from leasing map only when
    // `CancelOnNode`, `RequestWorkerLeaseReply` or `CancelOnLeasing` is received, so try
    // leasing again.
    if (iter->second.count(actor->GetActorID())) {
      LeaseWorkerFromNode(actor, node);
    }
  }
}

void GcsActorScheduler::HandleWorkerLeaseGrantedReply(
    std::shared_ptr<GcsActor> actor,
    const ray::rpc::RequestWorkerLeaseReply &reply,
    std::shared_ptr<const rpc::GcsNodeInfo> node) {
  const auto &retry_at_raylet_address = reply.retry_at_raylet_address();
  const auto &worker_address = reply.worker_address();
  if (worker_address.node_id().empty()) {
    // The worker did not succeed in the lease, but the specified node returned a new
    // node, and then try again on the new node.
    RAY_CHECK(!retry_at_raylet_address.node_id().empty());
    auto spill_back_node_id = NodeID::FromBinary(retry_at_raylet_address.node_id());
    auto maybe_spill_back_node = gcs_node_manager_.GetAliveNode(spill_back_node_id);
    if (maybe_spill_back_node.has_value()) {
      auto spill_back_node = maybe_spill_back_node.value();
      actor->UpdateAddress(retry_at_raylet_address);
      RAY_CHECK(node_to_actors_when_leasing_[actor->GetNodeID()]
                    .emplace(actor->GetActorID())
                    .second);
      // When receving the lease request, the spillback node only detects whether there
      // are enough resources locally. If not, it rejects the request and we will then go
      // back to the actor's owner's node for scheduling again. This design aims to
      // reducing scheduling latency due to the stale resource view of spillback nodes.
      actor->SetGrantOrReject(true);
      LeaseWorkerFromNode(actor, spill_back_node);
    } else {
      // If the spill back node is dead, we need to schedule again.
      actor->UpdateAddress(rpc::Address());
      actor->GetMutableActorTableData()->clear_resource_mapping();
      Schedule(actor);
    }
  } else {
    // The worker is leased successfully from the specified node.
    std::vector<rpc::ResourceMapEntry> resources;
    for (auto &resource : reply.resource_mapping()) {
      resources.emplace_back(resource);
      actor->GetMutableActorTableData()->add_resource_mapping()->CopyFrom(resource);
    }
    auto leased_worker = std::make_shared<GcsLeasedWorker>(
        worker_address, std::move(resources), actor->GetActorID());
    auto node_id = leased_worker->GetNodeID();
    RAY_CHECK(node_to_workers_when_creating_[node_id]
                  .emplace(leased_worker->GetWorkerID(), leased_worker)
                  .second);
    rpc::Address actor_local_raylet_address;
    actor_local_raylet_address.set_node_id(node->node_id());
    actor_local_raylet_address.set_ip_address(node->node_manager_address());
    actor_local_raylet_address.set_port(node->node_manager_port());
    actor->UpdateLocalRayletAddress(actor_local_raylet_address);
    actor->UpdateAddress(leased_worker->GetAddress());
    actor->GetMutableActorTableData()->set_pid(reply.worker_pid());
    actor->GetMutableTaskSpec()->set_lease_grant_timestamp_ms(current_sys_time_ms());
    actor->GetCreationTaskSpecification().EmitTaskMetrics(
        scheduler_placement_time_ms_histogram_);
    // Make sure to connect to the client before persisting actor info to GCS.
    // Without this, there could be a possible race condition. Related issues:
    // https://github.com/ray-project/ray/pull/9215/files#r449469320
    worker_client_pool_.GetOrConnect(leased_worker->GetAddress());
    gcs_actor_table_.Put(actor->GetActorID(),
                         actor->GetActorTableData(),
                         {[this, actor, leased_worker](Status status) {
                            RAY_CHECK_OK(status);
                            if (actor->GetState() == rpc::ActorTableData::DEAD) {
                              // Actor has already been killed.
                              return;
                            }
                            CreateActorOnWorker(actor, leased_worker);
                          },
                          io_context_});
  }
}

void GcsActorScheduler::HandleRequestWorkerLeaseCanceled(
    std::shared_ptr<GcsActor> actor,
    const NodeID &node_id,
    rpc::RequestWorkerLeaseReply::SchedulingFailureType failure_type,
    const std::string &scheduling_failure_message) {
  RAY_LOG(INFO)
          .WithField(actor->GetActorID())
          .WithField(actor->GetActorID().JobId())
          .WithField(node_id)
      << "Lease request was canceled: "
      << rpc::RequestWorkerLeaseReply::SchedulingFailureType_Name(failure_type);

  schedule_failure_handler_(actor, failure_type, scheduling_failure_message);
}

void GcsActorScheduler::CreateActorOnWorker(std::shared_ptr<GcsActor> actor,
                                            std::shared_ptr<GcsLeasedWorker> worker) {
  RAY_CHECK(actor && worker);
  RAY_LOG(INFO)
          .WithField(actor->GetActorID())
          .WithField(worker->GetWorkerID())
          .WithField(actor->GetNodeID())
          .WithField(actor->GetActorID().JobId())
      << "Submitting actor creation task to worker.";

  auto request = std::make_unique<rpc::PushTaskRequest>();
  request->set_intended_worker_id(worker->GetWorkerID().Binary());
  request->mutable_task_spec()->CopyFrom(
      actor->GetCreationTaskSpecification().GetMessage());
  google::protobuf::RepeatedPtrField<rpc::ResourceMapEntry> resources;
  for (auto resource : worker->GetLeasedResources()) {
    resources.Add(std::move(resource));
  }
  request->mutable_resource_mapping()->CopyFrom(resources);

  auto client = worker_client_pool_.GetOrConnect(worker->GetAddress());
  client->PushNormalTask(
      std::move(request),
      [this, actor, worker](Status status, const rpc::PushTaskReply &reply) {
        // If the actor is still in the creating map and the status is ok, remove the
        // actor from the creating map and invoke the schedule_success_handler_.
        // Otherwise, create again, because it may be a network exception.
        // If the actor is not in the creating map, it means that the actor has been
        // cancelled as the worker or node is dead, just do nothing in this case because
        // the gcs_actor_manager will reconstruct it again.
        auto iter = node_to_workers_when_creating_.find(actor->GetNodeID());
        if (iter != node_to_workers_when_creating_.end()) {
          auto worker_iter = iter->second.find(actor->GetWorkerID());
          if (worker_iter != iter->second.end()) {
            RAY_LOG(DEBUG) << "Worker " << worker_iter->first << " is in creating map.";
            // The worker is still in the creating map.
            if (status.ok()) {
              // Remove related worker in phase of creating.
              iter->second.erase(worker_iter);
              if (iter->second.empty()) {
                node_to_workers_when_creating_.erase(iter);
              }
              RAY_LOG(INFO)
                      .WithField(actor->GetActorID())
                      .WithField(worker->GetWorkerID())
                      .WithField(actor->GetActorID().JobId())
                      .WithField(actor->GetNodeID())
                  << "Actor creation task succeeded.";
              schedule_success_handler_(actor, reply);
            } else {
              RAY_LOG(INFO)
                      .WithField(actor->GetActorID())
                      .WithField(worker->GetWorkerID())
                      .WithField(actor->GetActorID().JobId())
                      .WithField(actor->GetNodeID())
                  << "Actor creation task failed, will be retried.";
              RetryCreatingActorOnWorker(actor, worker);
            }
          }
        } else {
          RAY_LOG(DEBUG) << "Actor " << actor->GetActorID()
                         << " has been removed from creating map. Actor status "
                         << actor->GetState();
          auto actor_id = status.ok() ? actor->GetActorID() : ActorID::Nil();
          if (actor->LocalRayletAddress().has_value()) {
            KillLeasedWorkerForActor(
                actor->LocalRayletAddress().value(), worker->GetAddress(), actor_id);
          }
        }
      });
}

void GcsActorScheduler::RetryCreatingActorOnWorker(
    std::shared_ptr<GcsActor> actor, std::shared_ptr<GcsLeasedWorker> worker) {
  RAY_LOG(DEBUG) << "Retry creating actor " << actor->GetActorID() << " on worker "
                 << worker->GetWorkerID();
  RAY_UNUSED(execute_after(
      io_context_,
      [this, actor, worker] { DoRetryCreatingActorOnWorker(actor, worker); },
      std::chrono::milliseconds(
          RayConfig::instance().gcs_create_actor_retry_interval_ms())));
}

void GcsActorScheduler::DoRetryCreatingActorOnWorker(
    std::shared_ptr<GcsActor> actor, std::shared_ptr<GcsLeasedWorker> worker) {
  auto iter = node_to_workers_when_creating_.find(actor->GetNodeID());
  if (iter != node_to_workers_when_creating_.end()) {
    auto worker_iter = iter->second.find(actor->GetWorkerID());
    if (worker_iter != iter->second.end()) {
      // The worker is still in the creating map, try create again.
      // The worker is erased from creating map only when `CancelOnNode`
      // or `CancelOnWorker` or the actor is created successfully.
      CreateActorOnWorker(actor, worker);
    }
  }
}

bool GcsActorScheduler::KillLeasedWorkerForActor(const rpc::Address &raylet_address,
                                                 const rpc::Address &worker_address,
                                                 ActorID actor_id) {
  if (raylet_address.node_id().empty() || worker_address.node_id().empty()) {
    RAY_LOG(DEBUG) << "Invalid raylet or worker address, skip the killing of actor "
                   << actor_id;
    return false;
  }

  auto raylet_client = raylet_client_pool_.GetOrConnectByAddress(raylet_address);
  rpc::KillLocalActorRequest request;
  // death_cause is not set because the actor was already killed and we are just cleaning
  // up the worker leased to the actor.
  request.set_intended_actor_id(actor_id.Binary());
  request.set_worker_id(worker_address.worker_id());
  request.set_force_kill(true);

  raylet_client->KillLocalActor(
      request, [actor_id](const Status &status, const rpc::KillLocalActorReply &) {
        if (!status.ok()) {
          RAY_LOG(ERROR) << "Failed to kill actor " << actor_id
                         << ", return status: " << status.ToString();
        } else {
          RAY_LOG(INFO) << "Killed actor " << actor_id << " successfully.";
        }
      });
  return true;
}

std::string GcsActorScheduler::DebugString() const {
  std::ostringstream stream;
  stream << "GcsActorScheduler: "
         << "\n- node_to_actors_when_leasing_: " << node_to_actors_when_leasing_.size()
         << "\n- node_to_workers_when_creating_: "
         << node_to_workers_when_creating_.size()
         << "\n- nodes_of_releasing_unused_workers_: "
         << nodes_of_releasing_unused_workers_.size();
  return stream.str();
}

void GcsActorScheduler::HandleWorkerLeaseReply(
    std::shared_ptr<GcsActor> actor,
    std::shared_ptr<const rpc::GcsNodeInfo> node,
    const Status &status,
    const rpc::RequestWorkerLeaseReply &reply) {
  // If the actor is still in the leasing map and the status is ok, remove the actor
  // from the leasing map and handle the reply. Otherwise, lease again, because it
  // may be a network exception.
  // If the actor is not in the leasing map, it means that the actor has been
  // cancelled as the node is dead, just do nothing in this case because the
  // gcs_actor_manager will reconstruct it again.
  auto node_id = NodeID::FromBinary(node->node_id());
  auto iter = node_to_actors_when_leasing_.find(node_id);
  if (iter != node_to_actors_when_leasing_.end()) {
    auto actor_iter = iter->second.find(actor->GetActorID());
    if (actor_iter == iter->second.end()) {
      // if actor is not in leasing state, it means it is cancelled.
      RAY_LOG(INFO).WithField(actor->GetActorID()).WithField(actor->GetActorID().JobId())
          << "Ignoring granted lease for canceled lease request.";
      if (actor->GetState() == rpc::ActorTableData::DEAD) {
        // If the actor has been killed, we need to kill the worker too
        // otherwise, the worker will be leaked.
        RAY_LOG(DEBUG) << "Actor " << actor->GetActorID() << " is dead, kill the worker.";
        auto raylet_address = rpc::RayletClientPool::GenerateRayletAddress(
            node_id, node->node_manager_address(), node->node_manager_port());
        KillLeasedWorkerForActor(raylet_address, reply.worker_address(), ActorID::Nil());
      }
      return;
    }

    if (status.ok()) {
      if (reply.canceled()) {
        HandleRequestWorkerLeaseCanceled(
            actor,
            node_id,
            reply.failure_type(),
            /*scheduling_failure_message*/ reply.scheduling_failure_message());
        return;
      }

      if (reply.worker_address().node_id().empty() &&
          reply.retry_at_raylet_address().node_id().empty() && !reply.rejected()) {
        // Actor creation task has been cancelled. It is triggered by `ray.kill`. If
        // the number of remaining restarts of the actor is not equal to 0, GCS will
        // reschedule the actor, so it return directly here.
        RAY_LOG(DEBUG) << "Actor " << actor->GetActorID()
                       << " creation task has been cancelled.";
        return;
      }

      // Remove the actor from the leasing map as the reply is returned from the
      // remote node.
      iter->second.erase(actor_iter);
      if (iter->second.empty()) {
        node_to_actors_when_leasing_.erase(iter);
      }

      if (reply.rejected()) {
        RAY_LOG(INFO) << "Failed to lease worker from node " << node_id << " for actor "
                      << actor->GetActorID()
                      << " as the resources are not enough, job id = "
                      << actor->GetActorID().JobId();
        HandleWorkerLeaseRejectedReply(actor, reply);
      } else {
        RAY_LOG(INFO) << "Finished leasing worker from " << node_id << " for actor "
                      << actor->GetActorID()
                      << ", job id = " << actor->GetActorID().JobId();
        HandleWorkerLeaseGrantedReply(actor, reply, node);
      }
    } else {
      RetryLeasingWorkerFromNode(actor, node);
    }
  } else if (actor->GetState() == rpc::ActorTableData::DEAD) {
    // If the actor has been killed, we need to kill the worker too
    // otherwise, the worker will be leaked.
    RAY_LOG(DEBUG) << "Actor " << actor->GetActorID() << " is dead, kill the worker.";
    auto raylet_address = rpc::RayletClientPool::GenerateRayletAddress(
        node_id, node->node_manager_address(), node->node_manager_port());
    KillLeasedWorkerForActor(raylet_address, reply.worker_address(), ActorID::Nil());
  }
}

void GcsActorScheduler::HandleWorkerLeaseRejectedReply(
    std::shared_ptr<GcsActor> actor, const rpc::RequestWorkerLeaseReply &reply) {
  // The request was rejected because of insufficient resources.
  if (!actor->GetAcquiredResources().IsEmpty()) {
    // Return the actor's acquired resources, which updates GCS' resource view.
    ReturnActorAcquiredResources(actor);
  }
  if (normal_task_resources_changed_callback_ &&
      RayConfig::instance().gcs_actor_scheduling_enabled()) {
    normal_task_resources_changed_callback_(actor->GetNodeID(), reply.resources_data());
  }
  actor->UpdateAddress(rpc::Address());
  Reschedule(actor);
}

void GcsActorScheduler::OnActorDestruction(std::shared_ptr<GcsActor> actor) {
  if (!actor->GetAcquiredResources().IsEmpty()) {
    ReturnActorAcquiredResources(actor);
    cluster_lease_manager_.ScheduleAndGrantLeases();
  }
}

void GcsActorScheduler::ReturnActorAcquiredResources(std::shared_ptr<GcsActor> actor) {
  auto &cluster_resource_manager =
      cluster_lease_manager_.GetClusterResourceScheduler().GetClusterResourceManager();
  cluster_resource_manager.AddNodeAvailableResources(
      scheduling::NodeID(actor->GetNodeID().Binary()),
      actor->GetAcquiredResources().GetResourceSet());
  actor->SetAcquiredResources(ResourceRequest());
}

size_t GcsActorScheduler::GetPendingActorsCount() const {
  return cluster_lease_manager_.GetInfeasibleQueueSize() +
         cluster_lease_manager_.GetPendingQueueSize();
}

bool GcsActorScheduler::CancelInFlightActorScheduling(
    const std::shared_ptr<GcsActor> &actor) {
  return cluster_lease_manager_.CancelLease(actor->GetLeaseSpecification().LeaseId());
}

}  // namespace gcs
}  // namespace ray
