load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm")
load("//xla:xla.default.bzl", "xla_cc_test")
load("//xla/pjrt/gpu:package_groups.bzl", "xla_gpu_internal_packages")
load("//xla/tests:build_defs.bzl", "xla_test")
load("//xla/tsl:tsl.bzl", "internal_visibility")
load("//xla/tsl/platform:rules_cc.bzl", "cc_library")

# Integrate with PJRT rather than the GPU client directly.
package(
    # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"],
    # Targets in this package are private unless they set `visibility` themselves.
    default_visibility = ["//visibility:private"],
    licenses = ["notice"],
)

# Macro loaded from //xla/pjrt/gpu:package_groups.bzl.
# NOTE(review): presumably declares the package_group targets used for
# GPU-internal visibility -- confirm against the macro definition.
xla_gpu_internal_packages()

# C++ library `tfrt_gpu_client`: bundles the TFRT GPU client, device, buffer,
# executable and async host-to-device transfer manager sources (plus utils)
# together with their matching headers.
cc_library(
    name = "tfrt_gpu_client",
    srcs = [
        "tfrt_gpu_async_host_to_device_transfer_manager.cc",
        "tfrt_gpu_buffer.cc",
        "tfrt_gpu_client.cc",
        "tfrt_gpu_device.cc",
        "tfrt_gpu_executable.cc",
        "utils.cc",
    ],
    hdrs = [
        "tfrt_gpu_async_host_to_device_transfer_manager.h",
        "tfrt_gpu_buffer.h",
        "tfrt_gpu_client.h",
        "tfrt_gpu_device.h",
        "tfrt_gpu_executable.h",
        "utils.h",
    ],
    # `defines` (unlike `local_defines`) is also applied to every target that
    # depends on this one, so dependents compile with the same macros.
    # NOTE(review): if_cuda / if_rocm presumably return their argument only
    # when the corresponding GPU toolchain is configured -- confirm in the
    # build_defs.bzl files they are loaded from.
    defines = if_cuda(["GOOGLE_CUDA=1"]) + if_rocm(["TENSORFLOW_USE_ROCM=1"]),
    # Overrides the package's private default visibility.
    # NOTE(review): what internal_visibility() returns is decided in
    # //xla/tsl:tsl.bzl and is not visible from this file -- confirm.
    visibility = internal_visibility(["//xla/pjrt/gpu:legacy_gpu_client_users"]),
    deps = [
        ":gpu_event",
        ":thread_checker",
        ":tracked_gpu_device_buffer",
        "//xla:debug_options_flags",
        "//xla:executable_run_options",
        "//xla:future",
        "//xla:literal",
        "//xla:shape_layout",
        "//xla:shape_tree",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/gpu/collectives:gpu_clique_key",
        "//xla/backends/gpu/collectives:gpu_cliques",
        "//xla/backends/gpu/collectives:gpu_collectives",
        "//xla/client:executable_build_options",
        "//xla/client:local_client",
        "//xla/core/collectives",
        "//xla/core/collectives:clique_key",
        "//xla/core/collectives:collectives_registry",
        "//xla/hlo/builder:xla_computation",
        "//xla/hlo/ir:hlo",
        "//xla/mlir_hlo:mhlo_passes",
        "//xla/pjrt:abstract_tracked_device_buffer",
        "//xla/pjrt:async_work_runner",
        "//xla/pjrt:device_event",
        "//xla/pjrt:host_callback",
        "//xla/pjrt:host_memory_allocator",
        "//xla/pjrt:host_memory_spaces",
        "//xla/pjrt:layout_mode",
        "//xla/pjrt:mlir_to_hlo",
        "//xla/pjrt:pjrt_client",
        "//xla/pjrt:pjrt_common",
        "//xla/pjrt:pjrt_compiler",
        "//xla/pjrt:pjrt_device_description",
        "//xla/pjrt:pjrt_executable",
        "//xla/pjrt:pjrt_stream_executor_device_description",
        "//xla/pjrt:raw_buffer",
        "//xla/pjrt:scoped_async_tracking_event",
        "//xla/pjrt:semaphore",
        "//xla/pjrt:stream_executor_executable",
        "//xla/pjrt:transpose",
        "//xla/pjrt:utils",
        "//xla/pjrt:worker_thread",
        "//xla/pjrt/distributed:in_memory_key_value_store",
        "//xla/pjrt/distributed:key_value_store_interface",
        "//xla/pjrt/distributed:protocol_proto_cc",
        "//xla/pjrt/distributed:topology_util",
        "//xla/pjrt/dump",
        "//xla/pjrt/gpu:gpu_helpers",
        "//xla/pjrt/gpu:se_gpu_topology_description",
        "//xla/pjrt/plugin/xla_gpu:xla_gpu_allocator_config",
        "//xla/pjrt/plugin/xla_gpu:xla_gpu_client_options",
        "//xla/pjrt/profiling:device_time_measurement",
        "//xla/pjrt/proto:compile_options_proto_cc",
        "//xla/runtime:device_id",
        "//xla/service:buffer_assignment",
        "//xla/service:compiled_module",
        "//xla/service:compiler",
        "//xla/service:computation_placer_hdr",
        "//xla/service:executable",
        "//xla/service:generic_transfer_manager",
        "//xla/service:gpu_topology",
        "//xla/service:gpu_topology_proto_cc",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:hlo_proto_cc",
        "//xla/service:maybe_owning_device_address",
        "//xla/service:shaped_buffer",
        "//xla/service:transfer_manager",
        "//xla/service/gpu:gpu_executable_run_options",
        "//xla/stream_executor:device_address",
        "//xla/stream_executor:device_address_allocator",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:memory_space",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:stream",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/stream_executor/integrations:tf_allocator_adapter",
        "//xla/tsl/concurrency:async_value",
        "//xla/tsl/concurrency:ref_count",
        "//xla/tsl/framework:allocator",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:logging",
        "//xla/tsl/platform:status",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/container:btree",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/functional:any_invocable",
        "@com_google_absl//absl/functional:bind_front",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "@com_google_protobuf//:protobuf",
        "@eigen_archive//:eigen3",
        "@llvm-project//mlir:IR",
        "@local_tsl//tsl/platform",
        "@local_tsl//tsl/platform:casts",
        "@local_tsl//tsl/platform:fingerprint",
        "@local_tsl//tsl/platform:platform_port",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:unbounded_work_queue",
        "@local_tsl//tsl/profiler/lib:connected_traceme",
        "@local_tsl//tsl/profiler/lib:context_types_hdrs",
        "@local_tsl//tsl/profiler/lib:traceme",
    # The CUDA / ROCm header targets below are appended through the same
    # if_cuda / if_rocm helpers used for `defines` above.
    ] + if_cuda([
        # keep sorted
        "@local_config_cuda//cuda:cuda_headers",
    ]) + if_rocm([
        # keep sorted
        "@local_config_rocm//rocm:rocm_headers",
    ]),
)

# Test for :tfrt_gpu_client, declared through the xla_test macro (loaded from
# //xla/tests:build_defs.bzl) for the backends listed below.
xla_test(
    name = "tfrt_gpu_client_test",
    srcs = ["tfrt_gpu_client_test.cc"],
    backends = [
        "nvgpu_any",
    ],
    # NOTE(review): tag meanings are defined by the test infrastructure, not
    # by this file. Presumably `multi_gpu` requests several devices and
    # `no_oss` / `noasan` / `nomsan` exclude the test from open-source and
    # sanitizer configurations -- confirm.
    tags = [
        "multi_gpu",
        "no_oss",
        "noasan",
        "nomsan",
    ],
    # NOTE(review): unlike the library targets in this file, this list is not
    # in sorted label order and it carries a copybara directive; presumably the
    # order is produced by the import/export transform -- confirm before
    # re-sorting.
    deps = [
        ":gpu_event",
        ":tfrt_gpu_client",
        ":thread_checker",
        ":tracked_gpu_device_buffer",
        "@com_google_googletest//:gtest_main",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/cleanup",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "@llvm-project//mlir:IR",
        "@com_google_protobuf//:protobuf",
        "//xla:future",
        "//xla:literal",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:types",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/gpu:ffi",
        "//xla/ffi",
        "//xla/ffi:ffi_api",
        "//xla/hlo/builder:xla_computation",
        "//xla/hlo/parser:hlo_parser",
        "//xla/hlo/testlib:test",
        "//xla/pjrt:host_memory_spaces",
        "//xla/pjrt:mlir_to_hlo",
        "//xla/pjrt:pjrt_client",
        "//xla/pjrt:pjrt_common",
        "//xla/pjrt:pjrt_compiler",
        "//xla/pjrt:pjrt_executable",
        "//xla/pjrt:raw_buffer",
        "//xla/pjrt/distributed:in_memory_key_value_store",
        "//xla/pjrt/plugin/xla_gpu:xla_gpu_client_options",
        "//xla/pjrt/proto:compile_options_proto_cc",
        "//xla/service:gpu_topology",
        "//xla/service:gpu_topology_proto_cc",
        "//xla/service:platform_util",
        "//xla/stream_executor:device_address",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:stream",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/tests:literal_test_util",
        "//xla/tsl/concurrency:async_value",
        # copybara:uncomment "//xla/tsl/framework:allocator",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:status",
        "//xla/tsl/platform:statusor",
        "@local_tsl//tsl/platform:casts",
        "@local_tsl//tsl/platform:platform_port",
        "@local_tsl//tsl/platform:protobuf",
    ],
)

# C++ library `gpu_event` (gpu_event.cc / gpu_event.h). No `visibility` is set,
# so the package default applies.
cc_library(
    name = "gpu_event",
    srcs = [
        "gpu_event.cc",
    ],
    hdrs = [
        "gpu_event.h",
    ],
    deps = [
        # In-repo dependency.
        "//xla/tsl/concurrency:async_value",
        # Abseil.
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/types:span",
    ],
)

# Test target for :gpu_event, built from gpu_event_test.cc and linked against
# gtest_main. It sets no `tags`.
xla_cc_test(
    name = "gpu_event_test",
    srcs = [
        "gpu_event_test.cc",
    ],
    deps = [
        # Library under test.
        ":gpu_event",
        "//xla/tsl/concurrency:async_value",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:status_matchers",
        # Test framework and main().
        "@com_google_googletest//:gtest_main",
    ],
)

# C++ library `tracked_gpu_device_buffer` (tracked_gpu_device_buffer.cc / .h).
# Depends on :gpu_event from this package; no `visibility` is set, so the
# package default applies.
cc_library(
    name = "tracked_gpu_device_buffer",
    srcs = [
        "tracked_gpu_device_buffer.cc",
    ],
    hdrs = [
        "tracked_gpu_device_buffer.h",
    ],
    deps = [
        # Same-package dependency.
        ":gpu_event",
        # In-repo XLA / TSL dependencies.
        "//xla:shape_tree",
        "//xla:shape_util",
        "//xla:util",
        "//xla/pjrt:pjrt_client",
        "//xla/service:shaped_buffer",
        "//xla/stream_executor:device_address",
        "//xla/stream_executor:device_address_allocator",
        "//xla/stream_executor:event",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tsl/concurrency:async_value",
        "//xla/tsl/framework:allocator",
        "//xla/tsl/platform:statusor",
        # External repositories.
        "@com_google_absl//absl/functional:any_invocable",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:stacktrace",
    ],
)

# C++ library `thread_checker` (thread_checker.cc / thread_checker.h); its only
# declared dependency is Abseil's log:check target.
cc_library(
    name = "thread_checker",
    srcs = [
        "thread_checker.cc",
    ],
    hdrs = [
        "thread_checker.h",
    ],
    deps = [
        "@com_google_absl//absl/log:check",
    ],
)

# Test target for :tracked_gpu_device_buffer, built from
# tracked_gpu_device_buffer_test.cc.
xla_cc_test(
    name = "tracked_gpu_device_buffer_test",
    srcs = ["tracked_gpu_device_buffer_test.cc"],
    # NOTE(review): tag meanings are defined by the test infrastructure, not
    # by this file. Presumably `requires-gpu-nvidia:2` requests two NVIDIA
    # GPUs and `no_oss` / `noasan` / `nomsan` exclude the test from
    # open-source and sanitizer configurations -- confirm.
    tags = [
        "gpu",
        "no_oss",
        "noasan",
        "nomsan",
        "requires-gpu-nvidia:2",
    ],
    # NOTE(review): this list is not in sorted label order and carries a
    # copybara directive; presumably the order is produced by the
    # import/export transform -- confirm before re-sorting.
    deps = [
        ":gpu_event",
        ":tracked_gpu_device_buffer",
        "@com_google_googletest//:gtest_main",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "//xla:literal",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/client:client_library",
        "//xla/client:local_client",
        "//xla/pjrt:pjrt_client",
        "//xla/pjrt:pjrt_common",
        "//xla/service:gpu_plugin",
        "//xla/service:shaped_buffer",
        "//xla/stream_executor:device_address",
        "//xla/stream_executor:device_address_allocator",
        "//xla/tsl/concurrency:async_value",
        # copybara:uncomment "//xla/tsl/framework:allocator",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:statusor",
    ],
)

# Test target built from tfrt_gpu_buffer_test.cc; depends on :tfrt_gpu_client
# and the other libraries of this package.
xla_cc_test(
    name = "tfrt_gpu_buffer_test",
    srcs = [
        "tfrt_gpu_buffer_test.cc",
    ],
    # NOTE(review): tag meanings come from the test infrastructure; presumably
    # `requires-gpu-nvidia:2` requests two NVIDIA GPUs and the no* tags exclude
    # the test from open-source / sanitizer configurations -- confirm.
    tags = [
        "gpu",
        "no_oss",
        "noasan",
        "nomsan",
        "requires-gpu-nvidia:2",
    ],
    deps = [
        # Same-package libraries.
        ":gpu_event",
        ":tfrt_gpu_client",
        ":thread_checker",
        ":tracked_gpu_device_buffer",
        # In-repo XLA / TSL dependencies.
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/pjrt:pjrt_client",
        "//xla/pjrt/plugin/xla_gpu:xla_gpu_client_options",
        "//xla/service:gpu_plugin",
        "//xla/tsl/concurrency:async_value",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:statusor",
        # External repositories.
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/platform:casts",
    ],
)
