load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm_is_configured")
load("@rules_cc//cc:cc_library.bzl", "cc_library")
load("//xla:xla.default.bzl", "xla_cc_binary", "xla_cc_test")
load("//xla/tests:build_defs.bzl", "xla_test")
load(
    "//xla/tsl/platform/default:cuda_build_defs.bzl",
    "if_cuda_is_configured",
)

# Package-wide defaults. Unless a target sets its own `visibility`, targets in
# this package are visible only to the autotuner backends and the GPU service
# subpackages listed below.
package(
    # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"],
    default_visibility = [
        "//xla/backends/autotuner:__subpackages__",
        "//xla/service/gpu:__subpackages__",
    ],
    licenses = ["notice"],
)

# Header-only library (no `srcs`) exporting gpu_codegen_backend.h. Most backend
# libraries in this package (e.g. :cublas, :cudnn, :triton, :fission_backend)
# list it as a dependency.
cc_library(
    name = "gpu_codegen_backend",
    hdrs = ["gpu_codegen_backend.h"],
    deps = [
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:gpu_topology",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tools:hlo_decomposer_lib",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Test for :gpu_codegen_backend. Declared with `xla_cc_test` rather than
# `xla_test`, so no `backends` list is given here.
xla_cc_test(
    name = "gpu_codegen_backend_test",
    srcs = ["gpu_codegen_backend_test.cc"],
    deps = [
        ":gpu_codegen_backend",
        "//xla:xla_proto_cc",
        "@com_google_googletest//:gtest_main",
    ],
)

# Block-level emitter backend library (block_level_emitter.{h,cc}), built on
# :gpu_codegen_backend. Tagged "gpu"; depends on the Triton codegen support and
# TMA utility targets as well as the GPU indexing performance model.
cc_library(
    name = "block_level_emitter",
    srcs = ["block_level_emitter.cc"],
    hdrs = ["block_level_emitter.h"],
    tags = ["gpu"],
    deps = [
        ":gpu_codegen_backend",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/backends/gpu/codegen/triton:support",
        "//xla/backends/gpu/codegen/triton:tma_utils",
        "//xla/hlo/analysis:symbolic_expr",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_traversal",
        "//xla/service:compiler",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:instruction_fusion",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:ir_emission_utils",
        "//xla/service/gpu/model:fusion_analysis_cache",
        "//xla/service/gpu/model:gpu_indexing_performance_model",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/gpu:tma_metadata",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/base:nullability",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//mlir:IR",
    ],
)

# Test for :block_level_emitter, declared for the a100, h100 and b200 backends.
# The b200 variant is additionally tagged "broken" (see the TODO below). Tagged
# "cuda-only" and "no_mac"; links the NVPTX compiler implementation.
xla_test(
    name = "block_level_emitter_test",
    srcs = ["block_level_emitter_test.cc"],
    backend_tags = {
        # TODO(b/445172709): Re-enable once fixed.
        "b200": ["broken"],
    },
    backends = [
        "a100",
        "h100",
        "b200",
    ],
    tags = [
        "cuda-only",
        "no_mac",
    ],
    use_legacy_runtime = True,
    deps = [
        ":block_level_emitter",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:platform_util",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:ir_emission_utils",
        "//xla/service/gpu:nvptx_compiler_impl",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/gpu:tma_metadata",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/util/proto:proto_matchers",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest_main",
    ],
)

# cuBLAS backend library (cublas.{h,cc}), built on :gpu_codegen_backend and
# tagged "gpu". The unconditional deps are extended with two CUDA
# stream-executor targets wrapped in `if_cuda(...)` (loaded from
# @local_config_cuda) -- presumably added only when building with CUDA; confirm
# against the macro definition.
cc_library(
    name = "cublas",
    srcs = ["cublas.cc"],
    hdrs = ["cublas.h"],
    tags = ["gpu"],
    deps = [
        ":gpu_codegen_backend",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:compiler",
        "//xla/service:hlo_cost_analysis",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:cublas_cudnn",
        "//xla/service/gpu:matmul_utils",
        "//xla/service/gpu/autotuning:redzone_buffers",
        "//xla/service/gpu/transforms:dot_algorithm_rewriter",
        "//xla/service/gpu/transforms:gemm_rewriter",
        "//xla/stream_executor:blas",
        "//xla/stream_executor:device_address",
        "//xla/stream_executor:device_address_allocator",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:semantic_version",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor:stream_executor_memory_allocator",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/stream_executor/gpu:gpu_blas_lt",
        "//xla/stream_executor/gpu:redzone_allocator",
        "//xla/tools:hlo_decomposer_lib",
        "//xla/tsl/lib/gtl:iterator_range",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
    ] + if_cuda([
        "//xla/stream_executor/cuda:repeat_buffer_kernel_cuda",
        "//xla/stream_executor/cuda:cublas_plugin",
    ]),
)

# Test for :cublas, declared for the a100, h100 and b200 backends. Tagged
# "cuda-only" and "no_mac"; links the NVPTX compiler implementation.
xla_test(
    name = "cublas_test",
    srcs = ["cublas_test.cc"],
    backends = [
        "a100",
        "h100",
        "b200",
    ],
    tags = [
        "cuda-only",
        "no_mac",
    ],
    use_legacy_runtime = True,
    deps = [
        ":cublas",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:filecheck",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:platform_util",
        "//xla/service/gpu:nvptx_compiler_impl",
        "//xla/stream_executor:blas",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/util/proto:proto_matchers",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/status:statusor",
        "@com_google_googletest//:gtest_main",
    ],
)

# cuBLASLt backend library (cublaslt.{h,cc}), built on :gpu_codegen_backend.
# NOTE(review): unlike :cublas and :cudnn, this target sets no `tags` (neither
# "gpu" nor "cuda-only") -- confirm that is intentional.
cc_library(
    name = "cublaslt",
    srcs = ["cublaslt.cc"],
    hdrs = ["cublaslt.h"],
    deps = [
        ":gpu_codegen_backend",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/service:compiler",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:cublas_cudnn",
        "//xla/service/gpu:matmul_utils",
        "//xla/stream_executor:blas",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:stream",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/gpu:gpu_blas_lt",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Test for :cublaslt, declared for the a100, h100 and b200 backends. Tagged
# "cuda-only" and "no_mac"; links the NVPTX compiler implementation.
xla_test(
    name = "cublaslt_test",
    srcs = ["cublaslt_test.cc"],
    backends = [
        "a100",
        "h100",
        "b200",
    ],
    tags = [
        "cuda-only",
        "no_mac",
    ],
    use_legacy_runtime = True,
    deps = [
        ":cublaslt",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:filecheck",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:platform_util",
        "//xla/service/gpu:nvptx_compiler_impl",
        "//xla/stream_executor:blas",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/status:statusor",
        "@com_google_googletest//:gtest_main",
    ],
)

# cuDNN backend library (cudnn.{h,cc}), built on :gpu_codegen_backend. Tagged
# both "cuda-only" and "gpu"; depends on the cuDNN fusion compiler transform and
# the GPU convolution runner.
cc_library(
    name = "cudnn",
    srcs = ["cudnn.cc"],
    hdrs = ["cudnn.h"],
    tags = [
        "cuda-only",
        "gpu",
    ],
    deps = [
        ":gpu_codegen_backend",
        "//xla:autotuning_proto_cc",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:algorithm_util",
        "//xla/service:compiler",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:cublas_cudnn",
        "//xla/service/gpu:gpu_conv_runner",
        "//xla/service/gpu:ir_emission_utils",
        "//xla/service/gpu:stream_executor_util",
        "//xla/service/gpu/transforms:cudnn_fusion_compiler",
        "//xla/stream_executor:device_address",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:dnn",
        "//xla/stream_executor:engine_options",
        "//xla/stream_executor:stream",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor:stream_executor_memory_allocator",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/protobuf:dnn_proto_cc",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Test for :cudnn, declared for the a100, h100 and b200 backends. Tagged
# "cuda-only" and "no_mac"; links the NVPTX compiler implementation.
xla_test(
    name = "cudnn_test",
    srcs = ["cudnn_test.cc"],
    backends = [
        "a100",
        "h100",
        "b200",
    ],
    tags = [
        "cuda-only",
        "no_mac",
    ],
    use_legacy_runtime = True,
    deps = [
        ":cudnn",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:compiler",
        "//xla/service:platform_util",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:nvptx_compiler_impl",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/protobuf:dnn_proto_cc",
        "//xla/tsl/util/proto:proto_matchers",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/status:statusor",
        "@com_google_googletest//:gtest_main",
    ],
)

# Custom-kernel backend library (custom_kernel.{h,cc}), built on
# :gpu_codegen_backend and the GPU custom kernel / custom kernel fusion targets.
# No `tags` are set on this target.
cc_library(
    name = "custom_kernel",
    srcs = ["custom_kernel.cc"],
    hdrs = ["custom_kernel.h"],
    deps = [
        ":gpu_codegen_backend",
        "//xla:autotune_results_proto_cc",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/service:compiler",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:ir_emission_utils",
        "//xla/service/gpu/kernels:custom_kernel",
        "//xla/service/gpu/kernels:custom_kernel_fusion",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
    ],
)

# Test for :custom_kernel, declared for the a100, h100 and b200 backends.
# Tagged "cuda-only" and "no_mac"; links the NVPTX compiler implementation.
xla_test(
    name = "custom_kernel_test",
    srcs = ["custom_kernel_test.cc"],
    backends = [
        "a100",
        "h100",
        "b200",
    ],
    tags = [
        "cuda-only",
        "no_mac",
    ],
    use_legacy_runtime = True,
    deps = [
        ":custom_kernel",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:filecheck",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:compiler",
        "//xla/service:platform_util",
        "//xla/service/gpu:nvptx_compiler_impl",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/util/proto:proto_matchers",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/status:statusor",
        "@com_google_googletest//:gtest_main",
    ],
)

# Triton backend library (triton.{h,cc}), built on :gpu_codegen_backend and
# tagged "gpu". It is listed as a dep by both :factory_cuda and :factory_rocm,
# and pulls in the dot search space, Triton configs and several GPU HLO
# transform targets.
cc_library(
    name = "triton",
    srcs = ["triton.cc"],
    hdrs = ["triton.h"],
    tags = ["gpu"],
    deps = [
        ":gpu_codegen_backend",
        "//xla:autotuning_proto_cc",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/transforms/simplifiers:float_normalization",
        "//xla/hlo/utils:hlo_query",
        "//xla/hlo/utils:hlo_traversal",
        "//xla/service:compiler",
        "//xla/service:hlo_cost_analysis",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:gpu_float_support",
        "//xla/service/gpu:ir_emission_utils",
        "//xla/service/gpu:matmul_utils",
        "//xla/service/gpu:split_k_gemm_rewriter",
        "//xla/service/gpu/autotuning:dot_search_space",
        "//xla/service/gpu/autotuning:triton_configs",
        "//xla/service/gpu/transforms:convert_triton_gemm_config",
        "//xla/service/gpu/transforms:fusion_wrapper",
        "//xla/service/gpu/transforms:hoist_fused_bitcasts",
        "//xla/service/gpu/transforms:nest_gemm_fusion",
        "//xla/service/gpu/transforms:priority_fusion",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:status_macros",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@llvm-project//mlir:IR",
    ],
)

# Test for :triton, declared for the a100, h100 and b200 backends. The b200
# variant is additionally tagged "broken" (see the TODO below). Tagged
# "cuda-only" and "no_mac"; links the NVPTX compiler implementation.
xla_test(
    name = "triton_test",
    srcs = ["triton_test.cc"],
    backend_tags = {
        # TODO(b/445172709): Re-enable once fixed.
        "b200": ["broken"],
    },
    backends = [
        "a100",
        "h100",
        "b200",
    ],
    tags = [
        "cuda-only",  # rocm support is not tested.
        "no_mac",
    ],
    use_legacy_runtime = True,
    deps = [
        ":triton",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/analysis:symbolic_expr",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:platform_util",
        "//xla/service/gpu:nvptx_compiler_impl",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/util/proto:proto_matchers",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/status:statusor",
        "@com_google_googletest//:gtest_main",
        "@llvm-project//mlir:IR",
    ],
)

# GPU profiler library (gpu_profiler.{h,cc}). Depends on the generic autotuner
# `profiler` target plus the buffer comparator, redzone buffers and redzone
# allocator targets. It does not depend on :gpu_codegen_backend and sets no
# `tags`.
cc_library(
    name = "gpu_profiler",
    srcs = ["gpu_profiler.cc"],
    hdrs = ["gpu_profiler.h"],
    deps = [
        "//xla:executable_run_options",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/backends/autotuner:profiler",
        "//xla/backends/gpu/runtime:buffer_comparator",
        "//xla/hlo/ir:hlo",
        "//xla/service:executable",
        "//xla/service:maybe_owning_device_address",
        "//xla/service:shaped_buffer",
        "//xla/service/gpu:gpu_executable_run_options",
        "//xla/service/gpu/autotuning:redzone_buffers",
        "//xla/stream_executor:device_address",
        "//xla/stream_executor:device_address_allocator",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor:stream_executor_memory_allocator",
        "//xla/stream_executor/gpu:redzone_allocator",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:casts",
    ],
)

# Native emitter backend library (native_emitter.{h,cc}), built on
# :gpu_codegen_backend. No `tags` are set on this target.
cc_library(
    name = "native_emitter",
    srcs = ["native_emitter.cc"],
    hdrs = ["native_emitter.h"],
    deps = [
        ":gpu_codegen_backend",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/service:compiler",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/base:nullability",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
    ],
)

# Test for :native_emitter, declared for the a100, h100 and b200 backends. The
# b200 variant is additionally tagged "broken" (see the TODO below). Tagged
# "cuda-only" and "no_mac".
# NOTE(review): this is the only test in view that depends on both
# //xla/service/gpu:nvptx_compiler and :nvptx_compiler_impl; the other CUDA
# tests list only the _impl target -- confirm both are needed.
xla_test(
    name = "native_emitter_test",
    srcs = ["native_emitter_test.cc"],
    backend_tags = {
        # TODO(b/445172709): Re-enable once fixed.
        "b200": ["broken"],
    },
    backends = [
        "a100",
        "h100",
        "b200",
    ],
    tags = [
        "cuda-only",
        "no_mac",
    ],
    use_legacy_runtime = True,
    deps = [
        ":native_emitter",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:platform_util",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:nvptx_compiler",
        "//xla/service/gpu:nvptx_compiler_impl",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/status:statusor",
        "@com_google_googletest//:gtest_main",
    ],
)

# CUDA-side factory implementation (factory_cuda.cc only; the header comes from
# :factory). Aggregates the :cublas, :cublaslt, :cudnn, :fission_backend and
# :triton backends. Tagged "cuda-only" and "gpu".
cc_library(
    name = "factory_cuda",
    srcs = ["factory_cuda.cc"],
    tags = [
        "cuda-only",
        "gpu",
    ],
    deps = [
        ":cublas",
        ":cublaslt",
        ":cudnn",
        ":factory",
        ":fission_backend",
        ":triton",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/service:compiler",
        "//xla/service/gpu/transforms:dot_algorithm_rewriter",
        "//xla/service/gpu/transforms:gemm_rewriter",
        "//xla/service/gpu/transforms:scaled_dot_rewriter",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/cuda:cuda_platform_id",
        "//xla/stream_executor/platform:platform_object_registry",
    ],
    # Keep every object file in dependents' link even when no symbol is
    # referenced directly -- presumably because factory_cuda.cc registers itself
    # through the platform object registry at static-init time; confirm in the
    # source file.
    alwayslink = True,
)

# rocBLAS backend library (rocblas.{h,cc}), built on :gpu_codegen_backend and
# the ROCm stream-executor rocblas plugin. Tagged "gpu" and "rocm-only".
cc_library(
    name = "rocblas",
    srcs = ["rocblas.cc"],
    hdrs = ["rocblas.h"],
    tags = [
        "gpu",
        "rocm-only",
    ],
    deps = [
        ":gpu_codegen_backend",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/service:compiler",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:cublas_cudnn",
        "//xla/service/gpu:matmul_utils",
        "//xla/stream_executor:blas",
        "//xla/stream_executor:device_address",
        "//xla/stream_executor:device_address_allocator",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor:stream_executor_memory_allocator",
        "//xla/stream_executor/gpu:gpu_blas_lt",
        "//xla/stream_executor/rocm:rocblas_plugin",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# hipBLASLt backend library (hipblaslt.{h,cc}), built on :gpu_codegen_backend
# and the ROCm stream-executor amdhipblaslt plugin. Tagged "gpu" and
# "rocm-only".
cc_library(
    name = "hipblaslt",
    srcs = ["hipblaslt.cc"],
    hdrs = ["hipblaslt.h"],
    tags = [
        "gpu",
        "rocm-only",
    ],
    deps = [
        ":gpu_codegen_backend",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/service:compiler",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:cublas_cudnn",
        "//xla/service/gpu:matmul_utils",
        "//xla/stream_executor:blas",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:stream",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/gpu:gpu_blas_lt",
        "//xla/stream_executor/rocm:amdhipblaslt_plugin",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Test for :hipblaslt, declared for the "amdgpu_any" backend only. Tagged "gpu"
# and "rocm-only"; links the AMDGPU compiler implementation.
# NOTE(review): unlike the CUDA tests in this package, `use_legacy_runtime` is
# not set here -- confirm that is intentional.
xla_test(
    name = "hipblaslt_test",
    srcs = ["hipblaslt_test.cc"],
    backends = [
        "amdgpu_any",
    ],
    tags = [
        "gpu",
        "rocm-only",
    ],
    deps = [
        ":hipblaslt",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:filecheck",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:platform_util",
        "//xla/service/gpu:amdgpu_compiler_impl",
        "//xla/stream_executor:blas",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/status:statusor",
        "@com_google_googletest//:gtest_main",
    ],
)

# Test for :rocblas, declared for the "amdgpu_any" backend only. Tagged "gpu"
# and "rocm-only"; links the AMDGPU compiler implementation.
# NOTE(review): unlike the CUDA tests in this package, `use_legacy_runtime` is
# not set here -- confirm that is intentional.
xla_test(
    name = "rocblas_test",
    srcs = ["rocblas_test.cc"],
    backends = [
        "amdgpu_any",
    ],
    tags = [
        "gpu",
        "rocm-only",
    ],
    deps = [
        ":rocblas",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:filecheck",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:platform_util",
        "//xla/service/gpu:amdgpu_compiler_impl",
        "//xla/stream_executor:blas",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/util/proto:proto_matchers",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/status:statusor",
        "@com_google_googletest//:gtest_main",
    ],
)

# ROCm-side factory implementation (factory_rocm.cc only; the header comes from
# :factory). Tagged "gpu" and "rocm-only".
# NOTE(review): only :rocblas and :triton are listed as backend deps; :hipblaslt
# and :miopen are defined in this package but not referenced here -- confirm
# factory_rocm.cc does not use them.
cc_library(
    name = "factory_rocm",
    srcs = ["factory_rocm.cc"],
    tags = [
        "gpu",
        "rocm-only",
    ],
    deps = [
        ":factory",
        ":rocblas",
        ":triton",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/service:compiler",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/platform:platform_object_registry",
        "//xla/stream_executor/rocm:rocm_platform_id",
        "@llvm-project//mlir:IR",
    ],
    # Keep every object file in dependents' link even when no symbol is
    # referenced directly -- presumably because factory_rocm.cc registers itself
    # through the platform object registry at static-init time; confirm in the
    # source file.
    alwayslink = True,
)

# Header-only library (no `srcs`) exporting factory.h. Both :factory_cuda and
# :factory_rocm depend on it and supply the platform-specific .cc files.
cc_library(
    name = "factory",
    hdrs = ["factory.h"],
    deps = [
        "//xla/backends/autotuner:codegen_backend",
        "//xla/service:compiler",
        "//xla/stream_executor:stream_executor_h",
        "@llvm-project//mlir:IR",
    ],
)

# Test for :gpu_profiler. Uses the generic "gpu" backend entry rather than a
# list of specific GPU models, and is tagged "cuda-only"; links the NVPTX
# compiler implementation.
xla_test(
    name = "gpu_profiler_test",
    srcs = ["gpu_profiler_test.cc"],
    backends = ["gpu"],
    tags = ["cuda-only"],
    use_legacy_runtime = True,
    deps = [
        ":gpu_profiler",
        "//xla:executable_run_options",
        "//xla:literal",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/backends/autotuner:profiler",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:executable",
        "//xla/service:platform_util",
        "//xla/service:shaped_buffer",
        "//xla/service:transfer_manager",
        "//xla/service/gpu:gpu_compiler",
        "//xla/service/gpu:nvptx_compiler_impl",
        "//xla/stream_executor:device_address_allocator",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor:stream_executor_memory_allocator",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest_main",
    ],
)

# Legacy cache library (legacy_cache.{h,cc}). Depends on the autotuner cache
# interface target and on the GPU autotuning `autotune_cache_key` /
# `autotuner_util` targets. No `tags` are set.
cc_library(
    name = "legacy_cache",
    srcs = ["legacy_cache.cc"],
    hdrs = ["legacy_cache.h"],
    deps = [
        "//xla:autotune_results_proto_cc",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:autotuner_cache_interface",
        "//xla/hlo/ir:hlo",
        "//xla/service/gpu/autotuning:autotune_cache_key",
        "//xla/service/gpu/autotuning:autotuner_util",
        "//xla/stream_executor:device_description",
        "//xla/tsl/platform:errors",
        "//xla/tsl/protobuf:dnn_proto_cc",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@com_google_protobuf//:duration_cc_proto",
    ],
)

# Fission backend library (fission_backend.{h,cc}), built on
# :gpu_codegen_backend. Depends on the HLO pass pipeline, the HLO decomposer
# library and the priority fusion transform. No `tags` are set.
cc_library(
    name = "fission_backend",
    srcs = ["fission_backend.cc"],
    hdrs = ["fission_backend.h"],
    deps = [
        ":gpu_codegen_backend",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/service:compiler",
        "//xla/service:hlo_cost_analysis",
        "//xla/service/gpu/transforms:priority_fusion",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tools:hlo_decomposer_lib",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@llvm-project//mlir:IR",
    ],
)

# Test for :fission_backend, declared for the h100 backend only and tagged
# "cuda-only". Also depends on the :cublas and :custom_kernel backends and on
# the rewriter transforms listed below.
xla_test(
    name = "fission_backend_test",
    srcs = ["fission_backend_test.cc"],
    backends = ["h100"],
    tags = ["cuda-only"],
    use_legacy_runtime = True,
    deps = [
        ":cublas",
        ":custom_kernel",
        ":fission_backend",
        ":gpu_codegen_backend",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:platform_util",
        "//xla/service/gpu:nvptx_compiler_impl",
        "//xla/service/gpu/transforms:custom_kernel_fusion_rewriter",
        "//xla/service/gpu/transforms:dot_algorithm_rewriter",
        "//xla/service/gpu/transforms:gemm_rewriter",
        "//xla/service/gpu/transforms:scaled_dot_rewriter",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/status:statusor",
        "@com_google_googletest//:gtest_main",
        "@llvm-project//mlir:IR",
    ],
)

# MIOpen backend library (miopen.{h,cc}), built on :gpu_codegen_backend and the
# GPU convolution runner / DNN stream-executor targets.
# NOTE(review): unlike :rocblas and :hipblaslt, this target sets no `tags`
# (neither "gpu" nor "rocm-only") -- confirm that is intentional.
cc_library(
    name = "miopen",
    srcs = ["miopen.cc"],
    hdrs = ["miopen.h"],
    deps = [
        ":gpu_codegen_backend",
        "//xla:autotuning_proto_cc",
        "//xla:comparison_util",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/service:compiler",
        "//xla/service:hlo_creation_utils",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:cublas_cudnn",
        "//xla/service/gpu:gpu_conv_runner",
        "//xla/service/gpu:stream_executor_util",
        "//xla/stream_executor:device_address",
        "//xla/stream_executor:dnn",
        "//xla/stream_executor:engine_options",
        "//xla/stream_executor:scratch_allocator",
        "//xla/stream_executor:stream",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor:stream_executor_memory_allocator",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:status_macros",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/protobuf:dnn_proto_cc",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/types:span",
    ],
)

# Unit test for the `:legacy_cache` target, built from legacy_cache_test.cc
# and linked against gtest_main.
# The target sets no `tags` and no `backends`; its dependencies are the
# autotuner cache interface/protos, HLO IR and parser, device description
# targets, and file/path utilities (`env`, `path`).
xla_cc_test(
    name = "legacy_cache_test",
    srcs = ["legacy_cache_test.cc"],
    deps = [
        ":legacy_cache",
        "//xla:autotuning_proto_cc",
        "//xla:literal_util",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:autotuner_cache_interface",
        "//xla/backends/autotuner:autotuner_cache_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/parser:hlo_parser",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/protobuf:dnn_proto_cc",
        "@com_google_absl//absl/status",
        "@com_google_googletest//:gtest_main",
        "@com_google_protobuf//:any_cc_proto",
        "@local_tsl//tsl/platform:path",
    ],
)

# Binary built from autotuner_main.cc, tagged "gpu".
# The dependency list has three parts:
#   1. An unconditional list: the local `:factory`, `:gpu_profiler` and
#      `:legacy_cache` targets, the generic autotuner library and its cache /
#      profiler interfaces, HLO parsing, and stream executor platform targets.
#   2. `:factory_cuda` plus the CUDA runtime bundle, appended only when CUDA
#      is configured.
#   3. `:factory_rocm` plus the ROCm runtime bundle, appended only when ROCm
#      is configured.
# NOTE(review): `//xla/stream_executor/cuda:cuda_platform_id` is in the
# unconditional list, so it is also a dependency of ROCm-only builds; confirm
# that this is intended.
xla_cc_binary(
    name = "autotuner_main",
    srcs = ["autotuner_main.cc"],
    tags = [
        "gpu",
    ],
    deps = [
        ":factory",
        ":gpu_profiler",
        ":legacy_cache",
        "//xla:debug_options_flags",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner",
        "//xla/backends/autotuner:autotuner_cache_interface",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/backends/autotuner:file_based_autotuner_cache",
        "//xla/backends/autotuner:profiler",
        "//xla/hlo/analysis:symbolic_expr",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/parser:hlo_parser",
        "//xla/service:compiler",
        "//xla/service:gpu_plugin",
        "//xla/service:platform_util",
        "//xla/stream_executor:device_address_allocator",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:platform_manager",
        "//xla/stream_executor:stream_executor_memory_allocator",
        "//xla/stream_executor/cuda:cuda_platform_id",
        "//xla/stream_executor/platform:platform_object_registry",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/util:command_line_flags",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@llvm-project//mlir:IR",
        "@local_tsl//tsl/platform:platform_port",
    ] + if_cuda_is_configured([
        # Added only when CUDA is configured.
        ":factory_cuda",
        "//xla/stream_executor/cuda:all_runtime",
    ]) + if_rocm_is_configured([
        # Added only when ROCm is configured.
        ":factory_rocm",
        "//xla/stream_executor/rocm:all_runtime",
    ]),
)

# Test for the `:miopen` library, built from miopen_test.cc via the `xla_test`
# macro with `backends = ["gpu"]` and tagged "rocm-only".
# Besides `:miopen` it depends on the AMDGPU compiler targets, the ROCm
# platform id, the hardware-independent HLO test base, and proto / status
# matchers.
# NOTE(review): both `amdgpu_compiler` and `amdgpu_compiler_impl` are listed;
# confirm that both are needed.
xla_test(
    name = "miopen_test",
    srcs = ["miopen_test.cc"],
    backends = ["gpu"],
    tags = [
        "rocm-only",
    ],
    deps = [
        ":miopen",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:compiler",
        "//xla/service:platform_util",
        "//xla/service/gpu:amdgpu_compiler",
        "//xla/service/gpu:amdgpu_compiler_impl",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/rocm:rocm_platform_id",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/protobuf:dnn_proto_cc",
        "//xla/tsl/util/proto:proto_matchers",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/status:statusor",
        "@com_google_googletest//:gtest_main",
    ],
)
