# Description:
#   Components that implement GPU autotuning.

load(
    "@local_config_rocm//rocm:build_defs.bzl",
    "if_rocm_is_configured",
)
load("@rules_cc//cc:cc_library.bzl", "cc_library")
load("//xla:xla.default.bzl", "xla_cc_test")
load("//xla/tests:build_defs.bzl", "xla_test")
load("//xla/tsl:tsl.default.bzl", "get_compatible_with_portable")
load(
    "//xla/tsl/platform:build_config.bzl",
    "tf_proto_library",
)
load(
    "//xla/tsl/platform/default:cuda_build_defs.bzl",
    "if_cuda_is_configured",
)

# Package-wide defaults: unless a target sets its own visibility, it is visible
# only to the ":friends" package group declared below.
package(
    # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"],
    default_visibility = [":friends"],
    licenses = ["notice"],
)

# Visibility group used as this package's default visibility. It lists no
# packages of its own; membership is inherited entirely from //xla:friends.
package_group(
    name = "friends",
    includes = [
        "//xla:friends",
    ],
)

# Library built from gemm_fusion_autotuner_cuda.cc, tagged "cuda-only".
#
# gemm_fusion_autotuner.h is listed in `srcs` here instead of being pulled in
# through a dep on ":gemm_fusion_autotuner" (which exports it via `hdrs`):
# ":gemm_fusion_autotuner" itself depends on this target when CUDA is
# configured, so depending on it from here would create a cycle.
cc_library(
    name = "gemm_fusion_autotuner_cuda",
    srcs = [
        "gemm_fusion_autotuner.h",
        "gemm_fusion_autotuner_cuda.cc",
    ],
    tags = [
        "cuda-only",
        "gpu",
    ],
    deps = [
        ":autotuner_compile_util",
        ":autotuner_util",
        ":redzone_buffers",
        ":triton_configs",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/backends/gpu/autotuner:cudnn",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass",
        "//xla/pjrt/distributed:key_value_store_interface",
        "//xla/service:algorithm_util",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:shaped_buffer",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:hlo_fusion_analysis",
        "//xla/service/gpu:ir_emission_utils",
        "//xla/service/gpu:matmul_utils",
        "//xla/service/gpu:stream_executor_util",
        "//xla/service/gpu/transforms:block_scaling_rewriter",
        "//xla/service/gpu/transforms:cudnn_fusion_compiler",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:semantic_version",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/tsl/platform:env",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@llvm-project//mlir:IR",
        "@local_config_cuda//cuda:cuda_headers",
    ],
)

# Library built from gemm_fusion_autotuner_rocm.cc, tagged "rocm-only".
#
# As with the CUDA counterpart, gemm_fusion_autotuner.h is listed in `srcs`
# rather than obtained through a dep on ":gemm_fusion_autotuner", because that
# target depends on this one when ROCm is configured and the reverse dep would
# create a cycle.
cc_library(
    name = "gemm_fusion_autotuner_rocm",
    srcs = [
        "gemm_fusion_autotuner.h",
        "gemm_fusion_autotuner_rocm.cc",
    ],
    tags = [
        "gpu",
        "rocm-only",
    ],
    deps = [
        ":autotuner_compile_util",
        ":autotuner_util",
        ":redzone_buffers",
        ":triton_configs",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass",
        "//xla/pjrt/distributed:key_value_store_interface",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:shaped_buffer",
        "//xla/service/gpu:matmul_utils",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:semantic_version",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/rocm:rocblas_plugin",
        "//xla/tsl/platform:env",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@llvm-project//mlir:IR",
        "@local_config_rocm//rocm:rocm_headers",
    ],
)

# Main GEMM fusion autotuner library: compiles gemm_fusion_autotuner.cc and
# exports gemm_fusion_autotuner.h.
#
# The platform-specific libraries (":gemm_fusion_autotuner_cuda" and
# ":gemm_fusion_autotuner_rocm") are prepended to `deps` through
# if_cuda_is_configured / if_rocm_is_configured.
# NOTE(review): presumably each macro yields an empty list when its platform is
# not configured, so only the matching implementation is linked - confirm
# against the macro definitions.
cc_library(
    name = "gemm_fusion_autotuner",
    srcs = [
        "gemm_fusion_autotuner.cc",
    ],
    hdrs = ["gemm_fusion_autotuner.h"],
    tags = ["gpu"],
    deps = if_cuda_is_configured([":gemm_fusion_autotuner_cuda"]) + if_rocm_is_configured([
        ":gemm_fusion_autotuner_rocm",
    ]) + [
        ":autotune_cache_key",
        ":autotuner_compile_util",
        ":autotuner_pass",
        ":autotuner_status_key",
        ":autotuner_util",
        ":dot_search_space",
        ":redzone_buffers",
        "//xla:autotune_results_proto_cc",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/backends/gpu/autotuner:cublas",
        "//xla/backends/gpu/autotuner:custom_kernel",
        "//xla/backends/gpu/autotuner:fission_backend",
        "//xla/backends/gpu/autotuner:triton",
        "//xla/backends/gpu/codegen/triton:tma_utils",
        "//xla/backends/gpu/runtime:buffer_comparator",
        "//xla/hlo/analysis:symbolic_expr",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/transforms/simplifiers:float_normalization",
        "//xla/hlo/utils:hlo_query",
        "//xla/hlo/utils:hlo_traversal",
        "//xla/pjrt/distributed:key_value_store_interface",
        "//xla/service:algorithm_util",
        "//xla/service:call_inliner",
        "//xla/service:compiler",
        "//xla/service:dump",
        "//xla/service:executable",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:hlo_graph_dumper",
        "//xla/service:hlo_module_config",
        "//xla/service:matmul_indexing_utils",
        "//xla/service:shaped_buffer",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:gpu_float_support",
        "//xla/service/gpu:hlo_fusion_analysis",
        "//xla/service/gpu:ir_emission_utils",
        "//xla/service/gpu:matmul_utils",
        "//xla/service/gpu:split_k_gemm_rewriter",
        "//xla/service/gpu:stream_executor_util",
        "//xla/service/gpu/kernels:custom_kernel",
        "//xla/service/gpu/kernels:custom_kernel_fusion",
        "//xla/service/gpu/kernels:custom_kernel_fusion_pattern",
        "//xla/service/gpu/transforms:block_scaling_rewriter",
        "//xla/service/gpu/transforms:convert_triton_gemm_config",
        "//xla/service/gpu/transforms:custom_kernel_fusion_rewriter",
        "//xla/service/gpu/transforms:dot_algorithm_rewriter",
        "//xla/service/gpu/transforms:fusion_wrapper",
        "//xla/service/gpu/transforms:gemm_rewriter",
        "//xla/service/gpu/transforms:hoist_fused_bitcasts",
        "//xla/service/gpu/transforms:nest_gemm_fusion",
        "//xla/service/gpu/transforms:priority_fusion",
        "//xla/service/gpu/transforms:scaled_dot_rewriter",
        "//xla/stream_executor:device_address",
        "//xla/stream_executor:device_address_allocator",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:semantic_version",
        "//xla/stream_executor:stream",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/stream_executor/cuda:ptx_compiler_helpers",
        "//xla/stream_executor/gpu:redzone_allocator",
        "//xla/stream_executor/gpu:tma_metadata",
        "//xla/stream_executor/integrations:tf_allocator_adapter",
        "//xla/tools:hlo_decomposer_lib",
        "//xla/tsl/lib/core:bits",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:status",
        "//xla/tsl/platform:status_macros",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/util/proto:proto_utils",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/functional:overload",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "@com_google_protobuf//:protobuf",
        "@llvm-project//mlir:IR",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/profiler/lib:scoped_annotation",
        "@local_tsl//tsl/profiler/lib:traceme",
    ],
)

# Test for ":gemm_fusion_autotuner", declared for the a100, h100 and b200
# backends with a "long" timeout. The b200 variant additionally carries the
# "broken" tag (see the TODO below). Tagged "cuda-only" and "no_mac".
xla_test(
    name = "gemm_fusion_autotuner_test",
    timeout = "long",
    srcs = ["gemm_fusion_autotuner_test.cc"],
    backend_tags = {
        # TODO(b/445172709): Re-enable once fixed.
        "b200": ["broken"],
    },
    backends = [
        "a100",
        "h100",
        "b200",
    ],
    tags = [
        "cuda-only",
        "no_mac",
        "pjrt_migration_candidate",
    ],
    use_legacy_runtime = True,
    deps = [
        ":autotune_cache_key",
        ":autotuner_util",
        ":gemm_fusion_autotuner",
        "//xla:autotune_results_proto_cc",
        "//xla:autotuning_proto_cc",
        "//xla:error_spec",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/gpu/autotuner:gpu_codegen_backend",
        "//xla/hlo/analysis:symbolic_expr",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/testlib:filecheck",
        "//xla/hlo/testlib:pattern_matcher_gmock",
        "//xla/hlo/testlib:verified_hlo_module",
        "//xla/pjrt/distributed:key_value_store_interface",
        "//xla/service:call_inliner",
        "//xla/service:compiler",
        "//xla/service:dump",
        "//xla/service:executable",
        "//xla/service:hlo_module_config",
        "//xla/service:pattern_matcher",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:matmul_utils",
        "//xla/service/gpu/transforms:gemm_fusion",
        "//xla/service/gpu/transforms:gemm_rewriter",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:semantic_version",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/tests:hlo_test_base",
        "//xla/tests:test_utils",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "//xla/tools:hlo_decomposer_lib",
        "//xla/tsl/distributed_runtime:call_options",
        "//xla/tsl/distributed_runtime/coordination:coordination_service_agent",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/platform:test",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/time",
        "@com_google_googletest//:gtest",
        "@llvm-project//mlir:IR",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:platform_port",
    ],
)

# Library built from dot_search_space.{cc,h}; a dep of
# ":gemm_fusion_autotuner".
# NOTE(review): judging by the name and the tma_utils / tma_metadata deps this
# presumably enumerates candidate configs for dot (matmul) autotuning - confirm
# against the header.
cc_library(
    name = "dot_search_space",
    srcs = ["dot_search_space.cc"],
    hdrs = ["dot_search_space.h"],
    tags = ["gpu"],
    deps = [
        "//xla:shape_util",
        "//xla:util",
        "//xla/backends/gpu/codegen/triton:tma_utils",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_traversal",
        "//xla/service/gpu:matmul_utils",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor/gpu:tma_metadata",
        "//xla/tsl/lib/core:bits",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_protobuf//:protobuf_lite",
        "@llvm-project//llvm:Support",
        "@local_tsl//tsl/platform:protobuf",
    ],
)

# Test for ":dot_search_space". Declared with xla_cc_test (no per-backend
# instantiation) and built on hlo_hardware_independent_test_base; main() comes
# from gtest_main.
xla_cc_test(
    name = "dot_search_space_test",
    srcs = ["dot_search_space_test.cc"],
    tags = ["gpu"],
    deps = [
        ":dot_search_space",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/testlib:verified_hlo_module",
        "//xla/service/gpu:matmul_utils",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
    ],
)

# Test for ":autotuner_pass", declared for the generic "gpu" backend and tagged
# "cuda-only"; it links the NVPTX compiler implementation and the cuBLAS
# autotuner backend.
xla_test(
    name = "autotuner_pass_test",
    srcs = ["autotuner_pass_test.cc"],
    backends = ["gpu"],
    tags = ["cuda-only"],
    deps = [
        ":autotuner_pass",
        "//xla/backends/autotuner",
        "//xla/backends/autotuner:autotuner_cache_proto_cc",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/backends/gpu/autotuner:cublas",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:platform_util",
        "//xla/service/gpu:backend_configs_cc",
        "//xla/service/gpu:cublas_cudnn",
        "//xla/service/gpu:gpu_compiler",
        "//xla/service/gpu:nvptx_compiler_impl",
        "//xla/stream_executor:device_address_allocator",
        "//xla/stream_executor:platform_manager",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor:stream_executor_memory_allocator",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/platform:path",
    ],
)

# Small library built from autotuner_status_key.{cc,h}; its only dep is
# absl/strings. Marked compatible_with the portable environment set.
cc_library(
    name = "autotuner_status_key",
    srcs = ["autotuner_status_key.cc"],
    hdrs = ["autotuner_status_key.h"],
    compatible_with = get_compatible_with_portable(),
    deps = ["@com_google_absl//absl/strings"],
)

# Library built from autotune_cache_key.{cc,h}. Marked compatible_with the
# portable environment set.
# NOTE(review): presumably defines the key type used to look up autotune cache
# entries (it depends on HLO IR and device_description) - confirm in the header.
cc_library(
    name = "autotune_cache_key",
    srcs = ["autotune_cache_key.cc"],
    hdrs = ["autotune_cache_key.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
    ],
)

# Test for ":autotune_cache_key". The files behind
# //xla/backends/gpu/target_config:all_gpu_specs are supplied as runtime data.
xla_cc_test(
    name = "autotune_cache_key_test",
    srcs = ["autotune_cache_key_test.cc"],
    data = ["//xla/backends/gpu/target_config:all_gpu_specs"],
    deps = [
        ":autotune_cache_key",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/parser:hlo_parser",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/platform:test",
        "@com_google_absl//absl/hash:hash_testing",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
        "@com_google_protobuf//:protobuf",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:protobuf",
    ],
)

# Library built from autotuner_util.{cc,h}; builds on ":autotune_cache_key" and
# ":autotuner_status_key" and is a dep of most other targets in this package.
# Marked compatible_with the portable environment set.
cc_library(
    name = "autotuner_util",
    srcs = ["autotuner_util.cc"],
    hdrs = ["autotuner_util.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        ":autotune_cache_key",
        ":autotuner_status_key",
        "//xla:autotune_results_proto_cc",
        "//xla:autotuning_proto_cc",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:dump",
        "//xla/stream_executor:device_address_allocator",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor:stream_executor_memory_allocator",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:logging",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@com_google_protobuf//:protobuf",
        "@llvm-project//llvm:Support",
        "@local_tsl//tsl/platform:base64",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:protobuf",
    ],
)

# This is kept as a separate target (rather than being folded into
# ":autotuner_util") because the runtime executable cannot depend on the
# compilation pipeline, which this target pulls in via //xla/service:compiler.
cc_library(
    name = "autotuner_compile_util",
    srcs = ["autotuner_compile_util.cc"],
    hdrs = ["autotuner_compile_util.h"],
    tags = ["gpu"],
    deps = [
        ":autotuner_util",
        "//xla:executable_run_options",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/gpu/autotuner:gpu_codegen_backend",
        "//xla/hlo/ir:hlo",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:maybe_owning_device_address",
        "//xla/service:shaped_buffer",
        "//xla/service/gpu:gpu_executable_run_options",
        "//xla/service/gpu:ir_emission_utils",
        "//xla/stream_executor:device_address",
        "//xla/stream_executor:device_address_allocator",
        "//xla/stream_executor:kernel_stats",
        "//xla/stream_executor:stream",
        "//xla/stream_executor/gpu:redzone_allocator",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/functional:any_invocable",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/profiler/lib:traceme",
    ],
)

# Library built from redzone_buffers.{cc,h}; depends on the stream_executor
# redzone_allocator. Used by the gemm_fusion_autotuner* targets in this
# package.
cc_library(
    name = "redzone_buffers",
    srcs = ["redzone_buffers.cc"],
    hdrs = ["redzone_buffers.h"],
    deps = [
        "//xla:executable_run_options",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_address",
        "//xla/stream_executor:device_address_allocator",
        "//xla/stream_executor:stream",
        "//xla/stream_executor/gpu:redzone_allocator",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/profiler/lib:traceme",
    ],
)

# Test for ":redzone_buffers", declared for the generic "gpu" backend and
# built on hlo_test_base with the legacy runtime.
xla_test(
    name = "redzone_buffers_test",
    srcs = ["redzone_buffers_test.cc"],
    backends = ["gpu"],
    tags = ["pjrt_migration_candidate"],
    use_legacy_runtime = True,
    deps = [
        ":redzone_buffers",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:platform_util",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:stream",
        "//xla/stream_executor:stream_executor_memory_allocator",
        "//xla/tests:hlo_test_base",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
    ],
)

# Proto library for gpu_autotuning.proto. Its deps are the proto targets it
# builds against: autotuning, xla_data, HLO and the GPU backend configs.
tf_proto_library(
    name = "gpu_autotuning_proto",
    srcs = ["gpu_autotuning.proto"],
    deps = [
        "//xla:autotuning_proto",
        "//xla:xla_data_proto",
        "//xla/service:hlo_proto",
        "//xla/service/gpu:backend_configs",
    ],
)

# Test for ":autotuner_util". Declared with xla_cc_test and linked against the
# host platform; the files behind
# //xla/backends/gpu/target_config:all_gpu_specs are supplied as runtime data.
# main() comes from xla_internal_test_main, hence plain gtest (not gtest_main).
xla_cc_test(
    name = "autotuner_util_test",
    srcs = ["autotuner_util_test.cc"],
    data = ["//xla/backends/gpu/target_config:all_gpu_specs"],
    tags = [
        "gpu",
        "pjrt_migration_candidate",
    ],
    deps = [
        ":autotune_cache_key",
        ":autotuner_status_key",
        ":autotuner_util",
        "//xla:autotune_results_proto_cc",
        "//xla:autotuning_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/parser:hlo_parser",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:dump",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:platform_manager",
        "//xla/stream_executor/host:host_platform",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:logging",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/platform:test",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:status_matchers",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@com_google_protobuf//:protobuf",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:protobuf",
    ],
)

# Library built from autotuner_pass.{cc,h}. It depends on the backend-agnostic
# autotuner in //xla/backends/autotuner plus the GPU profiler and legacy cache
# from //xla/backends/gpu/autotuner, and on //xla/hlo/pass:hlo_pass.
cc_library(
    name = "autotuner_pass",
    srcs = [
        "autotuner_pass.cc",
    ],
    hdrs = ["autotuner_pass.h"],
    tags = ["gpu"],
    deps = [
        "//xla:util",
        "//xla:xla_proto_cc",
        "//xla/backends/autotuner",
        "//xla/backends/autotuner:autotuner_cache_interface",
        "//xla/backends/autotuner:codegen_backend",
        "//xla/backends/autotuner:profiler",
        "//xla/backends/gpu/autotuner:gpu_profiler",
        "//xla/backends/gpu/autotuner:legacy_cache",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass",
        "//xla/pjrt/distributed:key_value_store_interface",
        "//xla/service:compiler",
        "//xla/stream_executor:device_address_allocator",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Library built from triton_configs.{cc,h}; a dep of both
# ":gemm_fusion_autotuner_cuda" and ":gemm_fusion_autotuner_rocm".
# NOTE(review): presumably holds predefined Triton GEMM configs - confirm
# against the header.
cc_library(
    name = "triton_configs",
    srcs = ["triton_configs.cc"],
    hdrs = ["triton_configs.h"],
    deps = [
        "//xla:autotuning_proto_cc",
        "//xla/service/gpu:matmul_utils",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base:no_destructor",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_protobuf//:protobuf",
    ],
)
