/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.streaming.{FlowAssigned, HasStreamingSourceIdentifyingName, Unassigned, UserProvided}
import org.apache.spark.sql.catalyst.trees.TreePattern.NAMED_STREAMING_RELATION
import org.apache.spark.sql.errors.QueryCompilationErrors

/**
 * Propagates source identifying names from NamedStreamingRelation wrappers to the
 * underlying streaming relations (StreamingRelation and StreamingRelationV2) during analysis.
 *
 * This rule unwraps NamedStreamingRelation nodes and propagates their sourceIdentifyingName
 * to the underlying streaming relations. The names are used to:
 * 1. Assign stable checkpoint locations (sources/<name> instead of sources/0, sources/1)
 * 2. Enable source evolution (add/remove/reorder sources without losing state)
 * 3. Provide metadata paths during analysis for schema inference
 *
 * This rule does not itself resolve Streaming Tables or Materialized Views that are
 * wrapped in NamedStreamingRelation nodes; that resolution is handled by ResolveRelations,
 * which runs before this rule.
 *
 * Naming strategy:
 * - User-provided names (via `.name()` API) are preserved
 * - Flow-assigned names (from DLT context) are preserved
 * - Unassigned sources remain unassigned and will be auto-numbered later in MicroBatchExecution
 *
 * When `spark.sql.streaming.queryEvolution.enableSourceEvolution` is enabled,
 * all sources must be explicitly named (throws error if any Unassigned sources found).
 */
object NameStreamingSources extends Rule[LogicalPlan] {

  /**
   * Produces a short label identifying a streaming source inside an error message.
   *
   * @param plan the plan node wrapped by a NamedStreamingRelation
   * @return `toString` of the node when it implements HasStreamingSourceIdentifyingName,
   *         the alias identifier when it is a SubqueryAlias, and the node name otherwise
   */
  private def getSourceDescriptor(plan: LogicalPlan): String = {
    plan match {
      // NOTE(review): toString is expected to yield the source name for V1/V2 relations -
      // confirm against the relation classes.
      case namedSource: HasStreamingSourceIdentifyingName => namedSource.toString
      case SubqueryAlias(identifier, _) => identifier.toString
      case anyOther => anyOther.nodeName
    }
  }

  /**
   * Describes a single wrapper for the "unnamed sources" error, if it is unnamed.
   *
   * @param relation the wrapper to inspect
   * @param position index of the wrapper among all wrappers collected from the plan
   * @return a one-line description when the wrapper is Unassigned, None when it carries
   *         a user-provided or flow-assigned name
   */
  private def describeIfUnnamed(
      relation: NamedStreamingRelation,
      position: Int): Option[String] = {
    relation.sourceIdentifyingName match {
      case Unassigned =>
        Some(s"Leaf position $position - ${getSourceDescriptor(relation.child)}")
      case UserProvided(_) | FlowAssigned(_) =>
        None
    }
  }

  /**
   * Ensures every streaming source in the plan carries an explicit name.
   *
   * Collects all NamedStreamingRelation nodes of the plan, and if at least one of them is
   * Unassigned, throws the error built by
   * QueryCompilationErrors.unnamedStreamingSourcesWithEnforcementError, listing one line per
   * unnamed source.
   *
   * TODO: Add autogenerated names for sources in the context of a flow
   *
   * @param plan the plan to validate
   */
  private def checkSourceNamingEnforcement(plan: LogicalPlan): Unit = {
    // The position reported to the user is the index among the collected wrappers,
    // in the order `collect` visits them.
    val unnamedDescriptions = plan
      .collect { case relation: NamedStreamingRelation => relation }
      .zipWithIndex
      .flatMap { case (relation, position) => describeIfUnnamed(relation, position) }

    if (unnamedDescriptions.nonEmpty) {
      throw QueryCompilationErrors.unnamedStreamingSourcesWithEnforcementError(
        unnamedDescriptions.mkString("\n"))
    }
  }

  /**
   * Removes NamedStreamingRelation wrappers, pushing the wrapper's name down into the
   * wrapped streaming relation.
   *
   * Note: ST/MV resolution is handled by ResolveRelations which runs before this rule,
   * so nothing is resolved here.
   *
   * @param plan the plan to rewrite
   * @return the plan with every matching wrapper replaced by its (renamed) child
   */
  private def propagateSourceNames(plan: LogicalPlan): LogicalPlan = {
    plan.resolveOperatorsWithPruning(_.containsPattern(NAMED_STREAMING_RELATION)) {
      // Direct child supports naming. HasStreamingSourceIdentifyingName is the common
      // interface for StreamingRelation (sql/core) and StreamingRelationV2 (catalyst),
      // so V1 and V2 are both covered by this single case.
      case NamedStreamingRelation(source: HasStreamingSourceIdentifyingName, name) =>
        source.withSourceIdentifyingName(name)

      // The streaming relation sits under a SubqueryAlias, which happens when streaming
      // tables are resolved from catalogs (e.g., Delta tables via .table() API).
      // Keep the alias and rename the relation beneath it.
      case NamedStreamingRelation(
          alias @ SubqueryAlias(_, source: HasStreamingSourceIdentifyingName), name) =>
        alias.copy(child = source.withSourceIdentifyingName(name))

      // Resolved children that cannot carry a name, such as:
      // - ReadTVF and other streaming TVFs (SQL interface doesn't support naming yet)
      // - LocalRelation (used in tests with isStreaming=true)
      // Dropping the wrapper is only safe for Unassigned, since there is no name to lose.
      // NOTE(review): `apply` runs checkSourceNamingEnforcement first, which rejects any
      // Unassigned wrapper, so this case looks unreachable from `apply` - confirm.
      case NamedStreamingRelation(resolvedChild, Unassigned) if resolvedChild.resolved =>
        resolvedChild
    }
  }

  /**
   * Removes NamedStreamingRelation wrappers without propagating any name; used when the
   * source evolution feature is disabled.
   *
   * A wrapper that carries a user-provided or flow-assigned name is rejected with
   * QueryCompilationErrors.streamingSourceNamingNotSupportedError. An Unassigned wrapper is
   * only removed once its child is resolved, which allows FindDataSourceTable to resolve
   * UnresolvedCatalogRelation to StreamingRelation before the wrapper disappears.
   *
   * @param plan the plan to rewrite
   * @return the plan with resolved, Unassigned wrappers replaced by their children
   */
  private def unwrapWithoutNaming(plan: LogicalPlan): LogicalPlan = {
    plan.resolveOperatorsWithPruning(_.containsPattern(NAMED_STREAMING_RELATION)) {
      case NamedStreamingRelation(_, UserProvided(rejectedName)) =>
        throw QueryCompilationErrors.streamingSourceNamingNotSupportedError(rejectedName)
      case NamedStreamingRelation(_, FlowAssigned(rejectedName)) =>
        throw QueryCompilationErrors.streamingSourceNamingNotSupportedError(rejectedName)
      case NamedStreamingRelation(resolvedChild, Unassigned) if resolvedChild.resolved =>
        resolvedChild
    }
  }

  /**
   * Entry point of the rule.
   *
   * Non-streaming plans are returned untouched. For streaming plans the behavior depends on
   * `conf.enableStreamingSourceEvolution`: when enabled, naming is enforced and names are
   * propagated; when disabled, wrappers are stripped and explicitly named sources are
   * rejected.
   *
   * @param plan the plan being analyzed
   * @return the rewritten plan
   */
  override def apply(plan: LogicalPlan): LogicalPlan = {
    if (plan.isStreaming) {
      if (conf.enableStreamingSourceEvolution) {
        // Feature enabled - validate first, then push names into the sources.
        checkSourceNamingEnforcement(plan)
        propagateSourceNames(plan)
      } else {
        // Feature disabled - names are not supported at all.
        unwrapWithoutNaming(plan)
      }
    } else {
      plan
    }
  }
}
