// Copyright © 2026 Pathway

// `PyRef`s need to be passed by value
#![allow(clippy::needless_pass_by_value)]

use crate::async_runtime::create_async_tokio_runtime;
use crate::engine::graph::{
    ErrorLogHandle, ExportedTable, JoinExactlyOnce, OperatorProperties, SubscribeCallbacks,
    SubscribeCallbacksBuilder, SubscribeConfig,
};
use crate::engine::license::{Error as LicenseError, License};
use crate::engine::{
    Computer as EngineComputer, Expressions, PyObjectWrapper as InternalPyObjectWrapper,
    ShardPolicy, TotalFrontier,
};
use crate::persistence::frontier::OffsetAntichain;

use async_nats::connect as nats_connect;
use async_nats::jetstream;
use async_nats::Client as NatsClient;
use async_nats::Subscriber as NatsSubscriber;
use aws_sdk_dynamodb::Client as DynamoDBClient;
use aws_sdk_kinesis::Client as KinesisClient;
use azure_storage::StorageCredentials as AzureStorageCredentials;
use csv::ReaderBuilder as CsvReaderBuilder;
use deltalake::arrow::datatypes::TimeUnit as ArrowTimeUnit;
use elasticsearch::{
    auth::Credentials as ESCredentials,
    http::{
        transport::{SingleNodeConnectionPool, TransportBuilder},
        Url,
    },
    Elasticsearch,
};
use futures::future;
use futures::stream::FuturesOrdered;
use futures::StreamExt;
use iceberg::{Catalog as IcebergCatalog, CatalogBuilder};
use itertools::Itertools;
use log::{info, warn};
use mongodb::sync::Client as MongoClient;
use mysql::Pool as MysqlConnectionPool;
use ndarray;
use numpy::{PyArray, PyReadonlyArrayDyn};
use once_cell::sync::Lazy;
use postgres::{Client, NoTls};
use pyo3::exceptions::{
    PyBaseException, PyException, PyIOError, PyIndexError, PyKeyError, PyNotImplementedError,
    PyRuntimeError, PyTypeError, PyValueError, PyZeroDivisionError,
};
use pyo3::pyclass::CompareOp;
use pyo3::sync::{GILOnceCell, GILProtected};
use pyo3::types::{PyBool, PyBytes, PyDict, PyFloat, PyInt, PyString, PyTuple, PyType};
use pyo3::{intern, PyTypeInfo};
use pyo3::{prelude::*, IntoPyObjectExt};
use pyo3_log::ResetHandle;
use questdb::ingress::Sender as QuestDBSender;
use rdkafka::consumer::{BaseConsumer, Consumer};
use rdkafka::producer::{DefaultProducerContext, ThreadedProducer};
use rdkafka::{ClientConfig, Offset as KafkaOffset, TopicPartitionList};
use rumqttc::{
    mqttbytes::QoS as MqttQoS, Client as MqttClient, Event as MqttEvent, MqttOptions,
    Packet as MqttPacket,
};
use rusqlite::Connection as SqliteConnection;
use rusqlite::OpenFlags as SqliteOpenFlags;
use s3::bucket::Bucket as S3Bucket;
use schema_registry_converter::blocking::json::JsonDecoder as RegistryJsonDecoder;
use schema_registry_converter::blocking::json::JsonEncoder as RegistryJsonEncoder;
use schema_registry_converter::blocking::schema_registry::SrSettings as SchemaRegistrySettings;
use scopeguard::defer;
use send_wrapper::SendWrapper;
use serde_json::Value as JsonValue;
use std::borrow::Borrow;
use std::cell::RefCell;
use std::cmp::Ordering;
use std::collections::{HashMap, HashSet};
use std::ffi::CString;
use std::fs::File;
use std::future::Future;
use std::io::{BufWriter, Read};
use std::mem::take;
use std::os::unix::prelude::*;
use std::sync::{Arc, Mutex};
use std::thread;
use std::time;
use tokio::runtime::Runtime as TokioRuntime;

use self::external_index_wrappers::{
    PyBruteForceKnnMetricKind, PyExternalIndexData, PyExternalIndexQuery, PyUSearchMetricKind,
};
use self::threads::PythonThreadState;

use crate::connectors::aws::{DynamoDBWriter, KinesisReader, KinesisWriter};
use crate::connectors::data_format::{
    BsonFormatter, DebeziumDBType, DebeziumMessageParser, DsvSettings, FieldSource, Formatter,
    IdentityFormatter, IdentityParser, InnerSchemaField, JsonLinesFormatter, JsonLinesParser,
    KeyGenerationPolicy, NullFormatter, Parser, RegistryEncoderWrapper, SingleColumnFormatter,
    TransparentParser, METADATA_FIELD_NAME,
};
use crate::connectors::data_lake::arrow::construct_schema as construct_arrow_schema;
use crate::connectors::data_lake::buffering::{
    AppendOnlyColumnBuffer, ColumnBuffer, SnapshotColumnBuffer,
};
use crate::connectors::data_lake::delta::DeltaOptimizerRule;
use crate::connectors::data_lake::iceberg::{IcebergBatchWriter, IcebergTableParams};
use crate::connectors::data_lake::{DeltaBatchWriter, MaintenanceMode};
use crate::connectors::data_storage::{
    ConnectorMode, DeltaTableReader, ElasticSearchWriter, FileWriter, IcebergReader, KafkaReader,
    KafkaWriter, LakeWriter, MessageQueueTopic, MongoWriter, MqttReader, MqttWriter, MysqlWriter,
    NatsReader, NatsWriter, NullWriter, ObjectDownloader, PsqlWriter, PythonConnectorEventType,
    PythonReaderBuilder, QuestDBAtColumnPolicy, QuestDBWriter, RdkafkaWatermark, ReadError,
    ReadMethod, ReaderBuilder, SqliteReader, TableWriterInitMode, WriteError, Writer,
    MQTT_CLIENT_MAX_CHANNEL_SIZE,
};
use crate::connectors::data_tokenize::{BufReaderTokenizer, CsvTokenizer, Tokenize};
use crate::connectors::nats;
use crate::connectors::posix_like::PosixLikeReader;
use crate::connectors::scanner::{FilesystemScanner, S3Scanner};
use crate::connectors::synchronization::ConnectorGroupDescriptor;
use crate::connectors::{PersistenceMode, SessionType, SnapshotAccess};
use crate::engine::dataflow::Config;
use crate::engine::error::{DataError, DynError, DynResult, Trace as EngineTrace};
use crate::engine::graph::ScopedContext;
use crate::engine::progress_reporter::MonitoringLevel;
use crate::engine::reduce::StatefulCombineFn;
use crate::engine::time::DateTime;
use crate::engine::Config as EngineTelemetryConfig;
use crate::engine::Timestamp;
use crate::engine::{
    run_with_new_dataflow_graph, BatchWrapper, ColumnHandle, ColumnPath,
    ColumnProperties as EngineColumnProperties, DataRow, DateTimeNaive, DateTimeUtc, Duration,
    ExpressionData, IxKeyPolicy, JoinData, JoinType, Key, KeyImpl, PointerExpression, Reducer,
    ReducerData, ScopedGraph, TableHandle, TableProperties as EngineTableProperties, Type,
    UniverseHandle, Value,
};
use crate::engine::{AnyExpression, Context as EngineContext};
use crate::engine::{BoolExpression, Error as EngineError};
use crate::engine::{ComplexColumn as EngineComplexColumn, WakeupReceiver};
use crate::engine::{DateTimeNaiveExpression, DateTimeUtcExpression, DurationExpression};
use crate::engine::{Expression, IntExpression};
use crate::engine::{FloatExpression, Graph};
use crate::engine::{LegacyTable as EngineLegacyTable, StringExpression};
use crate::persistence::config::{
    ConnectorWorkerPair, PersistenceManagerOuterConfig, PersistentStorageConfig,
};
use crate::persistence::input_snapshot::Event as SnapshotEvent;
use crate::persistence::{IntoPersistentId, UniqueName};
use crate::pipe::{pipe, ReaderType, WriterType};
use crate::python_api::external_index_wrappers::PyExternalIndexFactory;
use crate::timestamp::current_unix_timestamp_ms;

use s3::creds::Credentials as AwsCredentials;

mod external_index_wrappers;
mod logging;
pub mod threads;

/// Cache for the imported `pathway.internals.utils.convert` Python module.
static CONVERT: GILOnceCell<Py<PyModule>> = GILOnceCell::new();

/// Returns the `pathway.internals.utils.convert` module, importing it on first use
/// and reusing the cached handle afterwards.
///
/// # Panics
///
/// Panics if the module cannot be imported. The signature offers no way to report
/// the failure, so the panic message names the module to make the cause obvious.
fn get_convert_python_module(py: Python<'_>) -> &Bound<'_, PyModule> {
    CONVERT
        .get_or_init(py, || {
            PyModule::import(py, "pathway.internals.utils.convert")
                .expect("importing `pathway.internals.utils.convert` should succeed")
                .unbind()
        })
        .bind(py)
}

// Early-returns a Python `NotImplementedError` from the enclosing function.
// Without arguments the exception is built from `()`; with arguments they are
// forwarded to `format!` to build the exception message.
// The expansion is `return Err(<PyErr>)`, so the enclosing function must return
// a `Result` whose error type is `PyErr`.
#[allow(unused)] // XXX
macro_rules! pytodo {
    // No message.
    () => {
        return Err(PyNotImplementedError::new_err(()));
    };
    // `format!`-style message.
    ($($arg:tt)+) => {
        return Err(PyNotImplementedError::new_err(format!($($arg)+)));
    };
}

impl<'py> FromPyObject<'py> for Key {
    /// Reads the engine key stored inside a `Pointer` object; fails if `ob` is
    /// not a `Pointer`.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let pointer: PyRef<Pointer> = ob.extract()?;
        Ok(pointer.0)
    }
}

impl<'py> IntoPyObject<'py> for Key {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Wraps the key in a `Pointer` and hands it to Python.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let pointer = Pointer(self);
        pointer.into_bound_py_any(py)
    }
}

/// Converts `ob` through the Python helper `_datetime_to_rust`, which yields an
/// `(i64, bool)` pair: the timestamp and whether the datetime is timezone-aware.
/// Timezone-aware inputs become `Value::DateTimeUtc`, others `Value::DateTimeNaive`.
fn value_from_python_datetime(ob: &Bound<PyAny>) -> PyResult<Value> {
    let py = ob.py();
    let convert = get_convert_python_module(py);
    let converted = convert.call_method1(intern!(py, "_datetime_to_rust"), (ob,))?;
    let (timestamp_ns, is_tz_aware): (i64, bool) = converted.extract()?;
    let value = if is_tz_aware {
        Value::DateTimeUtc(DateTimeUtc::new(timestamp_ns))
    } else {
        Value::DateTimeNaive(DateTimeNaive::new(timestamp_ns))
    };
    Ok(value)
}

/// Converts `ob` through the Python helper `_timedelta_to_rust`, which yields an
/// `i64`, and wraps the result in `Value::Duration`.
fn value_from_python_timedelta(ob: &Bound<PyAny>) -> PyResult<Value> {
    let py = ob.py();
    let convert = get_convert_python_module(py);
    let converted = convert.call_method1(intern!(py, "_timedelta_to_rust"), (ob,))?;
    let duration_ns: i64 = converted.extract()?;
    Ok(Value::Duration(Duration::new(duration_ns)))
}

/// Converts `ob` through the Python helper `_pd_timestamp_to_rust`, which yields an
/// `(i64, bool)` pair: the timestamp and whether it is timezone-aware.
/// Timezone-aware inputs become `Value::DateTimeUtc`, others `Value::DateTimeNaive`.
fn value_from_pandas_timestamp(ob: &Bound<PyAny>) -> PyResult<Value> {
    let py = ob.py();
    let convert = get_convert_python_module(py);
    let converted = convert.call_method1(intern!(py, "_pd_timestamp_to_rust"), (ob,))?;
    let (timestamp, is_tz_aware): (i64, bool) = converted.extract()?;
    let value = if is_tz_aware {
        Value::DateTimeUtc(DateTimeUtc::new(timestamp))
    } else {
        Value::DateTimeNaive(DateTimeNaive::new(timestamp))
    };
    Ok(value)
}

/// Converts `ob` through the Python helper `_pd_timedelta_to_rust`, which yields an
/// `i64`, and wraps the result in `Value::Duration`.
fn value_from_pandas_timedelta(ob: &Bound<PyAny>) -> PyResult<Value> {
    let py = ob.py();
    let convert = get_convert_python_module(py);
    let converted = convert.call_method1(intern!(py, "_pd_timedelta_to_rust"), (ob,))?;
    let duration: i64 = converted.extract()?;
    Ok(Value::Duration(Duration::new(duration)))
}

/// Serializes `ob` with the Python helper `_json_dumps`, parses the resulting
/// string with `serde_json`, and converts the parsed document into a `Value`.
///
/// A parse failure is reported as a Python `ValueError`.
fn value_json_from_py_any(ob: &Bound<PyAny>) -> PyResult<Value> {
    let py = ob.py();
    let dumped = get_convert_python_module(py).call_method1(intern!(py, "_json_dumps"), (ob,))?;
    let text = dumped.downcast::<PyString>()?.to_str()?;
    match serde_json::from_str::<JsonValue>(text) {
        Ok(json) => Ok(Value::from(json)),
        Err(e) => Err(PyValueError::new_err(format!("malformed json: {e}"))),
    }
}

impl<'py> IntoPyObject<'py> for DateTimeNaive {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Builds the Python object by passing `self.timestamp()` to the Python
    /// helper `_pd_timestamp_from_naive_ns`.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let convert = get_convert_python_module(py);
        let method = intern!(py, "_pd_timestamp_from_naive_ns");
        let timestamp = convert.call_method1(method, (self.timestamp(),))?;
        timestamp.into_bound_py_any(py)
    }
}

impl<'py> IntoPyObject<'py> for DateTimeUtc {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Builds the Python object by passing `self.timestamp()` to the Python
    /// helper `_pd_timestamp_from_utc_ns`.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let convert = get_convert_python_module(py);
        let method = intern!(py, "_pd_timestamp_from_utc_ns");
        let timestamp = convert.call_method1(method, (self.timestamp(),))?;
        timestamp.into_bound_py_any(py)
    }
}

impl<'py> IntoPyObject<'py> for Duration {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Builds the Python object by passing `self.nanoseconds()` to the Python
    /// helper `_pd_timedelta_from_ns`.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let convert = get_convert_python_module(py);
        let method = intern!(py, "_pd_timedelta_from_ns");
        let timedelta = convert.call_method1(method, (self.nanoseconds(),))?;
        timedelta.into_bound_py_any(py)
    }
}

/// Reports whether the Python type of `ob` has the qualified name `Json`.
///
/// Only the name is compared, so any class with that qualified name matches.
fn is_pathway_json(ob: &Bound<PyAny>) -> PyResult<bool> {
    ob.get_type()
        .qualname()
        .map(|type_name| type_name == "Json")
}

/// Returns `array` unchanged when `dim` is `None` or equals the array's number of
/// dimensions; returns `None` when a requested dimensionality does not match.
fn array_with_proper_dimensions<T>(
    array: ndarray::ArrayD<T>,
    dim: Option<usize>,
) -> Option<ndarray::ArrayD<T>> {
    if let Some(expected) = dim {
        if array.ndim() != expected {
            return None;
        }
    }
    Some(array)
}

/// Converts `ob` into a datetime `Value` and checks that its timezone-awareness
/// agrees with the requested `type_`.
///
/// The source object is recognised by the qualified name of its Python type:
/// `datetime` goes through `value_from_python_datetime`; `Timestamp`,
/// `DateTimeNaive` and `DateTimeUtc` go through `value_from_pandas_timestamp`.
/// Any other type name, or a naive/aware mismatch, yields a `ValueError`.
///
/// # Panics
///
/// Panics if `type_` is neither `Type::DateTimeNaive` nor `Type::DateTimeUtc`.
fn extract_datetime(ob: &Bound<PyAny>, type_: &Type) -> PyResult<Value> {
    assert!(matches!(type_, Type::DateTimeNaive | Type::DateTimeUtc));
    let type_name_bound = ob.get_type().qualname()?;
    let type_name = type_name_bound.to_str()?;
    let value = match type_name {
        "datetime" => value_from_python_datetime(ob)?,
        "Timestamp" | "DateTimeNaive" | "DateTimeUtc" => value_from_pandas_timestamp(ob)?,
        _ => {
            return Err(PyValueError::new_err(format!(
                "cannot convert {type_name} to DateTime"
            )))
        }
    };
    let mismatch_message = match (&value, type_) {
        (Value::DateTimeNaive(_), Type::DateTimeNaive)
        | (Value::DateTimeUtc(_), Type::DateTimeUtc) => return Ok(value),
        (Value::DateTimeNaive(_), Type::DateTimeUtc) => {
            "cannot create DateTimeUtc from a datetime without timezone information. Pass a datetime with timezone information or change the type to DateTimeNaive"
        }
        (Value::DateTimeUtc(_), Type::DateTimeNaive) => {
            "cannot create DateTimeNaive from a datetime with timezone information. Pass a datetime without timezone information or change the type to DateTimeUtc"
        }
        _ => unreachable!("value_from_python_datetime and value_from_pandas_timestamp return only DateTimeNaive or DateTimeUtc"),
    };
    Err(PyValueError::new_err(mismatch_message))
}

/// Tries to read `ob` as a numpy array of `i64`, then `i32`, then `u32`, widening
/// the narrower element types to `i64`.
///
/// Returns `None` when none of these element types match or when the array's
/// dimensionality differs from a requested `dim`.
fn extract_int_array(ob: &Bound<PyAny>, dim: Option<usize>) -> Option<ndarray::ArrayD<i64>> {
    let widened = ob
        .extract::<PyReadonlyArrayDyn<i64>>()
        .map(|array| array.as_array().to_owned())
        .or_else(|_| {
            ob.extract::<PyReadonlyArrayDyn<i32>>()
                .map(|array| array.as_array().mapv(i64::from))
        })
        .or_else(|_| {
            ob.extract::<PyReadonlyArrayDyn<u32>>()
                .map(|array| array.as_array().mapv(i64::from))
        })
        .ok()?;
    array_with_proper_dimensions(widened, dim)
}

/// Tries to read `ob` as a numpy array of `f64`, then `f32`, and finally as any
/// integer array accepted by `extract_int_array`, converting elements to `f64`.
///
/// Returns `None` when no element type matches or when the array's
/// dimensionality differs from a requested `dim`.
#[allow(clippy::cast_precision_loss)]
fn extract_float_array(ob: &Bound<PyAny>, dim: Option<usize>) -> Option<ndarray::ArrayD<f64>> {
    let float_array = ob
        .extract::<PyReadonlyArrayDyn<f64>>()
        .map(|array| array.as_array().to_owned())
        .or_else(|_| {
            ob.extract::<PyReadonlyArrayDyn<f32>>()
                .map(|array| array.as_array().mapv(f64::from))
        })
        .ok();
    let array = if let Some(array) = float_array {
        array
    } else {
        // Integer arrays are accepted too; the cast may lose precision for large values.
        extract_int_array(ob, dim)?.mapv(|v| v as f64)
    };
    array_with_proper_dimensions(array, dim)
}

/// Builds the `TypeError` raised when `ob` cannot be converted to `type_`.
fn py_type_error(ob: &Bound<PyAny>, type_: &Type) -> PyErr {
    let message = format!("cannot create an object of type {type_:?} from value {ob}");
    PyTypeError::new_err(message)
}

/// Converts the Python object `ob` into an engine `Value` of the requested `type_`.
///
/// Instances of `Error` map to `Value::Error` regardless of `type_`. Otherwise each
/// match arm yields `Some(value)` on success or `None` on failure, and `None` is
/// turned into the `TypeError` built by `py_type_error`.
///
/// # Errors
///
/// Besides that `TypeError`, some arms propagate their own errors with `?`:
/// nested conversions (`Optional`, `Future`, `Tuple` and `List` elements), datetime
/// conversion, sequence extraction for `Tuple`/`List`, type-name lookups, and an
/// unsupported element type for `Array` (reported as `ValueError`).
pub fn extract_value(ob: &Bound<PyAny>, type_: &Type) -> PyResult<Value> {
    // Error markers are accepted for every requested type.
    if ob.is_instance_of::<Error>() {
        return Ok(Value::Error);
    }
    let extracted = match type_ {
        // Untyped: defer to the generic `FromPyObject for Value` conversion.
        Type::Any => ob.extract().ok(),
        Type::Optional(arg) => {
            if ob.is_none() {
                Some(Value::None)
            } else {
                Some(extract_value(ob, arg)?)
            }
        }
        Type::Bool => ob.extract::<bool>().ok().map(Value::from),
        Type::Int => ob.extract::<i64>().ok().map(Value::from),
        Type::Float => ob.extract::<f64>().ok().map(Value::from),
        Type::Pointer => ob.extract::<Key>().ok().map(Value::from),
        Type::String => ob
            .downcast::<PyString>()
            .ok()
            .and_then(|s| s.to_str().ok())
            .map(Value::from),
        Type::Bytes => ob
            .downcast::<PyBytes>()
            .ok()
            .map(|b| Value::from(b.as_bytes())),
        // Datetime errors carry specific messages, so they are propagated as-is.
        Type::DateTimeNaive | Type::DateTimeUtc => Some(extract_datetime(ob, type_)?),
        Type::Duration => {
            // XXX: check types, not names
            let type_name_bound = ob.get_type().qualname()?;
            let type_name = type_name_bound.to_str()?;
            // Conversion failures are discarded here and surface as the generic type error.
            if type_name == "timedelta" {
                value_from_python_timedelta(ob).ok()
            } else if matches!(type_name, "Timedelta" | "Duration") {
                value_from_pandas_timedelta(ob).ok()
            } else {
                None
            }
        }
        Type::Array(dim, wrapped) => match wrapped.borrow() {
            Type::Int => Ok(extract_int_array(ob, *dim).map(Value::from)),
            Type::Float => Ok(extract_float_array(ob, *dim).map(Value::from)),
            // Unspecified element type: try integers first, then floats.
            Type::Any => Ok(extract_int_array(ob, *dim)
                .map(Value::from)
                .or_else(|| extract_float_array(ob, *dim).map(Value::from))),
            wrapped => Err(PyValueError::new_err(format!(
                "{wrapped:?} is invalid type for Array"
            ))),
        }?,
        Type::Json => {
            // Objects whose type is named `Json` are serialized from their `value` attribute.
            if is_pathway_json(ob)? {
                value_json_from_py_any(&ob.getattr("value")?).ok()
            } else {
                value_json_from_py_any(ob).ok()
            }
        }
        Type::Tuple(args) => {
            let obs = ob.extract::<Vec<Bound<PyAny>>>()?;
            // The number of elements must match the number of declared element types.
            if obs.len() == args.len() {
                let values: Vec<_> = obs
                    .into_iter()
                    .zip(args.iter())
                    .map(|(ob, type_)| extract_value(&ob, type_))
                    .try_collect()?;
                Some(Value::from(values.as_slice()))
            } else {
                None
            }
        }
        Type::List(arg) => {
            let obs = ob.extract::<Vec<Bound<PyAny>>>()?;
            let values: Vec<_> = obs
                .into_iter()
                .map(|ob| extract_value(&ob, arg))
                .try_collect()?;
            Some(Value::from(values.as_slice()))
        }
        Type::PyObjectWrapper => {
            // Objects that are not already a `PyObjectWrapper` are wrapped on the fly.
            let value = if let Ok(ob) = ob.downcast::<PyObjectWrapper>() {
                ob.get().as_internal(ob.py())
            } else {
                PyObjectWrapper::new(ob.clone().unbind()).as_internal(ob.py())
            };
            Some(Value::from(value))
        }
        Type::Future(arg) => {
            if ob.is_instance_of::<Pending>() {
                Some(Value::Pending)
            } else {
                Some(extract_value(ob, arg)?)
            }
        }
    };
    extracted.ok_or_else(|| py_type_error(ob, type_))
}

impl<'py> FromPyObject<'py> for Value {
    /// Converts an arbitrary Python object into an engine `Value` without a
    /// declared target type.
    ///
    /// Exact-type checks run first as a fast path; the remaining branches are
    /// fallbacks based on what the object can be extracted as (subclasses, numpy
    /// arrays, sequences, dicts, wrapped Python objects), and finally on the
    /// qualified name of the object's type for datetime-like values.
    ///
    /// # Errors
    ///
    /// Returns `TypeError` ("unsupported value type: ...") when no branch applies.
    /// If the object is a sequence, the conversion error of its first offending
    /// element is returned instead. Errors from string decoding, JSON
    /// serialization and the datetime helpers are propagated.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let py = ob.py();
        if ob.is_none() {
            Ok(Value::None)
        } else if ob.is_exact_instance_of::<Error>() {
            Ok(Value::Error)
        } else if ob.is_exact_instance_of::<Pending>() {
            Ok(Value::Pending)
        } else if let Ok(s) = ob.downcast_exact::<PyString>() {
            Ok(Value::from(s.to_str()?))
        } else if let Ok(b) = ob.downcast_exact::<PyBytes>() {
            Ok(Value::from(b.as_bytes()))
        } else if let Some(i) = ob
            .is_exact_instance_of::<PyInt>()
            .then(|| ob.extract::<i64>().ok())
            .flatten()
        {
            // A Python `int` may not fit in `i64`. Such values must not panic here;
            // they fall through to the fallback chain below, where the `f64` branch
            // handles them, the same way out-of-range `int` subclasses are handled.
            Ok(Value::Int(i))
        } else if ob.is_exact_instance_of::<PyFloat>() {
            Ok(Value::Float(
                ob.extract::<f64>()
                    .expect("type conversion should work for float")
                    .into(),
            ))
        } else if let Ok(b) = ob.downcast_exact::<PyBool>() {
            Ok(Value::Bool(b.is_true()))
        } else if ob.is_exact_instance_of::<Pointer>() {
            Ok(Value::Pointer(
                ob.extract::<Key>()
                    .expect("type conversion should work for Key"),
            ))
        } else if is_pathway_json(ob)? {
            value_json_from_py_any(&ob.getattr(intern!(py, "value"))?)
        } else if let Ok(b) = ob.extract::<bool>() {
            // Fallback checks from now on
            Ok(Value::Bool(b))
        } else if let Ok(array) = ob.extract::<PyReadonlyArrayDyn<i64>>() {
            // single-element arrays convert to scalars, so we need to check for arrays first
            Ok(Value::from(array.as_array().to_owned()))
        } else if let Ok(array) = ob.extract::<PyReadonlyArrayDyn<i32>>() {
            Ok(Value::from(array.as_array().mapv(i64::from)))
        } else if let Ok(array) = ob.extract::<PyReadonlyArrayDyn<u32>>() {
            Ok(Value::from(array.as_array().mapv(i64::from)))
        } else if let Ok(array) = ob.extract::<PyReadonlyArrayDyn<f64>>() {
            Ok(Value::from(array.as_array().to_owned()))
        } else if let Ok(array) = ob.extract::<PyReadonlyArrayDyn<f32>>() {
            Ok(Value::from(array.as_array().mapv(f64::from)))
        } else if let Ok(i) = ob.extract::<i64>() {
            Ok(Value::Int(i))
        } else if let Ok(f) = ob.extract::<f64>() {
            // XXX: bigints go here
            Ok(Value::Float(f.into()))
        } else if let Ok(k) = ob.extract::<Key>() {
            Ok(Value::Pointer(k))
        } else if let Ok(s) = ob.downcast::<PyString>() {
            Ok(s.to_str()?.into())
        } else if let Ok(bytes) = ob.downcast::<PyBytes>() {
            Ok(Value::Bytes(bytes.as_bytes().into()))
        } else if let Ok(t) = ob.extract::<Vec<Self>>() {
            Ok(Value::from(t.as_slice()))
        } else if let Ok(dict) = ob.downcast::<PyDict>() {
            value_json_from_py_any(dict)
        } else if let Ok(ob) = ob.downcast::<PyObjectWrapper>() {
            Ok(Value::from(ob.get().as_internal(ob.py())))
        } else {
            // XXX: check types, not names
            let type_name_bound = ob.get_type().qualname()?;
            let type_name = type_name_bound.to_str()?;
            if type_name == "datetime" {
                return value_from_python_datetime(ob);
            } else if type_name == "timedelta" {
                return value_from_python_timedelta(ob);
            } else if matches!(type_name, "Timestamp" | "DateTimeNaive" | "DateTimeUtc") {
                return value_from_pandas_timestamp(ob);
            } else if matches!(type_name, "Timedelta" | "Duration") {
                return value_from_pandas_timedelta(ob);
            }

            if let Ok(vec) = ob.extract::<Vec<Bound<PyAny>>>() {
                // generate a nicer error message if the type of an element is the problem
                for v in vec {
                    v.extract::<Self>()?;
                }
            }

            Err(PyTypeError::new_err(format!(
                "unsupported value type: {}",
                ob.get_type().name()?
            )))
        }
    }
}

/// Serializes `json` to a string and passes it to the Python helper
/// `_parse_to_json`, returning whatever object that helper produces.
fn json_to_py_object<'py>(py: Python<'py>, json: &JsonValue) -> PyResult<Bound<'py, PyAny>> {
    let convert = get_convert_python_module(py);
    let method = intern!(py, "_parse_to_json");
    let serialized = json.to_string();
    convert.call_method1(method, (serialized,))
}

impl<'py> IntoPyObject<'py> for &Value {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Converts a borrowed engine value into the corresponding Python object.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        match self {
            // Marker values: Python `None` and the `ERROR` / `PENDING` objects.
            Value::None => Ok(py.None().into_bound(py)),
            Value::Error => ERROR.clone_ref(py).into_bound_py_any(py),
            Value::Pending => PENDING.clone_ref(py).into_bound_py_any(py),

            // Scalars.
            Value::Bool(flag) => flag.into_bound_py_any(py),
            Value::Int(int) => int.into_bound_py_any(py),
            Value::Float(float) => float.into_bound_py_any(py),
            Value::Pointer(key) => key.into_bound_py_any(py),
            Value::String(string) => string.into_bound_py_any(py),
            Value::Bytes(bytes) => PyBytes::new(py, bytes).into_bound_py_any(py),

            // Date and time values.
            Value::DateTimeNaive(datetime) => datetime.into_bound_py_any(py),
            Value::DateTimeUtc(datetime) => datetime.into_bound_py_any(py),
            Value::Duration(duration) => duration.into_bound_py_any(py),

            // Containers.
            Value::Tuple(items) => Ok(PyTuple::new(py, items.iter())?.into_any()),
            Value::IntArray(array) => Ok(PyArray::from_array(py, array).into_any()),
            Value::FloatArray(array) => Ok(PyArray::from_array(py, array).into_any()),

            // Values that need help from the Python side.
            Value::Json(json) => json_to_py_object(py, json),
            Value::PyObjectWrapper(wrapper) => {
                PyObjectWrapper::from_internal(py, wrapper).into_bound_py_any(py)
            }
        }
    }
}

impl<'py> IntoPyObject<'py> for Value {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Converts an owned value by delegating to the conversion for `&Value`.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, PyErr> {
        <&Value as IntoPyObject<'py>>::into_pyobject(&self, py)
    }
}

impl<'py> FromPyObject<'py> for Reducer {
    /// Clones the engine `Reducer` held by a `PyReducer` wrapper object.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyReducer> = ob.extract()?;
        Ok(wrapper.0.clone())
    }
}

impl<'py> IntoPyObject<'py> for Reducer {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Wraps the reducer in a `PyReducer` and hands it to Python.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PyReducer(self);
        wrapper.into_bound_py_any(py)
    }
}

impl<'py> FromPyObject<'py> for Type {
    /// Clones the engine `Type` held by a `PathwayType` wrapper object.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PathwayType> = ob.extract()?;
        Ok(wrapper.0.clone())
    }
}

impl<'py> IntoPyObject<'py> for Type {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Wraps the type in a `PathwayType` and hands it to Python.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PathwayType(self);
        wrapper.into_bound_py_any(py)
    }
}

impl<'py> FromPyObject<'py> for ReadMethod {
    /// Copies the `ReadMethod` held by a `PyReadMethod` wrapper object.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyReadMethod> = ob.extract()?;
        Ok(wrapper.0)
    }
}

impl<'py> IntoPyObject<'py> for ReadMethod {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Wraps the read method in a `PyReadMethod` and hands it to Python.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PyReadMethod(self);
        wrapper.into_bound_py_any(py)
    }
}

impl<'py> FromPyObject<'py> for ConnectorMode {
    /// Copies the `ConnectorMode` held by a `PyConnectorMode` wrapper object.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyConnectorMode> = ob.extract()?;
        Ok(wrapper.0)
    }
}

impl<'py> IntoPyObject<'py> for ConnectorMode {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Wraps the connector mode in a `PyConnectorMode` and hands it to Python.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PyConnectorMode(self);
        wrapper.into_bound_py_any(py)
    }
}

impl<'py> FromPyObject<'py> for SessionType {
    /// Copies the `SessionType` held by a `PySessionType` wrapper object.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PySessionType> = ob.extract()?;
        Ok(wrapper.0)
    }
}

impl<'py> IntoPyObject<'py> for SessionType {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Wraps the session type in a `PySessionType` and hands it to Python.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PySessionType(self);
        wrapper.into_bound_py_any(py)
    }
}

impl<'py> FromPyObject<'py> for PythonConnectorEventType {
    /// Copies the event type held by a `PyPythonConnectorEventType` wrapper object.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyPythonConnectorEventType> = ob.extract()?;
        Ok(wrapper.0)
    }
}

impl<'py> IntoPyObject<'py> for PythonConnectorEventType {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Wraps the event type in a `PyPythonConnectorEventType` and hands it to Python.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PyPythonConnectorEventType(self);
        wrapper.into_bound_py_any(py)
    }
}

impl<'py> FromPyObject<'py> for DebeziumDBType {
    /// Copies the `DebeziumDBType` held by a `PyDebeziumDBType` wrapper object.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyDebeziumDBType> = ob.extract()?;
        Ok(wrapper.0)
    }
}

impl<'py> IntoPyObject<'py> for DebeziumDBType {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Wraps the database type in a `PyDebeziumDBType` and hands it to Python.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PyDebeziumDBType(self);
        wrapper.into_bound_py_any(py)
    }
}

impl<'py> FromPyObject<'py> for KeyGenerationPolicy {
    /// Copies the policy held by a `PyKeyGenerationPolicy` wrapper object.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyKeyGenerationPolicy> = ob.extract()?;
        Ok(wrapper.0)
    }
}

impl<'py> IntoPyObject<'py> for KeyGenerationPolicy {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Wraps the policy in a `PyKeyGenerationPolicy` and hands it to Python.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PyKeyGenerationPolicy(self);
        wrapper.into_bound_py_any(py)
    }
}

impl<'py> FromPyObject<'py> for MonitoringLevel {
    /// Copies the `MonitoringLevel` held by a `PyMonitoringLevel` wrapper object.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyMonitoringLevel> = ob.extract()?;
        Ok(wrapper.0)
    }
}

impl<'py> IntoPyObject<'py> for MonitoringLevel {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Wraps the monitoring level in a `PyMonitoringLevel` and hands it to Python.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PyMonitoringLevel(self);
        wrapper.into_bound_py_any(py)
    }
}

impl<'py> FromPyObject<'py> for TableWriterInitMode {
    /// Copies the init mode held by a `PyTableWriterInitMode` wrapper object.
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyTableWriterInitMode> = ob.extract()?;
        Ok(wrapper.0)
    }
}

impl<'py> IntoPyObject<'py> for TableWriterInitMode {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    /// Wraps the init mode in a `PyTableWriterInitMode` and hands it to Python.
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PyTableWriterInitMode(self);
        wrapper.into_bound_py_any(py)
    }
}

/// Translates an engine error into the Python exception that should be raised.
impl From<EngineError> for PyErr {
    /// Conversion rules, in order:
    /// 1. an error that downcasts to a `PyErr` is returned unchanged;
    /// 2. `WithTrace` becomes `ENGINE_ERROR_WITH_TRACE_TYPE` whose arguments are the
    ///    converted inner error and the trace;
    /// 3. `ReaderFailed` returns the wrapped Python error for `ReadError::Py`,
    ///    otherwise `ENGINE_ERROR_TYPE` with the error's message;
    /// 4. data errors and a few engine errors map to the matching built-in
    ///    exception types (see the match below);
    /// 5. everything else becomes `ENGINE_ERROR_TYPE`.
    fn from(mut error: EngineError) -> Self {
        // `downcast` consumes the error; on failure it hands the error back so the
        // remaining rules can inspect it.
        match error.downcast::<PyErr>() {
            Ok(error) => return error,
            Err(other) => error = other,
        }
        Python::with_gil(|py| {
            if let EngineError::WithTrace { inner, trace } = error {
                // Convert the inner error recursively, then attach the trace.
                let inner = PyErr::from(EngineError::from(inner));
                let args = (inner, trace);
                return PyErr::from_type(ENGINE_ERROR_WITH_TRACE_TYPE.bind(py).clone(), args);
            }

            // Capture the message before `error` is moved by the pattern match below.
            let message = error.to_string();
            if let EngineError::ReaderFailed(read_error) = error {
                if let ReadError::Py(e) = *read_error {
                    return e;
                }
                let exception_type = ENGINE_ERROR_TYPE.bind(py).clone();
                return PyErr::from_type(exception_type, message);
            }
            // Pick the Python exception class; the message is always the error's
            // `to_string()` output.
            let exception_type = match error {
                EngineError::DataError(ref error) => match error {
                    DataError::TypeMismatch { .. } => PyTypeError::type_object(py),
                    DataError::DuplicateKey(_)
                    | DataError::ValueMissing
                    | DataError::KeyMissingInOutputTable(_)
                    | DataError::KeyMissingInInputTable(_) => PyKeyError::type_object(py),
                    DataError::DivisionByZero => PyZeroDivisionError::type_object(py),
                    DataError::ParseError(_)
                    | DataError::ValueError(_)
                    | DataError::AppendOnlyViolation(_, _)
                    | DataError::RepeatedEntryInBatch => PyValueError::type_object(py),
                    DataError::IndexOutOfBounds => PyIndexError::type_object(py),
                    _ => ENGINE_ERROR_TYPE.bind(py).clone(),
                },
                EngineError::IterationLimitTooSmall
                | EngineError::InconsistentColumnProperties
                | EngineError::IdInTableProperties => PyValueError::type_object(py),
                EngineError::OtherWorkerPanic => OTHER_WORKER_ERROR.bind(py).clone(),
                _ => ENGINE_ERROR_TYPE.bind(py).clone(),
            };
            PyErr::from_type(exception_type, message)
        })
    }
}

/// Succeeds when `a` and `b` refer to the same Python object; otherwise returns
/// a `ValueError` carrying `msg`.
fn check_identity<T>(a: &Py<T>, b: &Py<T>, msg: &'static str) -> PyResult<()> {
    if !a.is(b) {
        return Err(PyValueError::new_err(msg));
    }
    Ok(())
}

/// Iterates over a Python iterable and extracts every item as `T`, stopping at
/// the first iteration or extraction error.
fn from_py_iterable<'py, T>(iterable: &Bound<'py, PyAny>) -> PyResult<Vec<T>>
where
    T: FromPyObject<'py>,
{
    let mut items = Vec::new();
    for obj in iterable.try_iter()? {
        items.push(obj?.extract()?);
    }
    Ok(items)
}

/// Iterates over a Python iterable of `LegacyTable` objects and converts each one
/// with `to_engine`, stopping at the first iteration or extraction error.
fn engine_tables_from_py_iterable(iterable: &Bound<PyAny>) -> PyResult<Vec<EngineLegacyTable>> {
    let mut tables = Vec::new();
    for item in iterable.try_iter()? {
        let item = item?;
        let table: PyRef<LegacyTable> = item.extract()?;
        tables.push(table.to_engine());
    }
    Ok(tables)
}

/// Shared `__class_getitem__` implementation: returns `types.GenericAlias(cls, item)`.
///
/// The `GenericAlias` object is looked up once and cached for later calls.
pub fn generic_alias_class_getitem<'py>(
    cls: &Bound<'py, PyType>,
    item: &Bound<'py, PyAny>,
) -> PyResult<Bound<'py, PyAny>> {
    static GENERIC_ALIAS: GILOnceCell<Py<PyAny>> = GILOnceCell::new();

    let py = cls.py();
    let generic_alias = GENERIC_ALIAS.get_or_try_init(py, || -> PyResult<Py<PyAny>> {
        let types = py.import("types")?;
        let alias = types.getattr("GenericAlias")?;
        Ok(alias.unbind())
    })?;
    generic_alias.bind(py).call1((cls, item))
}

// Python-visible wrapper around an engine `Key`.
// (Plain `//` comments are used here on purpose: doc comments on pyclass items
// become Python docstrings.)
#[pyclass(module = "pathway.engine", frozen)]
pub struct Pointer(Key);

#[pymethods]
impl Pointer {
    // `str(pointer)`: the key's own string form.
    pub fn __str__(&self) -> String {
        let Self(key) = self;
        key.to_string()
    }

    // `repr(pointer)`: the escaped string form wrapped in `Pointer("...")`.
    pub fn __repr__(&self) -> String {
        let rendered = self.0.to_string();
        format!("Pointer(\"{}\")", rendered.escape_default())
    }

    // `hash(pointer)`: the raw key value cast to `usize`.
    pub fn __hash__(&self) -> usize {
        let raw: KeyImpl = self.0 .0;
        raw as usize
    }

    // `int(pointer)`: the raw key value.
    pub fn __int__(&self) -> KeyImpl {
        let Self(key) = self;
        key.0
    }

    // Rich comparison: pointers compare by key; every pointer is greater than
    // negative infinity and less than positive infinity. Anything else yields
    // `NotImplemented`.
    pub fn __richcmp__(&self, other: &Bound<PyAny>, op: CompareOp) -> Py<PyAny> {
        let py = other.py();
        let ordering = if let Ok(pointer) = other.extract::<PyRef<Self>>() {
            Some(self.0.cmp(&pointer.0))
        } else {
            // XXX: comparisons to ±∞
            match other.extract::<f64>() {
                Ok(number) if number == f64::NEG_INFINITY => Some(Ordering::Greater),
                Ok(number) if number == f64::INFINITY => Some(Ordering::Less),
                _ => None,
            }
        };
        match ordering {
            Some(ordering) => op.matches(ordering).into_py_any(py).unwrap(),
            None => py.NotImplemented(),
        }
    }

    // `Pointer[...]`: delegates to the shared generic-alias helper.
    #[classmethod]
    pub fn __class_getitem__<'py>(
        cls: &Bound<'py, PyType>,
        item: &Bound<'py, PyAny>,
    ) -> PyResult<Bound<'py, PyAny>> {
        generic_alias_class_getitem(cls, item)
    }
}

// Python-visible `pathway.engine.PyObjectWrapper`: holds an arbitrary Python object
// together with an optional serializer object.
#[pyclass(module = "pathway.engine", frozen)]
struct PyObjectWrapper {
    // The wrapped object; readable from Python as `.value`.
    #[pyo3(get)]
    value: PyObject,
    // Optional serializer; not exposed as a Python attribute.
    serializer: Option<PyObject>,
}

#[pymethods]
impl PyObjectWrapper {
    // `PyObjectWrapper(value)`: wraps `value` without a serializer.
    #[new]
    #[pyo3(signature = (value))]
    fn new(value: PyObject) -> Self {
        Self::create_with_serializer(value, None)
    }

    // `PyObjectWrapper._create_with_serializer(value, *, serializer=None)`.
    #[staticmethod]
    #[pyo3(signature = (value, *, serializer=None), name="_create_with_serializer")]
    fn create_with_serializer(value: PyObject, serializer: Option<PyObject>) -> Self {
        Self { value, serializer }
    }

    // `repr(wrapper)`: the wrapped value's display form inside `PyObjectWrapper(...)`.
    pub fn __repr__(&self) -> String {
        let Self { value, .. } = self;
        format!("PyObjectWrapper({value})")
    }

    // `PyObjectWrapper[item]`: delegates to the shared `types.GenericAlias` helper.
    #[classmethod]
    pub fn __class_getitem__<'py>(
        cls: &Bound<'py, PyType>,
        item: &Bound<'py, PyAny>,
    ) -> PyResult<Bound<'py, PyAny>> {
        generic_alias_class_getitem(cls, item)
    }

    // Constructor arguments reported to Python: only the wrapped value.
    fn __getnewargs__(&self, py: Python<'_>) -> (PyObject,) {
        let value = self.value.clone_ref(py);
        (value,)
    }
}

impl PyObjectWrapper {
    /// Builds the engine-side wrapper from new references to the value and serializer.
    fn as_internal(&self, py: Python<'_>) -> InternalPyObjectWrapper {
        let value = self.value.clone_ref(py);
        let serializer = self
            .serializer
            .as_ref()
            .map(|serializer| serializer.clone_ref(py));
        InternalPyObjectWrapper::new(value, serializer)
    }

    /// Builds the Python-side wrapper from the engine-side one.
    fn from_internal(py: Python<'_>, ob: &InternalPyObjectWrapper) -> Self {
        Self {
            value: ob.get_inner(py),
            serializer: ob.get_serializer(py),
        }
    }
}

// Python-facing `pathway.engine.Reducer` class wrapping the engine's `Reducer` enum.
#[pyclass(module = "pathway.engine", frozen, name = "Reducer")]
pub struct PyReducer(Reducer);

// Reducers that take no parameters are exposed as class attributes; reducers that
// carry configuration are exposed as static factory methods returning the
// configured `Reducer` variant.
#[pymethods]
impl PyReducer {
    #[classattr]
    pub const ARG_MIN: Reducer = Reducer::ArgMin;

    #[classattr]
    pub const MIN: Reducer = Reducer::Min;

    #[classattr]
    pub const ARG_MAX: Reducer = Reducer::ArgMax;

    #[classattr]
    pub const MAX: Reducer = Reducer::Max;

    // `Reducer.float_sum(strict)` -> `Reducer::FloatSum { strict }`.
    #[staticmethod]
    fn float_sum(strict: bool) -> Reducer {
        Reducer::FloatSum { strict }
    }

    #[classattr]
    pub const INT_SUM: Reducer = Reducer::IntSum;

    // `Reducer.array_sum(strict)` -> `Reducer::ArraySum { strict }`.
    #[staticmethod]
    fn array_sum(strict: bool) -> Reducer {
        Reducer::ArraySum { strict }
    }

    // `Reducer.sorted_tuple(skip_nones)` -> `Reducer::SortedTuple { skip_nones }`.
    #[staticmethod]
    fn sorted_tuple(skip_nones: bool) -> Reducer {
        Reducer::SortedTuple { skip_nones }
    }

    // `Reducer.tuple(skip_nones)` -> `Reducer::Tuple { skip_nones }`.
    #[staticmethod]
    fn tuple(skip_nones: bool) -> Reducer {
        Reducer::Tuple { skip_nones }
    }

    #[classattr]
    pub const UNIQUE: Reducer = Reducer::Unique;

    #[classattr]
    pub const COUNT: Reducer = Reducer::Count;

    #[classattr]
    pub const COUNT_DISTINCT: Reducer = Reducer::CountDistinct;

    // `Reducer.count_distinct_approximate(precision)` ->
    // `Reducer::CountDistinctApproximate { precision }`.
    #[staticmethod]
    fn count_distinct_approximate(precision: usize) -> Reducer {
        Reducer::CountDistinctApproximate { precision }
    }

    #[classattr]
    pub const ANY: Reducer = Reducer::Any;

    // `Reducer.stateful_many(combine)`: wraps the Python callable `combine` with
    // `wrap_stateful_combine` and stores it in `Reducer::Stateful`.
    #[staticmethod]
    fn stateful_many(combine: Py<PyAny>) -> Reducer {
        Reducer::Stateful {
            combine_fn: wrap_stateful_combine(combine),
        }
    }

    #[classattr]
    pub const LATEST: Reducer = Reducer::Latest;

    #[classattr]
    pub const EARLIEST: Reducer = Reducer::Earliest;
}

/// Adapts a Python callable into a `StatefulCombineFn`.
///
/// Each invocation acquires the GIL, calls `combine(state, values)` and extracts the
/// returned object; Python exceptions and extraction failures are propagated as errors.
fn wrap_stateful_combine(combine: Py<PyAny>) -> StatefulCombineFn {
    Arc::new(move |state, values| {
        Python::with_gil(|py| {
            let callable = combine.bind(py);
            Ok(callable.call1((state, values))?.extract()?)
        })
    })
}

// Python-facing `pathway.engine.ConnectorGroupDescriptor`, wrapping the engine struct
// of the same name.
#[pyclass(module = "pathway.engine", frozen, name = "ConnectorGroupDescriptor")]
struct PyConnectorGroupDescriptor(ConnectorGroupDescriptor);

#[pymethods]
impl PyConnectorGroupDescriptor {
    // Constructor: stores every argument unchanged in the wrapped descriptor.
    #[new]
    fn new(
        name: String,
        column_index: usize,
        max_difference: Value,
        priority: u64,
        idle_duration: Option<std::time::Duration>,
    ) -> Self {
        let descriptor = ConnectorGroupDescriptor {
            name,
            column_index,
            max_difference,
            priority,
            idle_duration,
        };
        Self(descriptor)
    }
}

/// Extraction from Python: borrows the wrapper object and clones the inner descriptor.
impl<'py> FromPyObject<'py> for ConnectorGroupDescriptor {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyConnectorGroupDescriptor> = ob.extract()?;
        Ok(wrapper.0.clone())
    }
}

/// Conversion to Python: wraps the descriptor in a new `PyConnectorGroupDescriptor`.
impl<'py> IntoPyObject<'py> for ConnectorGroupDescriptor {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PyConnectorGroupDescriptor(self);
        wrapper.into_bound_py_any(py)
    }
}

// Python-facing `pathway.engine.ReducerData`, wrapping the engine's `ReducerData`.
#[pyclass(module = "pathway.engine", frozen, name = "ReducerData")]
struct PyReducerData(ReducerData);

#[pymethods]
impl PyReducerData {
    // Constructor: a missing `trace` (Python `None`) is stored as `EngineTrace::Empty`.
    #[new]
    #[pyo3(signature = (reducer, skip_errors, append_only, column_paths, trace))]
    fn new(
        reducer: Reducer,
        skip_errors: bool,
        append_only: bool,
        column_paths: Vec<ColumnPath>,
        trace: Option<EngineTrace>,
    ) -> Self {
        let trace = trace.unwrap_or(EngineTrace::Empty);
        let data = ReducerData {
            reducer,
            skip_errors,
            append_only,
            column_paths,
            trace,
        };
        Self(data)
    }
}

/// Extraction from Python: borrows the wrapper object and clones the inner data.
impl<'py> FromPyObject<'py> for ReducerData {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyReducerData> = ob.extract()?;
        Ok(wrapper.0.clone())
    }
}

// Python-facing `pathway.engine.ExpressionData`, wrapping the engine's `ExpressionData`.
#[pyclass(module = "pathway.engine", frozen, name = "ExpressionData")]
struct PyExpressionData(ExpressionData);

#[pymethods]
impl PyExpressionData {
    // Constructor: shares the expression's inner `Arc` and copies its `gil` flag.
    #[new]
    fn new(
        expression: &PyExpression,
        properties: TableProperties,
        append_only: bool,
        deterministic: bool,
    ) -> Self {
        let data = ExpressionData {
            expression: expression.inner.clone(),
            properties: properties.0,
            append_only,
            deterministic,
            gil: expression.gil,
        };
        Self(data)
    }
}

/// Extraction from Python: borrows the wrapper object and clones the inner data.
impl<'py> FromPyObject<'py> for ExpressionData {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyExpressionData> = ob.extract()?;
        Ok(wrapper.0.clone())
    }
}

/// Unary operators that `PyExpression::unary_expression` can translate into engine
/// expressions.
#[derive(Clone, Copy, Debug)]
pub enum UnaryOperator {
    /// Inversion; mapped to `BoolExpression::Not` for `Bool` operands.
    Inv,
    /// Negation; mapped to the `Neg` expression for `Int`, `Float` and `Duration` operands.
    Neg,
}

/// Binary operators that `PyExpression::binary_expression` can translate into engine
/// expressions; which expression is chosen depends on the operand types.
#[derive(Clone, Copy, Debug)]
pub enum BinaryOperator {
    And,
    Or,
    Xor,
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
    Add,
    Sub,
    Mul,
    FloorDiv,
    TrueDiv,
    Mod,
    Pow,
    Lshift,
    Rshift,
    MatMul,
}

// Python-facing `pathway.engine.UnaryOperator`; each variant is a class attribute.
#[pyclass(module = "pathway.engine", frozen, name = "UnaryOperator")]
pub struct PyUnaryOperator(UnaryOperator);

#[pymethods]
impl PyUnaryOperator {
    #[classattr]
    pub const INV: UnaryOperator = UnaryOperator::Inv;
    #[classattr]
    pub const NEG: UnaryOperator = UnaryOperator::Neg;
}

/// Extraction from Python: borrows the wrapper object and copies the operator out.
impl<'py> FromPyObject<'py> for UnaryOperator {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyUnaryOperator> = ob.extract()?;
        Ok(wrapper.0)
    }
}

/// Conversion to Python: wraps the operator in a new `PyUnaryOperator`.
impl<'py> IntoPyObject<'py> for UnaryOperator {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PyUnaryOperator(self);
        wrapper.into_bound_py_any(py)
    }
}
// Python-facing `pathway.engine.BinaryOperator`; each variant is a class attribute.
#[pyclass(module = "pathway.engine", frozen, name = "BinaryOperator")]
pub struct PyBinaryOperator(BinaryOperator);

#[pymethods]
impl PyBinaryOperator {
    // Boolean / bitwise operators.
    #[classattr]
    pub const AND: BinaryOperator = BinaryOperator::And;
    #[classattr]
    pub const OR: BinaryOperator = BinaryOperator::Or;
    #[classattr]
    pub const XOR: BinaryOperator = BinaryOperator::Xor;
    // Comparisons.
    #[classattr]
    pub const EQ: BinaryOperator = BinaryOperator::Eq;
    #[classattr]
    pub const NE: BinaryOperator = BinaryOperator::Ne;
    #[classattr]
    pub const LT: BinaryOperator = BinaryOperator::Lt;
    #[classattr]
    pub const LE: BinaryOperator = BinaryOperator::Le;
    #[classattr]
    pub const GT: BinaryOperator = BinaryOperator::Gt;
    #[classattr]
    pub const GE: BinaryOperator = BinaryOperator::Ge;
    // Arithmetic.
    #[classattr]
    pub const ADD: BinaryOperator = BinaryOperator::Add;
    #[classattr]
    pub const SUB: BinaryOperator = BinaryOperator::Sub;
    #[classattr]
    pub const MUL: BinaryOperator = BinaryOperator::Mul;
    #[classattr]
    pub const FLOOR_DIV: BinaryOperator = BinaryOperator::FloorDiv;
    #[classattr]
    pub const TRUE_DIV: BinaryOperator = BinaryOperator::TrueDiv;
    #[classattr]
    pub const MOD: BinaryOperator = BinaryOperator::Mod;
    #[classattr]
    pub const POW: BinaryOperator = BinaryOperator::Pow;
    // Shifts and matrix multiplication.
    #[classattr]
    pub const LSHIFT: BinaryOperator = BinaryOperator::Lshift;
    #[classattr]
    pub const RSHIFT: BinaryOperator = BinaryOperator::Rshift;
    #[classattr]
    pub const MATMUL: BinaryOperator = BinaryOperator::MatMul;
}

/// Extraction from Python: borrows the wrapper object and copies the operator out.
impl<'py> FromPyObject<'py> for BinaryOperator {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyBinaryOperator> = ob.extract()?;
        Ok(wrapper.0)
    }
}

/// Conversion to Python: wraps the operator in a new `PyBinaryOperator`.
impl<'py> IntoPyObject<'py> for BinaryOperator {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PyBinaryOperator(self);
        wrapper.into_bound_py_any(py)
    }
}

// Python-facing `pathway.engine.Expression`: a shared engine `Expression` plus a flag.
#[pyclass(module = "pathway.engine", frozen, name = "Expression")]
pub struct PyExpression {
    // The engine expression, shared so derived expressions can reference it cheaply.
    inner: Arc<Expression>,
    // Set to `true` by `apply`/`async_apply` and to `false` by `const`/`argument`;
    // derived expressions inherit it from their operands.
    // NOTE(review): presumably marks expressions whose evaluation needs the Python
    // GIL — confirm against the engine's use of `ExpressionData::gil`.
    gil: bool,
}

impl PyExpression {
    // Plain constructor; stores both fields unchanged.
    fn new(inner: Arc<Expression>, gil: bool) -> Self {
        Self { inner, gil }
    }
}

// Builds a `PyExpression` (via `Self::new`) from a one-operand expression constructor.
//
// `$expression` is called with a clone of `$e.inner` followed by any extra `$arg`s,
// the result is converted with `Expression::from`, and the new expression inherits
// `$e.gil`. Must be expanded where `Self` is `PyExpression`.
macro_rules! unary_op {
    ($expression:path, $e:expr $(, $arg:expr)*) => {
        Self::new(
            Arc::new(Expression::from($expression($e.inner.clone() $(, $arg)*))),
            $e.gil,
        )
    };
}

// Builds a `PyExpression` (via `Self::new`) from a two-operand expression constructor.
//
// `$expression` is called with clones of `$lhs.inner` and `$rhs.inner` followed by any
// extra `$arg`s, and the result is converted with `Expression::from`. The new
// expression's `gil` flag is set when either operand has it set. Must be expanded
// where `Self` is `PyExpression`.
macro_rules! binary_op {
    ($expression:path, $lhs:expr, $rhs:expr $(, $arg:expr)*) => {
        Self::new(
            Arc::new(Expression::from($expression(
                $lhs.inner.clone(),
                $rhs.inner.clone(),
                $($arg,)*
            ))),
            $lhs.gil || $rhs.gil,
        )
    };
}

// Generates a `#[pymethods]` block adding a static method `$name` to `PyExpression`.
//
// The generated method takes one expression plus the optional typed extra arguments
// and forwards them to `unary_op!` with the given `$expression` constructor.
macro_rules! unary_expr {
    ($name:ident, $expression:path $(, $arg:ident : $type:ty)*) => {
        #[pymethods]
        impl PyExpression {
            #[staticmethod]
            fn $name(expr: &Self $(, $arg : $type)*) -> Self {
                unary_op!($expression, expr $(, $arg)*)
            }
        }
    };
}

// Generates a `#[pymethods]` block adding a static method `$name` to `PyExpression`.
//
// The generated method takes two expressions (`lhs`, `rhs`) plus the optional typed
// extra arguments and forwards them to `binary_op!` with the given `$expression`
// constructor.
macro_rules! binary_expr {
    ($name:ident, $expression:path $(, $arg:ident : $type:ty)*) => {
        #[pymethods]
        impl PyExpression {
            #[staticmethod]
            fn $name(lhs: &Self, rhs: &Self $(, $arg : $type)*) -> Self {
                binary_op!($expression, lhs, rhs $(, $arg)*)
            }
        }
    };
}

/// Runs a batched UDF over `input`, processing at most `max_batch_size` rows per batch.
///
/// * `input` - the rows to process; each row holds the argument values for one call.
/// * `n_args` - number of arguments per row (the number of columns handed to `logic`).
/// * `logic` - receives all batches at once, each batch laid out column-wise
///   (`batch[arg][row]`), and returns one result per batch: either the per-row
///   results or a single error for the whole batch.
/// * `propagate_none` - when `true`, rows containing any `Value::None` are not passed
///   to `logic`; their result is `Value::None`.
/// * `max_batch_size` - upper bound on the rows per batch. A value of `0` is treated
///   as `1`, because `slice::chunks` panics on a zero chunk size.
///
/// Returns exactly one result per input row, in input order. If a batch produces a
/// different number of rows than it was given, every row of that batch gets a
/// `ValueError`. If a batch fails as a whole, its first row carries the original
/// error and the remaining rows a generic `ValueError`.
fn batch_apply(
    input: &[&[Value]],
    n_args: usize,
    logic: &impl Fn(Vec<Vec<Vec<Value>>>) -> Vec<DynResult<Vec<DynResult<Value>>>>,
    propagate_none: bool,
    max_batch_size: usize,
) -> Vec<DynResult<Value>> {
    // `slice::chunks` panics on a chunk size of zero; fall back to one row per batch.
    let chunk_size = max_batch_size.max(1);
    let (data, masks): (Vec<_>, Vec<_>) = input
        .chunks(chunk_size)
        .map(|inputs| {
            // `mask[i]` is true for rows that are skipped because they contain a None.
            let mut mask = vec![false; inputs.len()];
            let mut data = Vec::with_capacity(n_args);
            let mut not_none_count = 0;
            data.resize_with(n_args, || Vec::with_capacity(inputs.len()));
            for (i, input_i) in inputs.iter().enumerate() {
                if propagate_none && input_i.iter().any(|a| matches!(a, Value::None)) {
                    mask[i] = true;
                } else {
                    not_none_count += 1;
                    // Transpose: the j-th argument of this row goes into column j.
                    for (j, input_ij) in input_i.iter().enumerate() {
                        data[j].push(input_ij.clone());
                    }
                }
            }
            (data, (mask, not_none_count))
        })
        .unzip();
    logic(data)
        .into_iter()
        .zip_eq(masks)
        .flat_map(|(results, (mask, not_none_count))| {
            let batch_result: Vec<DynResult<Value>> = match results {
                Ok(results) => {
                    if results.len() == not_none_count {
                        results
                    } else {
                        let msg = format!(
                            "The number of rows produced by a UDF ({}) is different than the \
                             number of rows on its input ({not_none_count}).",
                            results.len()
                        );
                        mask.iter()
                            .map(|_i| Err(PyValueError::new_err(msg.clone()).into()))
                            .collect()
                    }
                }
                Err(e) => {
                    let msg = "Error in batch UDF.";
                    // The original error is reported once; the other rows of the
                    // batch get a generic error.
                    [Err(e)]
                        .into_iter()
                        .chain((1..not_none_count).map(|_| Err(PyValueError::new_err(msg).into())))
                        .collect()
                }
            };
            if propagate_none {
                // Re-interleave: skipped rows yield None, the others consume the
                // batch results in order.
                let mut batch_result_iter = batch_result.into_iter();
                mask.into_iter()
                    .map(|m| {
                        if m {
                            Ok(Value::None)
                        } else {
                            batch_result_iter
                                .next()
                                .expect("batch result holds one entry per non-None row")
                        }
                    })
                    .collect()
            } else {
                batch_result
            }
        })
        .collect()
}

/// Calls `function(*args)` and converts the returned awaitable into a Rust future
/// bound to `event_loop`.
///
/// # Errors
///
/// Fails if calling `function` raises, if copying the current context into the task
/// locals fails, or if the awaitable cannot be converted into a future.
fn start_async_task(
    event_loop: &Py<PyAny>,
    function: &Py<PyAny>,
    args: Bound<'_, PyTuple>,
) -> PyResult<impl Future<Output = PyResult<PyObject>> + Send> {
    let py = args.py();
    let bound_loop = event_loop.clone_ref(py).into_bound(py);
    let awaitable = function.call1(py, args)?.into_bound(py);
    let task_locals = pyo3_async_runtimes::TaskLocals::new(bound_loop).copy_context(py)?;
    pyo3_async_runtimes::into_future_with_locals(&task_locals, awaitable)
}

#[pymethods]
impl PyExpression {
    // `Expression.const(ob, type_)`: extracts `ob` as a value of `type_` and wraps it
    // in a constant expression. Fails if the extraction fails.
    #[staticmethod]
    fn r#const(ob: &Bound<PyAny>, type_: Type) -> PyResult<Self> {
        let constant = Expression::new_const(extract_value(ob, &type_)?);
        Ok(Self::new(Arc::new(constant), false))
    }

    // `Expression.argument(index)`: an expression referring to the argument at `index`.
    #[staticmethod]
    fn argument(index: usize) -> Self {
        let expression = Expression::Any(AnyExpression::Argument(index));
        Self::new(Arc::new(expression), false)
    }

    // `Expression.apply(function, *args, dtype, propagate_none=False, max_batch_size=None)`
    //
    // Builds an expression that calls the Python callable `function` on the values of
    // `args` and extracts each result as `dtype`.
    //
    // * Without `max_batch_size`, `function` is called once per row with that row's
    //   values as positional arguments.
    // * With `max_batch_size`, rows are grouped by `batch_apply` and `function` is
    //   called once per batch with one list per argument; it must return a sequence
    //   of per-row results.
    // * With `propagate_none`, rows containing a `Value::None` are not passed to
    //   `function` and evaluate to `Value::None`.
    //
    // The resulting expression is created with its `gil` flag set to `true`.
    #[staticmethod]
    #[pyo3(signature = (function, *args, dtype, propagate_none=false, max_batch_size=None))]
    fn apply(
        function: Py<PyAny>,
        args: Vec<PyRef<PyExpression>>,
        dtype: Type,
        propagate_none: bool,
        max_batch_size: Option<usize>,
    ) -> Self {
        let args = args
            .into_iter()
            .map(|expr| expr.inner.clone())
            .collect_vec();
        let n_args = args.len();
        let expression = if let Some(max_batch_size) = max_batch_size {
            // Batched mode: all batches are processed under a single GIL acquisition.
            let logic = move |batches: Vec<Vec<Vec<Value>>>| {
                Python::with_gil(|py| {
                    batches
                        .into_iter()
                        .map(|batch| {
                            if !batch.is_empty() && batch[0].is_empty() {
                                // if the batch has size 0, possible if propagate_none=true
                                return Ok(Vec::new());
                            }
                            // Each column of the batch becomes one positional argument.
                            let args = PyTuple::new(py, batch)?;
                            let results = function.call1(py, args)?;
                            Ok(results
                                .extract::<Vec<Bound<PyAny>>>(py)?
                                .into_iter()
                                .map(|result| Ok(extract_value(&result, &dtype)?))
                                .collect())
                        })
                        .collect()
                })
            };
            let func = Box::new(move |input: &[&[Value]]| {
                batch_apply(input, n_args, &logic, propagate_none, max_batch_size)
            });
            AnyExpression::Apply(func, args.into())
        } else {
            // Row-by-row mode: one Python call per row, all under one GIL acquisition.
            let func = Box::new(move |input: &[&[Value]]| {
                Python::with_gil(|py| -> Vec<DynResult<Value>> {
                    input
                        .iter()
                        .map(|input_i| {
                            if propagate_none && input_i.iter().any(|a| matches!(a, Value::None)) {
                                Ok(Value::None)
                            } else {
                                let args = PyTuple::new(py, *input_i)?;
                                let result = function.call1(py, args)?;
                                Ok(extract_value(result.bind(py), &dtype)?)
                            }
                        })
                        .collect()
                })
            });
            AnyExpression::Apply(func, args.into())
        };
        Self::new(Arc::new(Expression::Any(expression)), true)
    }

    // `Expression.async_apply(scope, function, *args, dtype, propagate_none=False,
    // max_batch_size=None)`
    //
    // Like `apply`, but `function` is a Python callable returning an awaitable. Every
    // call is started through `start_async_task` on the event loop stored in `scope`,
    // and the evaluating thread blocks until all started tasks of the current input
    // have finished. Results are returned in input order.
    //
    // * Without `max_batch_size`, one task is started per row.
    // * With `max_batch_size`, rows are grouped by `batch_apply` and one task is
    //   started per batch.
    // * With `propagate_none`, rows containing a `Value::None` start no task and
    //   evaluate to `Value::None`.
    //
    // The resulting expression is created with its `gil` flag set to `true`.
    #[staticmethod]
    #[pyo3(signature = (scope, function, *args, dtype, propagate_none=false, max_batch_size=None))]
    fn async_apply(
        scope: &Bound<Scope>,
        function: Py<PyAny>,
        args: Vec<PyRef<PyExpression>>,
        dtype: Type,
        propagate_none: bool,
        max_batch_size: Option<usize>,
    ) -> Self {
        let args = args
            .into_iter()
            .map(|expr| expr.inner.clone())
            .collect_vec();
        // Shared so that every spawned future can hold its own handle to the dtype.
        let dtype = Arc::new(dtype);
        let py = scope.py();
        let event_loop = scope.borrow().event_loop.clone_ref(py);
        let expression = if let Some(max_batch_size) = max_batch_size {
            let n_args = args.len();
            // Starts the task for a single batch (column-wise layout) and returns a
            // future resolving to the per-row results of that batch.
            let logic = move |input: Vec<Vec<Value>>| {
                // An empty first column means the batch has no rows to process.
                let future = if !input.is_empty() && input[0].is_empty() {
                    None
                } else {
                    Some(Python::with_gil(|py| {
                        start_async_task(&event_loop, &function, PyTuple::new(py, input)?)
                    }))
                };
                let dtype = dtype.clone();
                Box::pin(async {
                    if let Some(future) = future {
                        let results = future?.await?;
                        Python::with_gil(|py| {
                            Ok(results
                                .extract::<Vec<Bound<PyAny>>>(py)?
                                .into_iter()
                                .map(move |result| Ok(extract_value(&result, &dtype)?))
                                .collect())
                        })
                    } else {
                        Ok(Vec::new())
                    }
                })
            };
            // Starts all batches, then blocks until every one of them has completed,
            // collecting the batch results in submission order.
            let multi_batch_logic = Box::new(move |input: Vec<Vec<Vec<Value>>>| {
                let futures: FuturesOrdered<_> = input.into_iter().map(&logic).collect();
                let mut result = Vec::with_capacity(futures.len());
                futures::executor::block_on(futures.for_each(|item| {
                    result.push(item);
                    future::ready(())
                }));
                result
            });
            let func = Box::new(move |input: &[&[Value]]| {
                batch_apply(
                    input,
                    n_args,
                    &multi_batch_logic,
                    propagate_none,
                    max_batch_size,
                )
            });
            Expression::Any(AnyExpression::Apply(func, args.into()))
        } else {
            // Starts the task for a single row and returns a future resolving to its
            // extracted value.
            let logic = move |input_i: &[Value]| {
                let pass_none = propagate_none && input_i.iter().any(|a| matches!(a, Value::None));
                let future = if pass_none {
                    // Conditional future because the final result has to be returned from the same async block.
                    // Thanks to that only a single future type is returned and there's no need to Box the closure.
                    None
                } else {
                    Some(Python::with_gil(|py| {
                        start_async_task(&event_loop, &function, PyTuple::new(py, input_i)?)
                    }))
                };

                let dtype = dtype.clone();
                Box::pin(async {
                    if let Some(future) = future {
                        let result = future?.await?;
                        Python::with_gil(move |py| Ok(extract_value(result.bind(py), &dtype)?))
                    } else {
                        Ok(Value::None)
                    }
                })
            };
            // Starts one task per row, then blocks until all have completed,
            // collecting the results in row order.
            let func = Box::new(move |input: &[&[Value]]| {
                let futures: FuturesOrdered<_> =
                    input.iter().map(|input_i| logic(input_i)).collect();
                let mut result = Vec::with_capacity(futures.len());
                futures::executor::block_on(futures.for_each(|item| {
                    result.push(item);
                    future::ready(())
                }));
                result
            });
            Expression::Any(AnyExpression::Apply(func, args.into()))
        };
        Self::new(Arc::new(expression), true)
    }

    // `Expression.unary_expression(expr, operator, expr_dtype)`: picks the engine
    // expression for `operator` applied to an operand of type `expr_dtype`.
    // Returns `None` when the operator/type combination is not supported.
    #[staticmethod]
    fn unary_expression(
        expr: &PyExpression,
        operator: UnaryOperator,
        expr_dtype: Type,
    ) -> Option<Self> {
        let expression = match (operator, expr_dtype) {
            (UnaryOperator::Inv, Type::Bool) => unary_op!(BoolExpression::Not, expr),
            (UnaryOperator::Neg, Type::Int) => unary_op!(IntExpression::Neg, expr),
            (UnaryOperator::Neg, Type::Float) => unary_op!(FloatExpression::Neg, expr),
            (UnaryOperator::Neg, Type::Duration) => unary_op!(DurationExpression::Neg, expr),
            _ => return None,
        };
        Some(expression)
    }

    #[allow(clippy::too_many_lines)]
    #[staticmethod]
    fn binary_expression(
        lhs: &PyExpression,
        rhs: &PyExpression,
        operator: BinaryOperator,
        left_dtype: Type,
        right_dtype: Type,
    ) -> Option<Self> {
        type Tp = Type;
        type Op = BinaryOperator;
        type AnyE = AnyExpression;
        type BoolE = BoolExpression;
        type IntE = IntExpression;
        type FloatE = FloatExpression;
        type StringE = StringExpression;
        type DurationE = DurationExpression;
        match (operator, left_dtype, right_dtype) {
            (Op::And, Tp::Bool, Tp::Bool) => Some(binary_op!(BoolE::And, lhs, rhs)),
            (Op::Or, Tp::Bool, Tp::Bool) => Some(binary_op!(BoolE::Or, lhs, rhs)),
            (Op::Xor, Tp::Bool, Tp::Bool) => Some(binary_op!(BoolE::Xor, lhs, rhs)),
            (Op::Eq, Tp::Int, Tp::Int) => Some(binary_op!(BoolE::IntEq, lhs, rhs)),
            (Op::Ne, Tp::Int, Tp::Int) => Some(binary_op!(BoolE::IntNe, lhs, rhs)),
            (Op::Lt, Tp::Int, Tp::Int) => Some(binary_op!(BoolE::IntLt, lhs, rhs)),
            (Op::Le, Tp::Int, Tp::Int) => Some(binary_op!(BoolE::IntLe, lhs, rhs)),
            (Op::Gt, Tp::Int, Tp::Int) => Some(binary_op!(BoolE::IntGt, lhs, rhs)),
            (Op::Ge, Tp::Int, Tp::Int) => Some(binary_op!(BoolE::IntGe, lhs, rhs)),
            (Op::Eq, Tp::Bool, Tp::Bool) => Some(binary_op!(BoolE::BoolEq, lhs, rhs)),
            (Op::Ne, Tp::Bool, Tp::Bool) => Some(binary_op!(BoolE::BoolNe, lhs, rhs)),
            (Op::Lt, Tp::Bool, Tp::Bool) => Some(binary_op!(BoolE::BoolLt, lhs, rhs)),
            (Op::Le, Tp::Bool, Tp::Bool) => Some(binary_op!(BoolE::BoolLe, lhs, rhs)),
            (Op::Gt, Tp::Bool, Tp::Bool) => Some(binary_op!(BoolE::BoolGt, lhs, rhs)),
            (Op::Ge, Tp::Bool, Tp::Bool) => Some(binary_op!(BoolE::BoolGe, lhs, rhs)),
            (Op::Add, Tp::Int, Tp::Int) => Some(binary_op!(IntE::Add, lhs, rhs)),
            (Op::Sub, Tp::Int, Tp::Int) => Some(binary_op!(IntE::Sub, lhs, rhs)),
            (Op::Mul, Tp::Int, Tp::Int) => Some(binary_op!(IntE::Mul, lhs, rhs)),
            (Op::FloorDiv, Tp::Int, Tp::Int) => Some(binary_op!(IntE::FloorDiv, lhs, rhs)),
            (Op::TrueDiv, Tp::Int, Tp::Int) => Some(binary_op!(FloatE::IntTrueDiv, lhs, rhs)),
            (Op::Mod, Tp::Int, Tp::Int) => Some(binary_op!(IntE::Mod, lhs, rhs)),
            (Op::Pow, Tp::Int, Tp::Int) => Some(binary_op!(IntE::Pow, lhs, rhs)),
            (Op::Lshift, Tp::Int, Tp::Int) => Some(binary_op!(IntE::Lshift, lhs, rhs)),
            (Op::Rshift, Tp::Int, Tp::Int) => Some(binary_op!(IntE::Rshift, lhs, rhs)),
            (Op::And, Tp::Int, Tp::Int) => Some(binary_op!(IntE::And, lhs, rhs)),
            (Op::Or, Tp::Int, Tp::Int) => Some(binary_op!(IntE::Or, lhs, rhs)),
            (Op::Xor, Tp::Int, Tp::Int) => Some(binary_op!(IntE::Xor, lhs, rhs)),
            (Op::Eq, Tp::Float, Tp::Float) => Some(binary_op!(BoolE::FloatEq, lhs, rhs)),
            (Op::Ne, Tp::Float, Tp::Float) => Some(binary_op!(BoolE::FloatNe, lhs, rhs)),
            (Op::Lt, Tp::Float, Tp::Float) => Some(binary_op!(BoolE::FloatLt, lhs, rhs)),
            (Op::Le, Tp::Float, Tp::Float) => Some(binary_op!(BoolE::FloatLe, lhs, rhs)),
            (Op::Gt, Tp::Float, Tp::Float) => Some(binary_op!(BoolE::FloatGt, lhs, rhs)),
            (Op::Ge, Tp::Float, Tp::Float) => Some(binary_op!(BoolE::FloatGe, lhs, rhs)),
            (Op::Add, Tp::Float, Tp::Float) => Some(binary_op!(FloatE::Add, lhs, rhs)),
            (Op::Sub, Tp::Float, Tp::Float) => Some(binary_op!(FloatE::Sub, lhs, rhs)),
            (Op::Mul, Tp::Float, Tp::Float) => Some(binary_op!(FloatE::Mul, lhs, rhs)),
            (Op::FloorDiv, Tp::Float, Tp::Float) => Some(binary_op!(FloatE::FloorDiv, lhs, rhs)),
            (Op::TrueDiv, Tp::Float, Tp::Float) => Some(binary_op!(FloatE::TrueDiv, lhs, rhs)),
            (Op::Mod, Tp::Float, Tp::Float) => Some(binary_op!(FloatE::Mod, lhs, rhs)),
            (Op::Pow, Tp::Float, Tp::Float) => Some(binary_op!(FloatE::Pow, lhs, rhs)),
            (Op::Eq, Tp::String, Tp::String) => Some(binary_op!(BoolE::StringEq, lhs, rhs)),
            (Op::Ne, Tp::String, Tp::String) => Some(binary_op!(BoolE::StringNe, lhs, rhs)),
            (Op::Lt, Tp::String, Tp::String) => Some(binary_op!(BoolE::StringLt, lhs, rhs)),
            (Op::Le, Tp::String, Tp::String) => Some(binary_op!(BoolE::StringLe, lhs, rhs)),
            (Op::Gt, Tp::String, Tp::String) => Some(binary_op!(BoolE::StringGt, lhs, rhs)),
            (Op::Ge, Tp::String, Tp::String) => Some(binary_op!(BoolE::StringGe, lhs, rhs)),
            (Op::Add, Tp::String, Tp::String) => Some(binary_op!(StringE::Add, lhs, rhs)),
            (Op::Mul, Tp::String, Tp::Int) => Some(binary_op!(StringE::Mul, lhs, rhs)),
            (Op::Mul, Tp::Int, Tp::String) => Some(binary_op!(StringE::Mul, rhs, lhs)),
            (Op::Eq, Tp::Pointer, Tp::Pointer) => Some(binary_op!(BoolE::PtrEq, lhs, rhs)),
            (Op::Ne, Tp::Pointer, Tp::Pointer) => Some(binary_op!(BoolE::PtrNe, lhs, rhs)),
            (Op::Lt, Tp::Pointer, Tp::Pointer) => Some(binary_op!(BoolE::PtrLt, lhs, rhs)),
            (Op::Le, Tp::Pointer, Tp::Pointer) => Some(binary_op!(BoolE::PtrLe, lhs, rhs)),
            (Op::Gt, Tp::Pointer, Tp::Pointer) => Some(binary_op!(BoolE::PtrGt, lhs, rhs)),
            (Op::Ge, Tp::Pointer, Tp::Pointer) => Some(binary_op!(BoolE::PtrGe, lhs, rhs)),
            (Op::Eq, Tp::DateTimeNaive, Tp::DateTimeNaive) => {
                Some(binary_op!(BoolE::DateTimeNaiveEq, lhs, rhs))
            }
            (Op::Ne, Tp::DateTimeNaive, Tp::DateTimeNaive) => {
                Some(binary_op!(BoolE::DateTimeNaiveNe, lhs, rhs))
            }
            (Op::Lt, Tp::DateTimeNaive, Tp::DateTimeNaive) => {
                Some(binary_op!(BoolE::DateTimeNaiveLt, lhs, rhs))
            }
            (Op::Le, Tp::DateTimeNaive, Tp::DateTimeNaive) => {
                Some(binary_op!(BoolE::DateTimeNaiveLe, lhs, rhs))
            }
            (Op::Gt, Tp::DateTimeNaive, Tp::DateTimeNaive) => {
                Some(binary_op!(BoolE::DateTimeNaiveGt, lhs, rhs))
            }
            (Op::Ge, Tp::DateTimeNaive, Tp::DateTimeNaive) => {
                Some(binary_op!(BoolE::DateTimeNaiveGe, lhs, rhs))
            }
            (Op::Sub, Tp::DateTimeNaive, Tp::DateTimeNaive) => {
                Some(binary_op!(DurationExpression::DateTimeNaiveSub, lhs, rhs))
            }
            (Op::Add, Tp::DateTimeNaive, Tp::Duration) => {
                Some(binary_op!(DateTimeNaiveExpression::AddDuration, lhs, rhs))
            }
            (Op::Sub, Tp::DateTimeNaive, Tp::Duration) => {
                Some(binary_op!(DateTimeNaiveExpression::SubDuration, lhs, rhs))
            }
            (Op::Eq, Tp::DateTimeUtc, Tp::DateTimeUtc) => {
                Some(binary_op!(BoolE::DateTimeUtcEq, lhs, rhs))
            }
            (Op::Ne, Tp::DateTimeUtc, Tp::DateTimeUtc) => {
                Some(binary_op!(BoolE::DateTimeUtcNe, lhs, rhs))
            }
            (Op::Lt, Tp::DateTimeUtc, Tp::DateTimeUtc) => {
                Some(binary_op!(BoolE::DateTimeUtcLt, lhs, rhs))
            }
            (Op::Le, Tp::DateTimeUtc, Tp::DateTimeUtc) => {
                Some(binary_op!(BoolE::DateTimeUtcLe, lhs, rhs))
            }
            (Op::Gt, Tp::DateTimeUtc, Tp::DateTimeUtc) => {
                Some(binary_op!(BoolE::DateTimeUtcGt, lhs, rhs))
            }
            (Op::Ge, Tp::DateTimeUtc, Tp::DateTimeUtc) => {
                Some(binary_op!(BoolE::DateTimeUtcGe, lhs, rhs))
            }
            (Op::Sub, Tp::DateTimeUtc, Tp::DateTimeUtc) => {
                Some(binary_op!(DurationExpression::DateTimeUtcSub, lhs, rhs))
            }
            (Op::Add, Tp::DateTimeUtc, Tp::Duration) => {
                Some(binary_op!(DateTimeUtcExpression::AddDuration, lhs, rhs))
            }
            (Op::Sub, Tp::DateTimeUtc, Tp::Duration) => {
                Some(binary_op!(DateTimeUtcExpression::SubDuration, lhs, rhs))
            }
            (Op::Eq, Tp::Duration, Tp::Duration) => Some(binary_op!(BoolE::DurationEq, lhs, rhs)),
            (Op::Ne, Tp::Duration, Tp::Duration) => Some(binary_op!(BoolE::DurationNe, lhs, rhs)),
            (Op::Lt, Tp::Duration, Tp::Duration) => Some(binary_op!(BoolE::DurationLt, lhs, rhs)),
            (Op::Le, Tp::Duration, Tp::Duration) => Some(binary_op!(BoolE::DurationLe, lhs, rhs)),
            (Op::Gt, Tp::Duration, Tp::Duration) => Some(binary_op!(BoolE::DurationGt, lhs, rhs)),
            (Op::Ge, Tp::Duration, Tp::Duration) => Some(binary_op!(BoolE::DurationGe, lhs, rhs)),
            (Op::Add, Tp::Duration, Tp::Duration) => Some(binary_op!(DurationE::Add, lhs, rhs)),
            (Op::Sub, Tp::Duration, Tp::Duration) => Some(binary_op!(DurationE::Sub, lhs, rhs)),
            (Op::Add, Tp::Duration, Tp::DateTimeNaive) => {
                Some(binary_op!(DateTimeNaiveExpression::AddDuration, rhs, lhs))
            }
            (Op::Add, Tp::Duration, Tp::DateTimeUtc) => {
                Some(binary_op!(DateTimeUtcExpression::AddDuration, rhs, lhs))
            }
            (Op::Mul, Tp::Duration, Tp::Int) => Some(binary_op!(DurationE::MulByInt, lhs, rhs)),
            (Op::Mul, Tp::Int, Tp::Duration) => Some(binary_op!(DurationE::MulByInt, rhs, lhs)),
            (Op::FloorDiv, Tp::Duration, Tp::Int) => {
                Some(binary_op!(DurationE::DivByInt, lhs, rhs))
            }
            (Op::TrueDiv, Tp::Duration, Tp::Int) => {
                Some(binary_op!(DurationE::TrueDivByInt, lhs, rhs))
            }
            (Op::Mul, Tp::Duration, Tp::Float) => Some(binary_op!(DurationE::MulByFloat, lhs, rhs)),
            (Op::Mul, Tp::Float, Tp::Duration) => Some(binary_op!(DurationE::MulByFloat, rhs, lhs)),
            (Op::TrueDiv, Tp::Duration, Tp::Float) => {
                Some(binary_op!(DurationE::DivByFloat, lhs, rhs))
            }
            (Op::FloorDiv, Tp::Duration, Tp::Duration) => {
                Some(binary_op!(IntExpression::DurationFloorDiv, lhs, rhs))
            }
            (Op::TrueDiv, Tp::Duration, Tp::Duration) => {
                Some(binary_op!(FloatExpression::DurationTrueDiv, lhs, rhs))
            }
            (Op::Mod, Tp::Duration, Tp::Duration) => Some(binary_op!(DurationE::Mod, lhs, rhs)),
            (Op::MatMul, Tp::Array(_, _), Tp::Array(_, _)) => {
                Some(binary_op!(AnyE::MatMul, lhs, rhs))
            }
            (Op::Eq, Tp::Tuple(_) | Tp::List(_), Tp::Tuple(_) | Tp::List(_)) => {
                Some(binary_op!(BoolE::TupleEq, lhs, rhs))
            }
            (Op::Ne, Tp::Tuple(_) | Tp::List(_), Tp::Tuple(_) | Tp::List(_)) => {
                Some(binary_op!(BoolE::TupleNe, lhs, rhs))
            }
            (Op::Lt, Tp::Tuple(_) | Tp::List(_), Tp::Tuple(_) | Tp::List(_)) => {
                Some(binary_op!(BoolE::TupleLt, lhs, rhs))
            }
            (Op::Le, Tp::Tuple(_) | Tp::List(_), Tp::Tuple(_) | Tp::List(_)) => {
                Some(binary_op!(BoolE::TupleLe, lhs, rhs))
            }
            (Op::Gt, Tp::Tuple(_) | Tp::List(_), Tp::Tuple(_) | Tp::List(_)) => {
                Some(binary_op!(BoolE::TupleGt, lhs, rhs))
            }
            (Op::Ge, Tp::Tuple(_) | Tp::List(_), Tp::Tuple(_) | Tp::List(_)) => {
                Some(binary_op!(BoolE::TupleGe, lhs, rhs))
            }
            _ => None,
        }
    }

    #[staticmethod]
    fn cast(expr: &PyExpression, source_type: Type, target_type: Type) -> Option<Self> {
        type Tp = Type;
        match (source_type, target_type) {
            (Tp::Int, Tp::Float) => Some(unary_op!(FloatExpression::CastFromInt, expr)),
            (Tp::Int, Tp::Bool) => Some(unary_op!(BoolExpression::CastFromInt, expr)),
            (Tp::Int, Tp::String) => Some(unary_op!(StringExpression::CastFromInt, expr)),
            (Tp::Float, Tp::Int) => Some(unary_op!(IntExpression::CastFromFloat, expr)),
            (Tp::Float, Tp::Bool) => Some(unary_op!(BoolExpression::CastFromFloat, expr)),
            (Tp::Float, Tp::String) => Some(unary_op!(StringExpression::CastFromFloat, expr)),
            (Tp::Bool, Tp::Int) => Some(unary_op!(IntExpression::CastFromBool, expr)),
            (Tp::Bool, Tp::Float) => Some(unary_op!(FloatExpression::CastFromBool, expr)),
            (Tp::Bool, Tp::String) => Some(unary_op!(StringExpression::CastFromBool, expr)),
            (Tp::String, Tp::Int) => Some(unary_op!(IntExpression::CastFromString, expr)),
            (Tp::String, Tp::Float) => Some(unary_op!(FloatExpression::CastFromString, expr)),
            (Tp::String, Tp::Bool) => Some(unary_op!(BoolExpression::CastFromString, expr)),
            _ => None,
        }
    }

    #[staticmethod]
    fn cast_optional(expr: &PyExpression, source_type: Type, target_type: Type) -> Option<Self> {
        type Tp = Type;
        match (target_type, source_type) {
            (Tp::Int, Tp::Float) => Some(unary_op!(
                AnyExpression::CastToOptionalIntFromOptionalFloat,
                expr
            )),
            (Tp::Float, Tp::Int) => Some(unary_op!(
                AnyExpression::CastToOptionalFloatFromOptionalInt,
                expr
            )),
            _ => None,
        }
    }

    #[staticmethod]
    fn convert(
        expr: &PyExpression,
        default: &PyExpression,
        source_type: Type,
        target_type: Type,
        unwrap: bool,
    ) -> Option<Self> {
        type Tp = Type;
        match (&source_type, &target_type) {
            (Tp::Json, Tp::Int | Tp::Float | Tp::Bool | Tp::String) => Some(Self::new(
                Arc::new(Expression::Any(AnyExpression::JsonToValue(
                    expr.inner.clone(),
                    default.inner.clone(),
                    target_type,
                    unwrap,
                ))),
                expr.gil || default.gil,
            )),
            _ => None,
        }
    }

    // Builds an `IfElse` expression from a condition and its two branches.
    #[staticmethod]
    fn if_else(if_: &PyExpression, then: &PyExpression, else_: &PyExpression) -> Self {
        // The combined expression needs the GIL if any of its three parts does.
        let needs_gil = [if_, then, else_].iter().any(|part| part.gil);
        let conditional = AnyExpression::IfElse(
            Arc::clone(&if_.inner),
            Arc::clone(&then.inner),
            Arc::clone(&else_.inner),
        );
        Self::new(Arc::new(Expression::Any(conditional)), needs_gil)
    }

    // Builds a pointer expression from `args`.
    //
    // * `optional` selects between the `PointerExpression` variants (`false`) and the
    //   `AnyExpression::Optional*` variants (`true`).
    // * `instance`, when given, is embedded as an additional sub-expression through the
    //   `*WithInstanceFrom` variants.
    //
    // The resulting `gil` flag is the OR of the flags of every embedded sub-expression,
    // i.e. all of `args` and also `instance` when present.
    #[staticmethod]
    #[pyo3(signature = (*args, optional = false, instance = None))]
    fn pointer_from(
        args: Vec<PyRef<PyExpression>>,
        optional: bool,
        instance: Option<&PyExpression>,
    ) -> Self {
        // `instance` becomes part of the built expression, so its GIL requirement has to be
        // propagated just like the one of the positional arguments.
        let gil = args.iter().any(|a| a.gil) || instance.is_some_and(|instance| instance.gil);
        let args = args
            .into_iter()
            .map(|expr| expr.inner.clone())
            .collect_vec();
        let expr = match (optional, instance) {
            (false, None) => Arc::new(Expression::Pointer(PointerExpression::PointerFrom(
                args.into(),
            ))),
            (false, Some(instance)) => Arc::new(Expression::Pointer(
                PointerExpression::PointerWithInstanceFrom(args.into(), instance.inner.clone()),
            )),
            (true, None) => Arc::new(Expression::Any(AnyExpression::OptionalPointerFrom(
                args.into(),
            ))),
            (true, Some(instance)) => Arc::new(Expression::Any(
                AnyExpression::OptionalPointerWithInstanceFrom(args.into(), instance.inner.clone()),
            )),
        };
        Self::new(expr, gil)
    }

    // Builds a `MakeTuple` expression out of the given element expressions.
    #[staticmethod]
    #[pyo3(signature = (*args))]
    fn make_tuple(args: Vec<PyRef<PyExpression>>) -> Self {
        // Single pass: gather the element expressions and fold their GIL flags.
        let mut needs_gil = false;
        let mut elements = Vec::with_capacity(args.len());
        for arg in &args {
            needs_gil |= arg.gil;
            elements.push(arg.inner.clone());
        }
        let tuple = AnyExpression::MakeTuple(elements.into());
        Self::new(Arc::new(Expression::Any(tuple)), needs_gil)
    }

    // Builds a `TupleGetItemChecked` expression from a sequence, an index and a default.
    #[staticmethod]
    fn sequence_get_item_checked(
        expr: &PyExpression,
        index: &PyExpression,
        default: &PyExpression,
    ) -> Self {
        let needs_gil = expr.gil || index.gil || default.gil;
        let lookup = AnyExpression::TupleGetItemChecked(
            Arc::clone(&expr.inner),
            Arc::clone(&index.inner),
            Arc::clone(&default.inner),
        );
        Self::new(Arc::new(Expression::Any(lookup)), needs_gil)
    }

    // Builds a `JsonGetItem` expression with a caller-supplied default expression.
    #[staticmethod]
    fn json_get_item_checked(
        expr: &PyExpression,
        index: &PyExpression,
        default: &PyExpression,
    ) -> Self {
        let needs_gil = expr.gil || index.gil || default.gil;
        let lookup = AnyExpression::JsonGetItem(
            Arc::clone(&expr.inner),
            Arc::clone(&index.inner),
            Arc::clone(&default.inner),
        );
        Self::new(Arc::new(Expression::Any(lookup)), needs_gil)
    }

    // Builds a `JsonGetItem` expression whose default is a constant JSON `null`.
    #[staticmethod]
    fn json_get_item_unchecked(expr: &PyExpression, index: &PyExpression) -> Self {
        // The default is a constant, so only `expr` and `index` contribute to the GIL flag.
        let json_null = Value::from(serde_json::Value::Null);
        let null_default = Arc::new(Expression::Any(AnyExpression::Const(json_null)));
        let lookup = AnyExpression::JsonGetItem(
            Arc::clone(&expr.inner),
            Arc::clone(&index.inner),
            null_default,
        );
        Self::new(Arc::new(Expression::Any(lookup)), expr.gil || index.gil)
    }
}

// Table of expression constructors generated through the `unary_expr!` / `binary_expr!`
// macros. Each invocation pairs a method name with the expression variant it wraps; any
// trailing `name: Type` items are passed to the macro as extra arguments.
// NOTE(review): the macros themselves are defined outside this part of the file — confirm
// there what exactly each invocation expands to.

// Generic helpers.
unary_expr!(is_none, BoolExpression::IsNone);
binary_expr!(eq, BoolExpression::Eq);
binary_expr!(ne, BoolExpression::Ne);
unary_expr!(int_abs, IntExpression::Abs);
unary_expr!(float_abs, FloatExpression::Abs);
binary_expr!(
    sequence_get_item_unchecked,
    AnyExpression::TupleGetItemUnchecked
);

// `date_time_naive_*` helpers.
unary_expr!(
    date_time_naive_nanosecond,
    IntExpression::DateTimeNaiveNanosecond
);
unary_expr!(
    date_time_naive_microsecond,
    IntExpression::DateTimeNaiveMicrosecond
);
unary_expr!(
    date_time_naive_millisecond,
    IntExpression::DateTimeNaiveMillisecond
);
unary_expr!(date_time_naive_second, IntExpression::DateTimeNaiveSecond);
unary_expr!(date_time_naive_minute, IntExpression::DateTimeNaiveMinute);
unary_expr!(date_time_naive_hour, IntExpression::DateTimeNaiveHour);
unary_expr!(date_time_naive_day, IntExpression::DateTimeNaiveDay);
unary_expr!(date_time_naive_month, IntExpression::DateTimeNaiveMonth);
unary_expr!(date_time_naive_year, IntExpression::DateTimeNaiveYear);
unary_expr!(
    date_time_naive_timestamp_ns,
    IntExpression::DateTimeNaiveTimestampNs
);
binary_expr!(
    date_time_naive_timestamp,
    FloatExpression::DateTimeNaiveTimestamp
);
unary_expr!(date_time_naive_weekday, IntExpression::DateTimeNaiveWeekday);
binary_expr!(date_time_naive_strptime, DateTimeNaiveExpression::Strptime);
binary_expr!(
    date_time_naive_strftime,
    StringExpression::DateTimeNaiveStrftime
);
binary_expr!(
    date_time_naive_from_timestamp,
    DateTimeNaiveExpression::FromTimestamp
);
binary_expr!(
    date_time_naive_from_float_timestamp,
    DateTimeNaiveExpression::FromFloatTimestamp
);
binary_expr!(date_time_naive_to_utc, DateTimeUtcExpression::FromNaive);
binary_expr!(date_time_naive_round, DateTimeNaiveExpression::Round);
binary_expr!(date_time_naive_floor, DateTimeNaiveExpression::Floor);

// `date_time_utc_*` helpers.
unary_expr!(
    date_time_utc_nanosecond,
    IntExpression::DateTimeUtcNanosecond
);
unary_expr!(
    date_time_utc_microsecond,
    IntExpression::DateTimeUtcMicrosecond
);
unary_expr!(
    date_time_utc_millisecond,
    IntExpression::DateTimeUtcMillisecond
);
unary_expr!(date_time_utc_second, IntExpression::DateTimeUtcSecond);
unary_expr!(date_time_utc_minute, IntExpression::DateTimeUtcMinute);
unary_expr!(date_time_utc_hour, IntExpression::DateTimeUtcHour);
unary_expr!(date_time_utc_day, IntExpression::DateTimeUtcDay);
unary_expr!(date_time_utc_month, IntExpression::DateTimeUtcMonth);
unary_expr!(date_time_utc_year, IntExpression::DateTimeUtcYear);
unary_expr!(
    date_time_utc_timestamp_ns,
    IntExpression::DateTimeUtcTimestampNs
);
binary_expr!(
    date_time_utc_timestamp,
    FloatExpression::DateTimeUtcTimestamp
);
unary_expr!(date_time_utc_weekday, IntExpression::DateTimeUtcWeekday);
binary_expr!(date_time_utc_strptime, DateTimeUtcExpression::Strptime);
binary_expr!(
    date_time_utc_strftime,
    StringExpression::DateTimeUtcStrftime
);
binary_expr!(date_time_utc_to_naive, DateTimeNaiveExpression::FromUtc);
binary_expr!(date_time_utc_round, DateTimeUtcExpression::Round);
binary_expr!(date_time_utc_floor, DateTimeUtcExpression::Floor);

// Duration helpers.
binary_expr!(to_duration, DurationExpression::FromTimeUnit);
unary_expr!(duration_nanoseconds, IntExpression::DurationNanoseconds);
unary_expr!(duration_microseconds, IntExpression::DurationMicroseconds);
unary_expr!(duration_milliseconds, IntExpression::DurationMilliseconds);
unary_expr!(duration_seconds, IntExpression::DurationSeconds);
unary_expr!(duration_minutes, IntExpression::DurationMinutes);
unary_expr!(duration_hours, IntExpression::DurationHours);
unary_expr!(duration_days, IntExpression::DurationDays);
unary_expr!(duration_weeks, IntExpression::DurationWeeks);

// Unwrapping, stringification, string parsing and error filling.
unary_expr!(unwrap, AnyExpression::Unwrap);
unary_expr!(to_string, StringExpression::ToString);
unary_expr!(parse_int, AnyExpression::ParseStringToInt, optional: bool);
unary_expr!(parse_float, AnyExpression::ParseStringToFloat, optional: bool);
unary_expr!(
    parse_bool,
    AnyExpression::ParseStringToBool,
    true_list: Vec<String>,
    false_list: Vec<String>,
    optional: bool
);
binary_expr!(fill_error, AnyExpression::FillError);

// Python class `pathway.engine.PathwayType`: a namespace exposing engine `Type` values.
// Simple types are class attributes; parameterised types are built by static methods.
#[pyclass(module = "pathway.engine", frozen, name = "PathwayType")]
pub struct PathwayType(Type);

#[pymethods]
impl PathwayType {
    #[classattr]
    pub const ANY: Type = Type::Any;
    #[classattr]
    pub const BOOL: Type = Type::Bool;
    #[classattr]
    pub const INT: Type = Type::Int;
    #[classattr]
    pub const FLOAT: Type = Type::Float;
    #[classattr]
    pub const POINTER: Type = Type::Pointer;
    #[classattr]
    pub const STRING: Type = Type::String;
    #[classattr]
    pub const DATE_TIME_NAIVE: Type = Type::DateTimeNaive;
    #[classattr]
    pub const DATE_TIME_UTC: Type = Type::DateTimeUtc;
    #[classattr]
    pub const DURATION: Type = Type::Duration;
    // Array type with an optional dimension (`None` is passed through unchanged) wrapping
    // the element type `wrapped`.
    #[staticmethod]
    #[pyo3(signature = (dim, wrapped))]
    pub fn array(dim: Option<usize>, wrapped: Type) -> Type {
        Type::Array(dim, wrapped.into())
    }
    #[classattr]
    pub const JSON: Type = Type::Json;
    // Tuple type; the element types are given as positional arguments.
    #[staticmethod]
    #[pyo3(signature = (*args))]
    pub fn tuple(args: Vec<Type>) -> Type {
        Type::Tuple(args.into())
    }
    // List type with a single element type.
    #[staticmethod]
    pub fn list(arg: Type) -> Type {
        Type::List(arg.into())
    }
    #[classattr]
    pub const BYTES: Type = Type::Bytes;
    #[classattr]
    pub const PY_OBJECT_WRAPPER: Type = Type::PyObjectWrapper;
    // `Optional` wrapper around `wrapped`.
    #[staticmethod]
    pub fn optional(wrapped: Type) -> Type {
        Type::Optional(wrapped.into())
    }
    // `Future` wrapper around `wrapped`.
    #[staticmethod]
    pub fn future(wrapped: Type) -> Type {
        Type::Future(wrapped.into())
    }
}

// Python class `pathway.engine.ReadMethod` exposing the `ReadMethod` variants as class
// attributes.
#[pyclass(module = "pathway.engine", frozen, name = "ReadMethod")]
pub struct PyReadMethod(ReadMethod);

#[pymethods]
impl PyReadMethod {
    #[classattr]
    pub const BY_LINE: ReadMethod = ReadMethod::ByLine;
    #[classattr]
    pub const FULL: ReadMethod = ReadMethod::Full;
}

// Python class `pathway.engine.FieldSource` exposing the `FieldSource` variants as class
// attributes.
#[pyclass(module = "pathway.engine", frozen, name = "FieldSource")]
pub struct PyFieldSource(FieldSource);

#[pymethods]
impl PyFieldSource {
    #[classattr]
    pub const KEY: FieldSource = FieldSource::Key;
    #[classattr]
    pub const PAYLOAD: FieldSource = FieldSource::Payload;
}

// Extracts a `FieldSource` from a Python `PyFieldSource` instance by copying out the
// wrapped value; extraction errors are propagated unchanged.
impl<'py> FromPyObject<'py> for FieldSource {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<PyFieldSource> = ob.extract()?;
        Ok(wrapper.0)
    }
}

// Converts a `FieldSource` to Python by wrapping it in a `PyFieldSource` object.
impl<'py> IntoPyObject<'py> for FieldSource {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PyFieldSource(self);
        wrapper.into_bound_py_any(py)
    }
}

// Python class `pathway.engine.ConnectorMode` exposing the `ConnectorMode` variants as
// class attributes.
#[pyclass(module = "pathway.engine", frozen, name = "ConnectorMode")]
pub struct PyConnectorMode(ConnectorMode);

#[pymethods]
impl PyConnectorMode {
    #[classattr]
    pub const STATIC: ConnectorMode = ConnectorMode::Static;
    #[classattr]
    pub const STREAMING: ConnectorMode = ConnectorMode::Streaming;

    // Two wrappers compare equal when the wrapped `ConnectorMode` values are equal.
    pub fn __eq__(&self, other: &Self) -> bool {
        self.0 == other.0
    }
}

// Python class `pathway.engine.SessionType` exposing the `SessionType` variants as class
// attributes.
#[pyclass(module = "pathway.engine", frozen, name = "SessionType")]
pub struct PySessionType(SessionType);

#[pymethods]
impl PySessionType {
    #[classattr]
    pub const NATIVE: SessionType = SessionType::Native;
    #[classattr]
    pub const UPSERT: SessionType = SessionType::Upsert;
}

// Python class `pathway.engine.PythonConnectorEventType` exposing the
// `PythonConnectorEventType` variants as class attributes.
#[pyclass(module = "pathway.engine", frozen, name = "PythonConnectorEventType")]
pub struct PyPythonConnectorEventType(PythonConnectorEventType);

#[pymethods]
impl PyPythonConnectorEventType {
    #[classattr]
    pub const INSERT: PythonConnectorEventType = PythonConnectorEventType::Insert;
    #[classattr]
    pub const DELETE: PythonConnectorEventType = PythonConnectorEventType::Delete;
    #[classattr]
    pub const EXTERNAL_OFFSET: PythonConnectorEventType = PythonConnectorEventType::ExternalOffset;
}

// Python class `pathway.engine.DebeziumDBType` exposing the `DebeziumDBType` variants as
// class attributes.
#[pyclass(module = "pathway.engine", frozen, name = "DebeziumDBType")]
pub struct PyDebeziumDBType(DebeziumDBType);

#[pymethods]
impl PyDebeziumDBType {
    #[classattr]
    pub const POSTGRES: DebeziumDBType = DebeziumDBType::Postgres;
    #[classattr]
    pub const MONGO_DB: DebeziumDBType = DebeziumDBType::MongoDB;
}

// Python class `pathway.engine.KeyGenerationPolicy` exposing the `KeyGenerationPolicy`
// variants as class attributes.
#[pyclass(module = "pathway.engine", frozen, name = "KeyGenerationPolicy")]
pub struct PyKeyGenerationPolicy(KeyGenerationPolicy);

#[pymethods]
impl PyKeyGenerationPolicy {
    #[classattr]
    pub const ALWAYS_AUTOGENERATE: KeyGenerationPolicy = KeyGenerationPolicy::AlwaysAutogenerate;
    #[classattr]
    pub const PREFER_MESSAGE_KEY: KeyGenerationPolicy = KeyGenerationPolicy::PreferMessageKey;
}

// Python class `pathway.engine.MonitoringLevel` exposing the `MonitoringLevel` variants as
// class attributes.
#[pyclass(module = "pathway.engine", frozen, name = "MonitoringLevel")]
pub struct PyMonitoringLevel(MonitoringLevel);

#[pymethods]
impl PyMonitoringLevel {
    #[classattr]
    pub const NONE: MonitoringLevel = MonitoringLevel::None;

    #[classattr]
    pub const IN_OUT: MonitoringLevel = MonitoringLevel::InOut;

    #[classattr]
    pub const ALL: MonitoringLevel = MonitoringLevel::All;
}

// Python class `pathway.engine.TableWriterInitMode` exposing the `TableWriterInitMode`
// variants as class attributes.
#[pyclass(module = "pathway.engine", frozen, name = "TableWriterInitMode")]
pub struct PyTableWriterInitMode(TableWriterInitMode);

#[pymethods]
impl PyTableWriterInitMode {
    #[classattr]
    pub const DEFAULT: TableWriterInitMode = TableWriterInitMode::Default;
    #[classattr]
    pub const CREATE_IF_NOT_EXISTS: TableWriterInitMode = TableWriterInitMode::CreateIfNotExists;
    #[classattr]
    pub const REPLACE: TableWriterInitMode = TableWriterInitMode::Replace;
}

// Python-side wrapper around a `UniverseHandle`, tied to the `Scope` that owns it.
#[pyclass(module = "pathway.engine", frozen)]
pub struct Universe {
    scope: Py<Scope>,
    handle: UniverseHandle,
}

impl Universe {
    // Returns the Python object for `handle`, creating it on first use.
    //
    // Objects are cached per scope in `Scope::universes`, so asking twice for the same
    // handle yields the same Python object.
    fn new<'py>(scope: &Bound<'py, Scope>, handle: UniverseHandle) -> PyResult<Bound<'py, Self>> {
        let py = scope.py();
        let scope_ref = scope.borrow();
        let cache = scope_ref.universes.get(py);

        // The shared borrow of the cache ends with this statement, before any mutation below.
        let cached = cache
            .borrow()
            .get(&handle)
            .map(|universe| universe.bind(py).clone());
        if let Some(universe) = cached {
            return Ok(universe);
        }

        let universe = Bound::new(
            py,
            Self {
                scope: scope.clone().unbind(),
                handle,
            },
        )?;
        cache
            .borrow_mut()
            .insert(handle, universe.clone().unbind());
        Ok(universe)
    }
}

#[pymethods]
impl Universe {
    pub fn __repr__(&self) -> String {
        format!("<Universe {:?}>", self.handle)
    }
}

// Base Python class for `Column` and `Computer`; both helpers below dispatch on the exact
// subclass and treat anything else as unreachable.
#[pyclass(module = "pathway.engine", frozen, subclass)]
pub struct ComplexColumn;

impl ComplexColumn {
    // Universe of the column when it is an output `Computer`; `None` for a plain `Column`
    // and for a `Computer` whose `is_output` flag is not set.
    fn output_universe(self_: &Bound<Self>) -> Option<Py<Universe>> {
        if self_.downcast_exact::<Column>().is_ok() {
            return None;
        }
        let Ok(computer) = self_.downcast_exact::<Computer>() else {
            unreachable!("Unknown ComplexColumn subclass");
        };
        let py = computer.py();
        let computer = computer.get();
        if computer.is_output {
            Some(computer.universe.clone_ref(py))
        } else {
            None
        }
    }

    // Converts the Python object into its engine-side representation.
    fn to_engine(self_: &Bound<Self>) -> EngineComplexColumn {
        match self_.downcast_exact::<Column>() {
            Ok(column) => EngineComplexColumn::Column(column.borrow().handle),
            Err(_) => match self_.downcast_exact::<Computer>() {
                Ok(computer) => Computer::to_engine(computer),
                Err(_) => unreachable!("Unknown ComplexColumn subclass"),
            },
        }
    }
}

// Python-side wrapper around a `ColumnHandle` together with the universe it belongs to.
#[pyclass(module = "pathway.engine", frozen, extends = ComplexColumn)]
pub struct Column {
    #[pyo3(get)] // ?
    universe: Py<Universe>,
    handle: ColumnHandle,
}

impl Column {
    // Returns the Python object for `handle`, creating it on first use.
    //
    // Objects are cached in the `columns` map of the universe's scope. A cached column is
    // asserted to belong to the very same `universe` object that was passed in.
    fn new<'py>(
        universe: &Bound<'py, Universe>,
        handle: ColumnHandle,
    ) -> PyResult<Bound<'py, Self>> {
        let py = universe.py();
        let universe_ref = universe.borrow();
        let scope = universe_ref.scope.borrow(py);
        let cache = scope.columns.get(py);

        // The shared borrow of the cache ends with this statement, before any mutation below.
        let cached = cache
            .borrow()
            .get(&handle)
            .map(|column| column.bind(py).clone());
        if let Some(column) = cached {
            assert!(column.get().universe.is(universe));
            return Ok(column);
        }

        let initializer = (
            Self {
                universe: universe.clone().unbind(),
                handle,
            },
            ComplexColumn,
        );
        let column = Bound::new(py, initializer)?;
        cache.borrow_mut().insert(handle, column.clone().unbind());
        Ok(column)
    }
}

#[pymethods]
impl Column {
    pub fn __repr__(&self, py: Python) -> String {
        let universe_handle = self.universe.borrow(py).handle;
        format!(
            "<Column universe={:?} {:?}>",
            universe_handle, self.handle
        )
    }
}

#[pyclass(module = "pathway.engine", frozen)]
pub struct LegacyTable {
    #[pyo3(get)] // ?
    universe: Py<Universe>,

    #[pyo3(get)] // ?
    columns: Vec<Py<Column>>,
}

#[pymethods]
impl LegacyTable {
    #[new]
    pub fn new(
        universe: Bound<Universe>,
        #[pyo3(from_py_with = from_py_iterable)] columns: Vec<Py<Column>>,
    ) -> PyResult<Self> {
        let py = universe.py();
        for column in &columns {
            check_identity(
                &column.borrow(py).universe,
                universe.as_unbound(),
                "universe mismatch",
            )?;
        }
        Ok(Self {
            universe: universe.unbind(),
            columns,
        })
    }

    pub fn __repr__(&self, py: Python) -> String {
        format!(
            "<LegacyTable universe={:?} columns=[{}]>",
            self.universe.borrow(py).handle,
            self.columns.iter().format_with(", ", |column, f| {
                f(&format_args!("{:?}", column.borrow(py).handle))
            })
        )
    }
}

impl LegacyTable {
    fn to_engine(&self) -> (UniverseHandle, Vec<ColumnHandle>) {
        let universe = self.universe.get();
        let column_handles = self.columns.iter().map(|c| c.get().handle).collect();
        (universe.handle, column_handles)
    }

    fn from_handles<'py>(
        scope: &Bound<'py, Scope>,
        universe_handle: UniverseHandle,
        column_handles: impl IntoIterator<Item = ColumnHandle>,
    ) -> PyResult<Bound<'py, Self>> {
        let py = scope.py();
        let universe = Universe::new(scope, universe_handle)?;
        let columns = column_handles
            .into_iter()
            .map(|column_handle| Ok(Column::new(&universe, column_handle)?.unbind()))
            .collect::<PyResult<_>>()?;
        Bound::new(py, Self::new(universe, columns)?)
    }

    fn from_engine<'py>(
        scope: &Bound<'py, Scope>,
        table: EngineLegacyTable,
    ) -> PyResult<Bound<'py, Self>> {
        let (universe_handle, column_handles) = table;
        Self::from_handles(scope, universe_handle, column_handles)
    }
}

// Python-side wrapper around a `TableHandle`, tied to the `Scope` that owns it.
#[pyclass(module = "pathway.engine", frozen)]
pub struct Table {
    scope: Py<Scope>,
    handle: TableHandle,
}

impl Table {
    // Returns the Python object for `handle`, creating and caching it in `Scope::tables`
    // on first use.
    fn new(scope: &Bound<Scope>, handle: TableHandle) -> PyResult<Py<Self>> {
        let py = scope.py();
        let scope_ref = scope.borrow();
        let cache = scope_ref.tables.get(py);

        // The shared borrow of the cache ends with this statement, before any mutation below.
        let cached = cache
            .borrow()
            .get(&handle)
            .map(|table| table.clone_ref(py));
        if let Some(table) = cached {
            return Ok(table);
        }

        let table = Py::new(
            py,
            Self {
                scope: scope.clone().unbind(),
                handle,
            },
        )?;
        cache.borrow_mut().insert(handle, table.clone_ref(py));
        Ok(table)
    }
}

// Python-side wrapper around an `ErrorLogHandle`, tied to the `Scope` that owns it.
#[pyclass(module = "pathway.engine", frozen)]
pub struct ErrorLog {
    scope: Py<Scope>,
    handle: ErrorLogHandle,
}

impl ErrorLog {
    // Returns the Python object for `handle`, creating and caching it in
    // `Scope::error_logs` on first use.
    fn new(scope: &Bound<Scope>, handle: ErrorLogHandle) -> PyResult<Py<Self>> {
        let py = scope.py();
        let scope_ref = scope.borrow();
        let cache = scope_ref.error_logs.get(py);

        // The shared borrow of the cache ends with this statement, before any mutation below.
        let cached = cache
            .borrow()
            .get(&handle)
            .map(|error_log| error_log.clone_ref(py));
        if let Some(error_log) = cached {
            return Ok(error_log);
        }

        let error_log = Py::new(
            py,
            Self {
                scope: scope.clone().unbind(),
                handle,
            },
        )?;
        cache.borrow_mut().insert(handle, error_log.clone_ref(py));
        Ok(error_log)
    }
}

// submodule to make sure no other code can create instances of `Error`
//
// `Error` wraps the module-private `InnerError` in a private field, so a value of the
// type can only be constructed inside this module. The single shared instance is `ERROR`.
mod error {
    use once_cell::sync::Lazy;
    use pyo3::prelude::*;

    // Private marker type; it is what keeps `Error` unconstructible from outside.
    struct InnerError;
    #[pyclass(module = "pathway.engine", frozen)]
    pub struct Error(InnerError);

    #[pymethods]
    impl Error {
        #[allow(clippy::unused_self)]
        fn __repr__(&self) -> &'static str {
            "Error"
        }
    }

    // Lazily created shared instance; initialisation takes the GIL and panics if the
    // Python object cannot be allocated.
    pub static ERROR: Lazy<Py<Error>> = Lazy::new(|| {
        Python::with_gil(|py| {
            Py::new(py, Error(InnerError)).expect("creating ERROR should not fail")
        })
    });
}
use error::{Error, ERROR};

// Submodule holding the `Pending` class and its single shared instance `PENDING`.
// `Pending` wraps the module-private `InnerPending` in a private field, so a value of the
// type can only be constructed inside this module.
mod pending {
    use once_cell::sync::Lazy;
    use pyo3::prelude::*;

    // Private marker type; it is what keeps `Pending` unconstructible from outside.
    struct InnerPending;
    #[pyclass(module = "pathway.engine", frozen)]
    pub struct Pending(InnerPending);

    #[pymethods]
    impl Pending {
        #[allow(clippy::unused_self)]
        fn __repr__(&self) -> &'static str {
            "Pending"
        }
    }

    // Lazily created shared instance; initialisation takes the GIL and panics if the
    // Python object cannot be allocated.
    pub static PENDING: Lazy<Py<Pending>> = Lazy::new(|| {
        Python::with_gil(|py| {
            Py::new(py, Pending(InnerPending)).expect("creating PENDING should not fail")
        })
    });
}
use pending::{Pending, PENDING};

// Duck-typed extraction of a `ColumnPath` from a Python object:
//
// 1. an `is_key` attribute that is `true` gives `ColumnPath::Key`
//    (panics if the attribute exists but is not a bool);
// 2. otherwise an extractable `path` attribute gives `ColumnPath::ValuePath`;
// 3. otherwise a `TypeError` naming the object's type is returned.
impl<'py> FromPyObject<'py> for ColumnPath {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let py = ob.py();

        // A missing `is_key` attribute is treated the same as `is_key == false`.
        let is_key = match ob.getattr(intern!(py, "is_key")) {
            Ok(flag) => flag
                .extract::<bool>()
                .expect("is_key field in ColumnPath should be bool"),
            Err(_) => false,
        };
        if is_key {
            return Ok(Self::Key);
        }

        let path = ob
            .getattr(intern!(py, "path"))
            .and_then(|path| path.extract());
        match path {
            Ok(path) => Ok(Self::ValuePath(path)),
            Err(_) => Err(PyTypeError::new_err(format!(
                "can't convert {} to ColumnPath",
                ob.get_type().name()?
            ))),
        }
    }
}

// Lazily created Python exception type `pathway.engine.MissingValueError`.
// Unlike the other engine exception types it derives from `BaseException`.
static MISSING_VALUE_ERROR_TYPE: Lazy<Py<PyType>> = Lazy::new(|| {
    let qualified_name = CString::new("pathway.engine.MissingValueError").unwrap();
    Python::with_gil(|py| {
        let base = PyBaseException::type_object(py);
        PyErr::new_type(py, &qualified_name, None, Some(&base), None)
            .expect("creating MissingValueError type should not fail")
    })
});

// Lazily created Python exception type `pathway.engine.EngineError`, derived from
// `Exception`.
static ENGINE_ERROR_TYPE: Lazy<Py<PyType>> = Lazy::new(|| {
    let qualified_name = CString::new("pathway.engine.EngineError").unwrap();
    Python::with_gil(|py| {
        let base = PyException::type_object(py);
        PyErr::new_type(py, &qualified_name, None, Some(&base), None)
            .expect("creating EngineError type should not fail")
    })
});

// Lazily created Python exception type `pathway.engine.EngineErrorWithTrace`, derived
// from `Exception`.
static ENGINE_ERROR_WITH_TRACE_TYPE: Lazy<Py<PyType>> = Lazy::new(|| {
    let qualified_name = CString::new("pathway.engine.EngineErrorWithTrace").unwrap();
    Python::with_gil(|py| {
        let base = PyException::type_object(py);
        PyErr::new_type(py, &qualified_name, None, Some(&base), None)
            .expect("creating EngineErrorWithTrace type should not fail")
    })
});

// Lazily created Python exception type `pathway.engine.OtherWorkerError`, derived from
// `Exception`.
static OTHER_WORKER_ERROR: Lazy<Py<PyType>> = Lazy::new(|| {
    let qualified_name = CString::new("pathway.engine.OtherWorkerError").unwrap();
    Python::with_gil(|py| {
        let base = PyException::type_object(py);
        PyErr::new_type(py, &qualified_name, None, Some(&base), None)
            .expect("creating OtherWorkerError type should not fail")
    })
});

// Python-visible handle on a `ScopedContext`. Every accessor reports
// `ValueError("context out of scope")` when the scoped context yields nothing.
#[pyclass(module = "pathway.engine", frozen)]
pub struct Context(SendWrapper<ScopedContext>);

#[allow(clippy::redundant_closure_for_method_calls)] // false positives
#[pymethods]
impl Context {
    #[getter]
    fn this_row(&self) -> PyResult<Key> {
        match self.0.with(|context| context.this_row()) {
            Some(key) => Ok(key),
            None => Err(PyValueError::new_err("context out of scope")),
        }
    }

    #[getter]
    fn data(&self) -> PyResult<Value> {
        match self.0.with(|context| context.data()) {
            Some(data) => Ok(data),
            None => Err(PyValueError::new_err("context out of scope")),
        }
    }

    // Looks up a value through the context.
    //
    // Raises `MissingValueError` when the lookup itself finds nothing and `ValueError`
    // when the context is out of scope.
    #[pyo3(signature=(column, row, *args))]
    fn raising_get(
        &self,
        py: Python,
        column: usize,
        row: Key,
        args: Vec<Value>,
    ) -> PyResult<Value> {
        // Outer `Option`: is the context in scope? Inner `Option`: did the lookup succeed?
        let lookup = self.0.with(|context| context.get(column, row, args));
        match lookup {
            Some(Some(value)) => Ok(value),
            Some(None) => Err(PyErr::from_type(
                MISSING_VALUE_ERROR_TYPE.bind(py).clone(),
                (),
            )),
            None => Err(PyValueError::new_err("context out of scope")),
        }
    }
}

// A column whose values are produced by calling the Python callable `fun`.
#[pyclass(module = "pathway.engine", frozen, extends = ComplexColumn)]
pub struct Computer {
    fun: Py<PyAny>,
    #[allow(unused)] // XXX
    dtype: Py<PyAny>,
    is_output: bool,
    is_method: bool,
    universe: Py<Universe>,
    data: Value,
    data_column: Option<Py<Column>>,
}

#[pymethods]
impl Computer {
    // Creates a `Computer` Python object from its parts; the arguments are stored as
    // given, without further validation here.
    #[allow(clippy::too_many_arguments)]
    #[staticmethod]
    #[pyo3(signature = (
        fun,
        dtype,
        is_output,
        is_method,
        universe,
        data = Value::None,
        data_column = None,
    ))]
    pub fn from_raising_fun(
        py: Python,
        fun: Py<PyAny>,
        #[allow(unused)] dtype: Py<PyAny>,
        is_output: bool,
        is_method: bool,
        universe: Py<Universe>,
        data: Value,
        data_column: Option<Py<Column>>,
    ) -> PyResult<Py<Self>> {
        let computer = Self {
            fun,
            dtype,
            is_output,
            is_method,
            universe,
            data,
            data_column,
        };
        // `Computer` extends `ComplexColumn`, so the base value is supplied alongside it.
        Py::new(py, (computer, ComplexColumn))
    }
}

impl Computer {
    // Calls the Python callable `fun` with a fresh `Context` object followed by `args`.
    //
    // Returns:
    // * `Ok(Some(value))` when the call succeeds and the result extracts into a `Value`;
    // * `Ok(None)` when the call raises `MissingValueError`;
    // * `Err(..)` for any other Python exception, or when building the arguments fails.
    fn compute(
        &self,
        py: Python,
        engine_context: &dyn EngineContext,
        args: &[Value],
    ) -> PyResult<Option<Value>> {
        let context = Bound::new(py, Context(SendWrapper::new(ScopedContext::default())))?;
        // Positional arguments for `fun`: the context first, then the converted values.
        let mut all_args = Vec::with_capacity(args.len() + 1);
        all_args.push(context.clone().into_any().unbind());
        let args: Vec<_> = args
            .iter()
            .map(|value| value.into_py_any(py))
            .try_collect()?;
        all_args.extend(args);
        // The call runs inside `scoped(engine_context, ..)`.
        // NOTE(review): presumably this is what makes `engine_context` reachable through
        // the `Context` object for the duration of the call — confirm in `ScopedContext`.
        let res = context.borrow().0.scoped(
            engine_context,
            || self.fun.bind(py).call1(PyTuple::new(py, all_args)?), // FIXME
        );
        match res {
            Ok(value) => Ok(Some(value.extract()?)),
            Err(error) => {
                // `MissingValueError` is turned into "no value" instead of a failure.
                if error.is_instance(py, MISSING_VALUE_ERROR_TYPE.bind(py)) {
                    Ok(None)
                } else {
                    Err(error)
                }
            }
        }
    }

    // Converts the Python object into an engine computer.
    //
    // `is_method` selects `EngineComputer::Method` vs `EngineComputer::Attribute`;
    // `is_output` selects `ExternalComputer` vs `InternalComputer`. In both cases the
    // engine-side `logic` closure acquires the GIL and delegates to `compute`.
    //
    // Panics if `is_method` is false but `data` is not `Value::None` or a `data_column`
    // is set.
    fn to_engine(self_: &Bound<Self>) -> EngineComplexColumn {
        let py = self_.py();
        let self_ref = self_.borrow();
        // Owned reference moved into whichever `logic` closure gets built below.
        let computer: Py<Self> = self_.clone().unbind();
        let engine_computer = if self_ref.is_method {
            let data_column_handle = self_ref
                .data_column
                .as_ref()
                .map(|data_column| data_column.borrow(py).handle);
            EngineComputer::Method {
                logic: Box::new(move |engine_context, args| {
                    // NOTE(review): the context is wrapped in `SendWrapper` before entering
                    // `with_gil` and taken back out inside — presumably to satisfy the
                    // closure's bounds; confirm.
                    let engine_context = SendWrapper::new(engine_context);
                    Ok(Python::with_gil(|py| {
                        let engine_context = engine_context.take();
                        computer.borrow(py).compute(py, engine_context, args)
                    })?)
                }),
                universe_handle: self_ref.universe.borrow(py).handle,
                data: self_ref.data.clone(),
                data_column_handle,
            }
        } else {
            // XXX: check these asserts in constructor
            assert_eq!(self_ref.data, Value::None);
            assert!(self_ref.data_column.is_none());
            EngineComputer::Attribute {
                logic: Box::new(move |engine_context| {
                    let engine_context = SendWrapper::new(engine_context);
                    Ok(Python::with_gil(|py| {
                        let engine_context = engine_context.take();
                        // Attributes are computed without extra arguments.
                        computer.borrow(py).compute(py, engine_context, &[])
                    })?)
                }),
                universe_handle: self_ref.universe.borrow(py).handle,
            }
        };
        if self_ref.is_output {
            EngineComplexColumn::ExternalComputer(engine_computer)
        } else {
            EngineComplexColumn::InternalComputer(engine_computer)
        }
    }
}

// Python-visible scope: owns the graph handle and per-handle caches of the Python
// wrapper objects (`Universe`, `Column`, `Table`, `ErrorLog`) created for it.
#[pyclass(module = "pathway.engine", frozen)]
pub struct Scope {
    #[pyo3(get)]
    parent: Option<Py<Scope>>,
    license: Option<License>,
    graph: SendWrapper<ScopedGraph>,
    is_persisted: bool,
    timestamp_at_start: Timestamp,

    // empty_universe: Lazy<Py<Universe>>,
    universes: GILProtected<RefCell<HashMap<UniverseHandle, Py<Universe>>>>,
    columns: GILProtected<RefCell<HashMap<ColumnHandle, Py<Column>>>>,
    tables: GILProtected<RefCell<HashMap<TableHandle, Py<Table>>>>,
    error_logs: GILProtected<RefCell<HashMap<ErrorLogHandle, Py<ErrorLog>>>>,
    unique_names: GILProtected<RefCell<HashSet<UniqueName>>>,
    event_loop: PyObject,
    total_connectors: GILProtected<RefCell<usize>>,
}

impl Scope {
    // Creates a scope with a fresh `ScopedGraph`, empty caches, no registered unique
    // names and a connector counter of zero.
    fn new(
        parent: Option<Py<Self>>,
        event_loop: PyObject,
        license: Option<License>,
        is_persisted: bool,
        timestamp_at_start: Timestamp,
    ) -> Self {
        Self {
            parent,
            license,
            graph: SendWrapper::new(ScopedGraph::new()),
            is_persisted,
            timestamp_at_start,
            universes: GILProtected::new(RefCell::default()),
            columns: GILProtected::new(RefCell::default()),
            tables: GILProtected::new(RefCell::default()),
            error_logs: GILProtected::new(RefCell::default()),
            unique_names: GILProtected::new(RefCell::default()),
            event_loop,
            total_connectors: GILProtected::new(RefCell::new(0)),
        }
    }

    // Drops every cached wrapper object (universes, columns, tables, error logs).
    // Registered unique names and the connector counter are left untouched.
    fn clear_caches(&self, py: Python<'_>) {
        self.universes.get(py).borrow_mut().clear();
        self.columns.get(py).borrow_mut().clear();
        self.tables.get(py).borrow_mut().clear();
        self.error_logs.get(py).borrow_mut().clear();
    }

    // Records `unique_name` as used in this scope.
    //
    // `None` is accepted and does nothing. A name that was already registered results in
    // a `ValueError`.
    fn register_unique_name(
        &self,
        unique_name: Option<&UniqueName>,
        py: Python<'_>,
    ) -> PyResult<()> {
        let Some(unique_name) = unique_name else {
            return Ok(());
        };
        // `HashSet::insert` reports whether the value was newly added.
        let newly_registered = self
            .unique_names
            .get(py)
            .borrow_mut()
            .insert(unique_name.clone());
        if newly_registered {
            Ok(())
        } else {
            Err(PyValueError::new_err(format!(
                "Unique name '{unique_name}' used more than once"
            )))
        }
    }
}

#[pymethods]
impl Scope {
    // Read-only Python property: the worker index reported by the underlying graph.
    #[getter]
    pub fn worker_index(&self) -> usize {
        self.graph.worker_index()
    }

    // Read-only Python property: the worker count reported by the underlying graph.
    #[getter]
    pub fn worker_count(&self) -> usize {
        self.graph.worker_count()
    }

    // Read-only Python property: the thread count reported by the underlying graph.
    #[getter]
    pub fn thread_count(&self) -> usize {
        self.graph.thread_count()
    }

    // Read-only Python property: the process count reported by the underlying graph.
    #[getter]
    pub fn process_count(&self) -> usize {
        self.graph.process_count()
    }

    // Read-only Python property: a new reference to the event loop object
    // this scope was constructed with.
    #[getter]
    pub fn event_loop(&self, py: Python<'_>) -> PyObject {
        self.event_loop.clone_ref(py)
    }

    // Asks the graph for an empty table whose (flat) table properties are
    // built from the column properties in `properties`, and wraps the
    // resulting handle in a Python `Table`.
    pub fn empty_table(
        self_: &Bound<Self>,
        properties: ConnectorProperties,
    ) -> PyResult<Py<Table>> {
        let flat_properties = EngineTableProperties::flat(properties.column_properties());
        let scope = self_.borrow();
        let empty_handle = scope.graph.empty_table(Arc::new(flat_properties))?;
        Table::new(self_, empty_handle)
    }

    // Creates a universe from the given iterable of keys via the graph's
    // `static_universe` and wraps the handle in a Python `Universe`.
    pub fn static_universe<'py>(
        self_: &Bound<'py, Self>,
        #[pyo3(from_py_with = from_py_iterable)] keys: Vec<Key>,
    ) -> PyResult<Bound<'py, Universe>> {
        let scope = self_.borrow();
        let universe_handle = scope.graph.static_universe(keys)?;
        Universe::new(self_, universe_handle)
    }

    // Creates a column over `universe` from an iterable of `(key, value)`
    // pairs. Fails with "scope mismatch" when `universe` belongs to a
    // different scope object.
    pub fn static_column<'py>(
        self_: &Bound<'py, Self>,
        universe: &Bound<'py, Universe>,
        #[pyo3(from_py_with = from_py_iterable)] values: Vec<(Key, Value)>,
        properties: ColumnProperties,
    ) -> PyResult<Bound<'py, Column>> {
        let universe_ref = universe.get();
        check_identity(self_.as_unbound(), &universe_ref.scope, "scope mismatch")?;
        let scope = self_.borrow();
        let column_handle =
            scope
                .graph
                .static_column(universe_ref.handle, values, properties.0)?;
        Column::new(universe, column_handle)
    }

    // Creates a table from an iterable of `DataRow`s. The table properties
    // are the flat properties built from `properties.column_properties()`.
    pub fn static_table(
        self_: &Bound<Self>,
        #[pyo3(from_py_with = from_py_iterable)] data: Vec<DataRow>,
        properties: ConnectorProperties,
    ) -> PyResult<Py<Table>> {
        let flat_properties = EngineTableProperties::flat(properties.column_properties());
        let scope = self_.borrow();
        let table_handle = scope.graph.static_table(data, Arc::new(flat_properties))?;
        Table::new(self_, table_handle)
    }

    // Registers an input connector with the graph and returns its table.
    //
    // Steps, in order: register the optional unique name from `properties`
    // (a duplicate raises `ValueError`), bump the scope's connector counter,
    // build the reader from `data_source` and the parser from `data_format`,
    // then hand everything to the graph's `connector_table`.
    pub fn connector_table(
        self_: &Bound<Self>,
        data_source: &Bound<DataStorage>,
        data_format: &Bound<DataFormat>,
        properties: &Bound<ConnectorProperties>,
    ) -> PyResult<Py<Table>> {
        let py = self_.py();
        let scope = self_.borrow();
        let connector_properties = properties.borrow();
        let format = data_format.borrow();

        let unique_name = connector_properties.unique_name.clone();
        scope.register_unique_name(unique_name.as_ref(), py)?;
        // The counter is bumped before the reader is built; the reader
        // constructor receives the scope.
        *scope.total_connectors.get(py).borrow_mut() += 1;

        let (reader_impl, parallel_readers) =
            data_source
                .borrow()
                .construct_reader(py, &format, &scope, &connector_properties)?;
        let parser_impl = format.construct_parser(py)?;

        let flat_properties =
            EngineTableProperties::flat(connector_properties.column_properties());
        let commit_duration = connector_properties
            .commit_duration_ms
            .map(time::Duration::from_millis);
        let table_handle = scope.graph.connector_table(
            reader_impl,
            parser_impl,
            commit_duration,
            parallel_readers,
            Arc::new(flat_properties),
            unique_name.as_ref(),
            connector_properties.synchronization_group.borrow().as_ref(),
            connector_properties.max_backlog_size,
            scope.timestamp_at_start,
        )?;
        Table::new(self_, table_handle)
    }

    // Runs the graph's `iterate` over three groups of legacy tables
    // (`iterated`, `iterated_with_universe`, `extra`) with an optional
    // `limit`, using the Python callable `logic` to describe the body.
    //
    // `logic` is called as `logic(scope, iterated, iterated_with_universe, extra)`
    // where `scope` is a freshly created nested `Scope` and the tables are
    // `LegacyTable`s bound to that nested scope. It must return a pair of
    // iterables of `LegacyTable`. The two result lists returned to the caller
    // are wrapped against the outer scope (`self_`).
    #[allow(clippy::type_complexity)]
    #[pyo3(signature = (iterated, iterated_with_universe, extra, logic, *, limit = None))]
    pub fn iterate<'py>(
        self_: &Bound<'py, Self>,
        #[pyo3(from_py_with = engine_tables_from_py_iterable)] iterated: Vec<EngineLegacyTable>,
        #[pyo3(from_py_with = engine_tables_from_py_iterable)] iterated_with_universe: Vec<
            EngineLegacyTable,
        >,
        #[pyo3(from_py_with = engine_tables_from_py_iterable)] extra: Vec<EngineLegacyTable>,
        logic: &Bound<'py, PyAny>,
        limit: Option<u32>,
    ) -> PyResult<(Vec<Bound<'py, LegacyTable>>, Vec<Bound<'py, LegacyTable>>)> {
        let py = self_.py();
        let (result, result_with_universe) = self_.borrow().graph.iterate(
            iterated,
            iterated_with_universe,
            extra,
            limit,
            Box::new(|graph, iterated, iterated_with_universe, extra| {
                // Nested scope: parent is the outer scope, the event loop is
                // shared, there is no license, it is not persisted and it gets
                // a start timestamp taken at this moment.
                let scope = Bound::new(
                    py,
                    Scope::new(
                        Some(self_.clone().unbind()),
                        self_.borrow().event_loop.clone_ref(py),
                        None,
                        false,
                        Timestamp::new_from_current_time(),
                    ),
                )?;
                // Everything below runs with the nested scope's graph wrapper
                // pointed at `graph`.
                scope.borrow().graph.scoped(graph, || {
                    // Engine tables -> Python `LegacyTable`s bound to the nested scope.
                    let iterated = iterated
                        .into_iter()
                        .map(|table| LegacyTable::from_engine(&scope, table))
                        .collect::<PyResult<Vec<_>>>()?;
                    let iterated_with_universe = iterated_with_universe
                        .into_iter()
                        .map(|table| LegacyTable::from_engine(&scope, table))
                        .collect::<PyResult<Vec<_>>>()?;
                    let extra = extra
                        .into_iter()
                        .map(|table| LegacyTable::from_engine(&scope, table))
                        .collect::<PyResult<Vec<_>>>()?;
                    // The callable's return value is unpacked as a 2-tuple.
                    let (result, result_with_universe): (Bound<PyAny>, Bound<PyAny>) = logic
                        .call1((scope, iterated, iterated_with_universe, extra))?
                        .extract()?;
                    // Python `LegacyTable`s -> engine tables; any element that is
                    // not a `LegacyTable` makes the whole call fail.
                    let result = result
                        .try_iter()?
                        .map(|table| {
                            let table: PyRef<LegacyTable> = table?.extract()?;
                            Ok(table.to_engine())
                        })
                        .collect::<PyResult<_>>()?;
                    let result_with_universe = result_with_universe
                        .try_iter()?
                        .map(|table| {
                            let table: PyRef<LegacyTable> = table?.extract()?;
                            Ok(table.to_engine())
                        })
                        .collect::<PyResult<_>>()?;
                    Ok((result, result_with_universe))
                })
            }),
        )?;
        // Wrap the graph's outputs as `LegacyTable`s of the outer scope.
        let result = result
            .into_iter()
            .map(|table| LegacyTable::from_engine(self_, table))
            .collect::<PyResult<_>>()?;
        let result_with_universe = result_with_universe
            .into_iter()
            .map(|table| LegacyTable::from_engine(self_, table))
            .collect::<PyResult<_>>()?;
        Ok((result, result_with_universe))
    }

    // Builds a new column over `table`'s universe by applying the Python
    // callable `function`.
    //
    // For every element of the input batch the callable is invoked, under
    // the GIL, with a single tuple argument holding that element's values;
    // its return value is extracted as a `Value`. The expression takes all of
    // the table's columns as arguments. Fails with "scope mismatch" when the
    // table's universe belongs to another scope.
    pub fn map_column<'py>(
        self_: &Bound<'py, Self>,
        table: &LegacyTable,
        function: Py<PyAny>,
        properties: ColumnProperties,
    ) -> PyResult<Bound<'py, Column>> {
        let py = self_.py();
        let universe = table.universe.bind(py);
        let universe_ref = universe.get();
        check_identity(self_.as_unbound(), &universe_ref.scope, "scope mismatch")?;

        let argument_count = table.columns.len();
        let column_handles = table
            .columns
            .iter()
            .map(|column| column.borrow(py).handle)
            .collect();
        let apply_expression = AnyExpression::Apply(
            Box::new(move |inputs| {
                Python::with_gil(|py| -> Vec<DynResult<_>> {
                    inputs
                        .iter()
                        .map(|args| {
                            let args_tuple = PyTuple::new(py, *args)?;
                            Ok(function.call1(py, (args_tuple,))?.extract::<Value>(py)?)
                        })
                        .collect()
                })
            }),
            Expressions::Arguments(0..argument_count),
        );

        let scope = self_.borrow();
        let new_column_handle = scope.graph.expression_column(
            BatchWrapper::WithGil,
            Arc::new(Expression::Any(apply_expression)),
            universe_ref.handle,
            column_handles,
            properties.0,
        )?;
        Column::new(universe, new_column_handle)
    }

    // Delegates to the graph's `expression_table` with the given column
    // paths, expressions and `append_only_or_deterministic` flag, and wraps
    // the resulting handle in a Python `Table`.
    pub fn expression_table(
        self_: &Bound<Self>,
        table: &Table,
        #[pyo3(from_py_with = from_py_iterable)] column_paths: Vec<ColumnPath>,
        #[pyo3(from_py_with = from_py_iterable)] expressions: Vec<ExpressionData>,
        append_only_or_deterministic: bool,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let result_handle = scope.graph.expression_table(
            table.handle,
            column_paths,
            expressions,
            append_only_or_deterministic,
        )?;
        Table::new(self_, result_handle)
    }

    // Combines the given columns into a table over `universe` via the
    // graph's `columns_to_table`.
    pub fn columns_to_table(
        self_: &Bound<Self>,
        universe: &Bound<Universe>,
        #[pyo3(from_py_with = from_py_iterable)] columns: Vec<PyRef<Column>>,
    ) -> PyResult<Py<Table>> {
        let column_handles = columns.into_iter().map(|column| column.handle).collect();
        let universe_handle = universe.borrow().handle;
        let scope = self_.borrow();
        let table_handle = scope
            .graph
            .columns_to_table(universe_handle, column_handles)?;
        Table::new(self_, table_handle)
    }

    // Extracts the column at `column_path` of `table` as a `Column` attached
    // to `universe`, via the graph's `table_column`.
    pub fn table_column<'py>(
        self_: &Bound<'py, Self>,
        universe: &Bound<'py, Universe>,
        table: PyRef<Table>,
        column_path: ColumnPath,
    ) -> PyResult<Bound<'py, Column>> {
        let universe_handle = universe.borrow().handle;
        let scope = self_.borrow();
        let column_handle = scope
            .graph
            .table_column(universe_handle, table.handle, column_path)?;
        Column::new(universe, column_handle)
    }

    // Returns the universe the graph reports for `table`, wrapped in a
    // Python `Universe` of this scope.
    pub fn table_universe<'py>(
        self_: &Bound<'py, Self>,
        table: &Table,
    ) -> PyResult<Bound<'py, Universe>> {
        let scope = self_.borrow();
        let handle = scope.graph.table_universe(table.handle)?;
        Universe::new(self_, handle)
    }

    // Looks up the engine properties stored for `path` within `table` and
    // wraps them in a Python `TableProperties` object.
    pub fn table_properties(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        path: ColumnPath,
    ) -> PyResult<Py<TableProperties>> {
        let scope = self_.borrow();
        let engine_properties = scope.graph.table_properties(table.handle, &path)?;
        TableProperties::new(self_.py(), engine_properties)
    }

    // Delegates to the graph's `flatten_table_storage` for the listed column
    // paths and wraps the resulting handle in a Python `Table`.
    pub fn flatten_table_storage(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        #[pyo3(from_py_with = from_py_iterable)] column_paths: Vec<ColumnPath>,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let flattened_handle = scope
            .graph
            .flatten_table_storage(table.handle, column_paths)?;
        Table::new(self_, flattened_handle)
    }

    // Delegates to the graph's `filter_table`, passing the path of the
    // filtering column, and wraps the resulting handle in a Python `Table`.
    pub fn filter_table(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        filtering_column_path: ColumnPath,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let filtered_handle =
            scope
                .graph
                .filter_table(table.handle, filtering_column_path, table_properties.0)?;
        Table::new(self_, filtered_handle)
    }

    // Delegates to the graph's `remove_retractions_from_table` and wraps the
    // resulting handle in a Python `Table`.
    pub fn remove_retractions_from_table(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let result_handle = scope
            .graph
            .remove_retractions_from_table(table.handle, table_properties.0)?;
        Table::new(self_, result_handle)
    }

    // Delegates to the graph's `forget` operator with the threshold,
    // current-time and instance column paths plus the
    // `mark_forgetting_records` flag, and wraps the resulting handle in a
    // Python `Table`.
    pub fn forget(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        threshold_column_path: ColumnPath,
        current_time_column_path: ColumnPath,
        instance_column_path: ColumnPath,
        mark_forgetting_records: bool,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let result_handle = scope.graph.forget(
            table.handle,
            threshold_column_path,
            current_time_column_path,
            instance_column_path,
            mark_forgetting_records,
            table_properties.0,
        )?;
        Table::new(self_, result_handle)
    }

    // Delegates to the graph's `forget_immediately` and wraps the resulting
    // handle in a Python `Table`.
    pub fn forget_immediately(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let result_handle = scope
            .graph
            .forget_immediately(table.handle, table_properties.0)?;
        Table::new(self_, result_handle)
    }

    // Delegates to the graph's `filter_out_results_of_forgetting`,
    // forwarding the `ensure_consistency` flag, and wraps the resulting
    // handle in a Python `Table`.
    pub fn filter_out_results_of_forgetting(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        ensure_consistency: bool,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let result_handle = scope.graph.filter_out_results_of_forgetting(
            table.handle,
            ensure_consistency,
            table_properties.0,
        )?;
        Table::new(self_, result_handle)
    }

    // Delegates to the graph's `freeze` operator with the threshold,
    // current-time and instance column paths, and wraps the resulting handle
    // in a Python `Table`.
    pub fn freeze(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        threshold_column_path: ColumnPath,
        current_time_column_path: ColumnPath,
        instance_column_path: ColumnPath,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let frozen_handle = scope.graph.freeze(
            table.handle,
            threshold_column_path,
            current_time_column_path,
            instance_column_path,
            table_properties.0,
        )?;
        Table::new(self_, frozen_handle)
    }

    // Delegates to the graph's `gradual_broadcast` with the input table, the
    // threshold table and the lower/value/upper column paths, and wraps the
    // resulting handle in a Python `Table`.
    pub fn gradual_broadcast(
        self_: &Bound<Self>,
        input_table: PyRef<Table>,
        threshold_table: PyRef<Table>,
        lower_path: ColumnPath,
        value_path: ColumnPath,
        upper_path: ColumnPath,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let broadcast_handle = scope.graph.gradual_broadcast(
            input_table.handle,
            threshold_table.handle,
            lower_path,
            value_path,
            upper_path,
            table_properties.0,
        )?;
        Table::new(self_, broadcast_handle)
    }

    // Converts the Python-side index and query descriptions to their engine
    // forms, creates an index instance from `external_index_factory`, and
    // delegates to the graph's `use_external_index_as_of_now`.
    pub fn use_external_index_as_of_now(
        self_: &Bound<Self>,
        index: &PyExternalIndexData,
        queries: &PyExternalIndexQuery,
        table_properties: TableProperties,
        external_index_factory: PyExternalIndexFactory,
    ) -> PyResult<Py<Table>> {
        let index_data = index.to_external_index_data();
        let index_query = queries.to_external_index_query();
        let index_instance = external_index_factory.inner.make_instance()?;
        let scope = self_.borrow();
        let result_handle = scope.graph.use_external_index_as_of_now(
            index_data,
            index_query,
            table_properties.0,
            index_instance,
        )?;
        Table::new(self_, result_handle)
    }

    // Delegates to the graph's `buffer` operator with the threshold,
    // current-time and instance column paths, and wraps the resulting handle
    // in a Python `Table`.
    pub fn buffer(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        threshold_column_path: ColumnPath,
        current_time_column_path: ColumnPath,
        instance_column_path: ColumnPath,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let buffered_handle = scope.graph.buffer(
            table.handle,
            threshold_column_path,
            current_time_column_path,
            instance_column_path,
            table_properties.0,
        )?;
        Table::new(self_, buffered_handle)
    }

    // Delegates to the graph's `intersect_tables`, passing `table` together
    // with the handles of all `tables`, and wraps the resulting handle in a
    // Python `Table`.
    pub fn intersect_tables(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        #[pyo3(from_py_with = from_py_iterable)] tables: Vec<PyRef<Table>>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let other_handles = tables.into_iter().map(|other| other.handle).collect();
        let scope = self_.borrow();
        let intersection_handle =
            scope
                .graph
                .intersect_tables(table.handle, other_handles, table_properties.0)?;
        Table::new(self_, intersection_handle)
    }

    // Delegates to the graph's `subtract_table` with the left and right
    // table handles and wraps the resulting handle in a Python `Table`.
    pub fn subtract_table(
        self_: &Bound<Self>,
        left_table: PyRef<Table>,
        right_table: PyRef<Table>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let difference_handle = scope.graph.subtract_table(
            left_table.handle,
            right_table.handle,
            table_properties.0,
        )?;
        Table::new(self_, difference_handle)
    }

    // Delegates to the graph's `concat_tables` with the handles of all
    // `tables` and wraps the resulting handle in a Python `Table`.
    pub fn concat_tables(
        self_: &Bound<Self>,
        #[pyo3(from_py_with = from_py_iterable)] tables: Vec<PyRef<Table>>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let input_handles = tables.into_iter().map(|input| input.handle).collect();
        let scope = self_.borrow();
        let concatenated_handle = scope
            .graph
            .concat_tables(input_handles, table_properties.0)?;
        Table::new(self_, concatenated_handle)
    }

    // Delegates to the graph's `flatten_table`, passing the path of the
    // column to flatten, and wraps the resulting handle in a Python `Table`.
    pub fn flatten_table(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        flatten_column_path: ColumnPath,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let flattened_handle =
            scope
                .graph
                .flatten_table(table.handle, flatten_column_path, table_properties.0)?;
        Table::new(self_, flattened_handle)
    }

    // Delegates to the graph's `sort_table` with the key and instance column
    // paths and wraps the resulting handle in a Python `Table`.
    pub fn sort_table(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        key_column_path: ColumnPath,
        instance_column_path: ColumnPath,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let sorted_handle = scope.graph.sort_table(
            table.handle,
            key_column_path,
            instance_column_path,
            table_properties.0,
        )?;
        Table::new(self_, sorted_handle)
    }

    // Delegates to the graph's `reindex_table`, passing the path of the
    // reindexing column, and wraps the resulting handle in a Python `Table`.
    pub fn reindex_table(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        reindexing_column_path: ColumnPath,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let reindexed_handle =
            scope
                .graph
                .reindex_table(table.handle, reindexing_column_path, table_properties.0)?;
        Table::new(self_, reindexed_handle)
    }

    // Restricts `column` to `universe` via the graph's `restrict_column` and
    // returns the new column attached to `universe`.
    //
    // Both `universe` and the column's own universe must belong to this
    // scope; otherwise the call fails with "scope mismatch".
    pub fn restrict_column<'py>(
        self_: &Bound<'py, Self>,
        universe: &Bound<'py, Universe>,
        column: &Bound<'py, Column>,
    ) -> PyResult<Bound<'py, Column>> {
        let this_scope = self_.as_unbound();
        let target_universe = universe.get();
        check_identity(this_scope, &target_universe.scope, "scope mismatch")?;

        let source_column = column.get();
        let source_universe = source_column.universe.bind(self_.py());
        check_identity(this_scope, &source_universe.borrow().scope, "scope mismatch")?;

        let scope = self_.borrow();
        let restricted_handle = scope
            .graph
            .restrict_column(target_universe.handle, source_column.handle)?;
        Column::new(universe, restricted_handle)
    }

    // Delegates to the graph's `restrict_or_override_table_universe` with
    // the boolean flag set to `false` (`override_table_universe` is the
    // variant that passes `true`) and wraps the resulting handle in a Python
    // `Table`.
    pub fn restrict_table(
        self_: &Bound<Self>,
        orig_table: PyRef<Table>,
        new_table: PyRef<Table>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let restricted_handle = scope.graph.restrict_or_override_table_universe(
            orig_table.handle,
            new_table.handle,
            false,
            table_properties.0,
        )?;
        Table::new(self_, restricted_handle)
    }

    // Delegates to the graph's `restrict_or_override_table_universe` with
    // the boolean flag set to `true` (`restrict_table` is the variant that
    // passes `false`) and wraps the resulting handle in a Python `Table`.
    pub fn override_table_universe(
        self_: &Bound<Self>,
        orig_table: PyRef<Table>,
        new_table: PyRef<Table>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let overridden_handle = scope.graph.restrict_or_override_table_universe(
            orig_table.handle,
            new_table.handle,
            true,
            table_properties.0,
        )?;
        Table::new(self_, overridden_handle)
    }

    pub fn table<'py>(
        self_: &Bound<'py, Self>,
        universe: &Bound<'py, Universe>,
        columns: &Bound<'py, PyAny>,
    ) -> PyResult<LegacyTable> {
        check_identity(
            self_.as_unbound(),
            &universe.borrow().scope,
            "scope mismatch",
        )?;
        let columns = columns
            .try_iter()?
            .map(|column| {
                let column = column?;
                let column = column.downcast()?;
                let restricted = Self::restrict_column(self_, universe, column)?;
                Ok(restricted.unbind())
            })
            .collect::<PyResult<_>>()?;
        LegacyTable::new(universe.clone(), columns)
    }

    // Delegates to the graph's `group_by_table`. The shard policy is derived
    // from `last_column_is_instance`; the grouping paths, reducers and the
    // `set_id` flag are forwarded unchanged.
    #[pyo3(signature = (table, grouping_columns_paths, last_column_is_instance, reducers, set_id, table_properties))]
    pub fn group_by_table(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        #[pyo3(from_py_with = from_py_iterable)] grouping_columns_paths: Vec<ColumnPath>,
        last_column_is_instance: bool,
        #[pyo3(from_py_with = from_py_iterable)] reducers: Vec<ReducerData>,
        set_id: bool,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let shard_policy = ShardPolicy::from_last_column_is_instance(last_column_is_instance);
        let scope = self_.borrow();
        let grouped_handle = scope.graph.group_by_table(
            table.handle,
            grouping_columns_paths,
            shard_policy,
            reducers,
            set_id,
            table_properties.0,
        )?;
        Table::new(self_, grouped_handle)
    }

    // Delegates to the graph's `deduplicate`. The Python callable `combine`
    // is adapted with `wrap_stateful_combine` before being handed over; the
    // optional `unique_name` is passed by reference.
    #[pyo3(signature = (table, grouping_columns_paths, reduced_column_paths, combine, unique_name, table_properties))]
    pub fn deduplicate(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        #[pyo3(from_py_with = from_py_iterable)] grouping_columns_paths: Vec<ColumnPath>,
        #[pyo3(from_py_with = from_py_iterable)] reduced_column_paths: Vec<ColumnPath>,
        combine: Py<PyAny>,
        unique_name: Option<UniqueName>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let combine_fn = wrap_stateful_combine(combine);
        let scope = self_.borrow();
        let deduplicated_handle = scope.graph.deduplicate(
            table.handle,
            grouping_columns_paths,
            reduced_column_paths,
            combine_fn,
            unique_name.as_ref(),
            table_properties.0,
        )?;
        Table::new(self_, deduplicated_handle)
    }

    // Delegates to the graph's `ix_table`. The key policy is derived from
    // the `strict` / `optional` flags first, so an invalid combination is
    // reported before the graph is touched.
    pub fn ix_table(
        self_: &Bound<Self>,
        to_ix_table: PyRef<Table>,
        key_table: PyRef<Table>,
        key_column_path: ColumnPath,
        optional: bool,
        strict: bool,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let key_policy = IxKeyPolicy::from_strict_optional(strict, optional)?;
        let scope = self_.borrow();
        let indexed_handle = scope.graph.ix_table(
            to_ix_table.handle,
            key_table.handle,
            key_column_path,
            key_policy,
            table_properties.0,
        )?;
        Table::new(self_, indexed_handle)
    }

    // Delegates to the graph's `join_tables`.
    //
    // The join type is derived from `assign_id` / `left_ear` / `right_ear`
    // (an invalid combination is reported before the graph is touched), the
    // exactly-once settings from the two `*_exactly_once` flags, and the
    // shard policy from `last_column_is_instance`.
    #[pyo3(signature = (left_table, right_table, left_column_paths, right_column_paths, *,
        last_column_is_instance, table_properties, assign_id = false, left_ear = false,
        right_ear = false, left_exactly_once = false, right_exactly_once = false))]
    #[allow(clippy::too_many_arguments)]
    #[allow(clippy::fn_params_excessive_bools)]
    pub fn join_tables(
        self_: &Bound<Self>,
        left_table: PyRef<Table>,
        right_table: PyRef<Table>,
        #[pyo3(from_py_with = from_py_iterable)] left_column_paths: Vec<ColumnPath>,
        #[pyo3(from_py_with = from_py_iterable)] right_column_paths: Vec<ColumnPath>,
        last_column_is_instance: bool,
        table_properties: TableProperties,
        assign_id: bool,
        left_ear: bool,
        right_ear: bool,
        left_exactly_once: bool,
        right_exactly_once: bool,
    ) -> PyResult<Py<Table>> {
        let join_type = JoinType::from_assign_left_right(assign_id, left_ear, right_ear)?;
        let exactly_once = JoinExactlyOnce::new(left_exactly_once, right_exactly_once);
        let left_side = JoinData::new(left_table.handle, left_column_paths);
        let right_side = JoinData::new(right_table.handle, right_column_paths);
        let shard_policy = ShardPolicy::from_last_column_is_instance(last_column_is_instance);

        let scope = self_.borrow();
        let joined_handle = scope.graph.join_tables(
            left_side,
            right_side,
            shard_policy,
            join_type,
            exactly_once,
            table_properties.0,
        )?;
        Table::new(self_, joined_handle)
    }

    // Converts every input `ComplexColumn` to its engine form, submits them
    // to the graph's `complex_columns`, and pairs each returned column handle
    // with the corresponding output universe collected from the inputs.
    //
    // `zip_eq` panics if the number of returned handles differs from the
    // number of collected output universes.
    fn complex_columns<'py>(
        self_: &Bound<'py, Self>,
        #[pyo3(from_py_with = from_py_iterable)] inputs: Vec<Bound<'py, ComplexColumn>>,
    ) -> PyResult<Vec<Bound<'py, Column>>> {
        let py = self_.py();
        let mut engine_inputs = Vec::new();
        let mut universes = Vec::new();
        for complex_column in &inputs {
            engine_inputs.push(ComplexColumn::to_engine(complex_column));
            universes.extend(ComplexColumn::output_universe(complex_column));
        }
        let scope = self_.borrow();
        let column_handles = scope.graph.complex_columns(engine_inputs)?;
        column_handles
            .into_iter()
            .zip_eq(universes)
            .map(|(column_handle, universe)| Column::new(universe.bind(py), column_handle))
            .collect()
    }

    // Delegates to the graph's `debug_table` with the given name and the
    // `(name, path)` column list. Fails with "scope mismatch" when `table`
    // belongs to another scope.
    pub fn debug_table<'py>(
        self_: &'py Bound<Self>,
        name: String,
        table: &'py Bound<Table>,
        columns: Vec<(String, ColumnPath)>,
    ) -> PyResult<()> {
        let table_ref = table.borrow();
        check_identity(self_.as_unbound(), &table_ref.scope, "scope mismatch")?;
        self_
            .borrow()
            .graph
            .debug_table(name, table_ref.handle, columns)?;
        Ok(())
    }

    // Delegates to the graph's `update_rows_table` with the handles of
    // `table` and `update` and wraps the resulting handle in a Python `Table`.
    pub fn update_rows_table(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        update: PyRef<Table>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let updated_handle =
            scope
                .graph
                .update_rows_table(table.handle, update.handle, table_properties.0)?;
        Table::new(self_, updated_handle)
    }

    // Delegates to the graph's `update_cells_table` with the handles of
    // `table` and `update` plus the two lists of column paths, and wraps the
    // resulting handle in a Python `Table`.
    pub fn update_cells_table(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        update: PyRef<Table>,
        #[pyo3(from_py_with = from_py_iterable)] column_paths: Vec<ColumnPath>,
        #[pyo3(from_py_with = from_py_iterable)] update_paths: Vec<ColumnPath>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let scope = self_.borrow();
        let updated_handle = scope.graph.update_cells_table(
            table.handle,
            update.handle,
            column_paths,
            update_paths,
            table_properties.0,
        )?;
        Table::new(self_, updated_handle)
    }

    // Attaches an output sink to `table`.
    //
    // The optional `unique_name` is registered first (a duplicate raises
    // `ValueError`). The writer is then built from `data_sink` (it receives
    // the data format and the scope's license) and the formatter from
    // `data_format`; both are handed to the graph's `output_table`.
    #[pyo3(signature = (table, column_paths, data_sink, data_format, unique_name=None, sort_by_indices=None))]
    pub fn output_table(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        #[pyo3(from_py_with = from_py_iterable)] column_paths: Vec<ColumnPath>,
        data_sink: &Bound<DataStorage>,
        data_format: &Bound<DataFormat>,
        unique_name: Option<UniqueName>,
        sort_by_indices: Option<Vec<usize>>,
    ) -> PyResult<()> {
        let py = self_.py();
        let scope = self_.borrow();
        scope.register_unique_name(unique_name.as_ref(), py)?;

        let format = data_format.borrow();
        let sink_impl =
            data_sink
                .borrow()
                .construct_writer(py, &format, scope.license.as_ref())?;
        let format_impl = format.construct_formatter(py)?;

        scope.graph.output_table(
            sink_impl,
            format_impl,
            table.handle,
            column_paths,
            unique_name,
            sort_by_indices,
        )?;
        Ok(())
    }

    // Subscribes Python callbacks to changes of `table`.
    //
    // The optional `unique_name` is registered first (a duplicate raises
    // `ValueError`). When `on_change` is an async function the scope's event
    // loop is passed along to the callback builder; in that case
    // `sort_by_indices` must be `None`, otherwise `ValueError` is raised.
    // The subscription is always created with `skip_pending: true`.
    #[allow(clippy::too_many_arguments)]
    #[pyo3(signature = (table, column_paths, skip_persisted_batch, skip_errors, on_change, on_time_end, on_end, unique_name=None, sort_by_indices=None))]
    pub fn subscribe_table(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        #[pyo3(from_py_with = from_py_iterable)] column_paths: Vec<ColumnPath>,
        skip_persisted_batch: bool,
        skip_errors: bool,
        on_change: Py<PyAny>,
        on_time_end: Py<PyAny>,
        on_end: Py<PyAny>,
        unique_name: Option<UniqueName>,
        sort_by_indices: Option<Vec<usize>>,
    ) -> PyResult<()> {
        let py = self_.py();
        let scope = self_.borrow();
        scope.register_unique_name(unique_name.as_ref(), py)?;

        let observer_is_async = is_async_function(py, on_change.clone_ref(py))?;
        if observer_is_async && sort_by_indices.is_some() {
            return Err(PyValueError::new_err(
                "Using sort_by with async observer is not supported",
            ));
        }
        // Only async observers get the event loop.
        let event_loop = observer_is_async.then(|| scope.event_loop(py));
        let callbacks = build_subscribe_callback(on_change, on_time_end, on_end, event_loop);
        let config = SubscribeConfig {
            skip_persisted_batch,
            skip_errors,
            skip_pending: true,
        };

        scope.graph.subscribe_table(
            table.handle,
            column_paths,
            callbacks,
            config,
            unique_name,
            sort_by_indices,
        )?;
        Ok(())
    }

    // Packs `operator_id` and `depends_on_error_log` into an
    // `OperatorProperties` value and passes it to the graph's
    // `set_operator_properties`.
    pub fn set_operator_properties(
        self_: &Bound<Self>,
        operator_id: usize,
        depends_on_error_log: bool,
    ) -> PyResult<()> {
        let operator_properties = OperatorProperties {
            id: operator_id,
            depends_on_error_log,
        };
        self_
            .borrow()
            .graph
            .set_operator_properties(operator_properties)?;
        Ok(())
    }

    // Passes the handle of `error_log` (or `None`) to the graph's
    // `set_error_log`. A supplied error log must belong to this scope,
    // otherwise the call fails with "scope mismatch".
    #[pyo3(signature = (error_log=None))]
    pub fn set_error_log(self_: &Bound<Self>, error_log: Option<PyRef<ErrorLog>>) -> PyResult<()> {
        let error_log_handle = match error_log {
            Some(error_log) => {
                check_identity(self_.as_unbound(), &error_log.scope, "scope mismatch")?;
                Some(error_log.handle)
            }
            None => None,
        };
        self_.borrow().graph.set_error_log(error_log_handle)?;
        Ok(())
    }

    // Creates an error log in the graph and returns the pair
    // `(table, error_log)` as Python objects. The table properties are the
    // flat properties built from `properties.column_properties()`.
    pub fn error_log(
        self_: &Bound<Self>,
        properties: ConnectorProperties,
    ) -> PyResult<(Py<Table>, Py<ErrorLog>)> {
        let flat_properties = EngineTableProperties::flat(properties.column_properties());
        let scope = self_.borrow();
        let (table_handle, error_log_handle) = scope.graph.error_log(Arc::new(flat_properties))?;
        let table = Table::new(self_, table_handle)?;
        let error_log = ErrorLog::new(self_, error_log_handle)?;
        Ok((table, error_log))
    }

    // Forwards the table handle and `operator_id` to the graph's `probe_table`.
    pub fn probe_table(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        operator_id: usize,
    ) -> PyResult<()> {
        let probed = table.handle;
        self_.borrow().graph.probe_table(probed, operator_id)?;
        Ok(())
    }

    // Exports the selected columns of `table` through the graph and wraps the
    // result in a `PyExportedTable`.
    pub fn export_table(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        column_paths: Vec<ColumnPath>,
    ) -> PyResult<PyExportedTable> {
        let source = table.handle;
        let exported = self_.borrow().graph.export_table(source, column_paths)?;
        Ok(PyExportedTable::new(exported))
    }

    // Imports a previously exported table into this scope's graph and wraps the new handle.
    pub fn import_table(self_: &Bound<Self>, table: &PyExportedTable) -> PyResult<Py<Table>> {
        let exported = table.inner.clone();
        let imported_handle = self_.borrow().graph.import_table(exported)?;
        Table::new(self_, imported_handle)
    }

    // Calls the graph's `remove_value_from_table` for `table` with the given column
    // paths and `value`, and wraps the resulting handle in a new `Table`.
    pub fn remove_value_from_table(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        #[pyo3(from_py_with = from_py_iterable)] column_paths: Vec<ColumnPath>,
        value: Value,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let source = table.handle;
        let engine_properties = table_properties.0;
        let filtered_handle = self_.borrow().graph.remove_value_from_table(
            source,
            column_paths,
            value,
            engine_properties,
        )?;
        Table::new(self_, filtered_handle)
    }

    // Registers an async transformer in the graph: `table` is observed through the
    // given Python callbacks, while the reader/parser built from `data_source` and
    // `data_format` supply the rows of the returned table.
    #[allow(clippy::too_many_arguments)]
    pub fn async_transformer(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        #[pyo3(from_py_with = from_py_iterable)] column_paths: Vec<ColumnPath>,
        on_change: Py<PyAny>,
        on_time_end: Py<PyAny>,
        on_end: Py<PyAny>,
        data_source: &Bound<DataStorage>,
        data_format: &Bound<DataFormat>,
        properties: ConnectorProperties,
        skip_errors: bool,
    ) -> PyResult<Py<Table>> {
        let py = self_.py();

        // No event loop is passed, so `on_change` is called synchronously under the GIL.
        let callbacks = build_subscribe_callback(on_change, on_time_end, on_end, None);
        // The reader created below is counted as one more connector of this scope.
        // NOTE(review): the counter is bumped before `construct_reader`, so it stays
        // incremented even if reader construction fails — confirm this is intended.
        *self_.borrow().total_connectors.get(py).borrow_mut() += 1;
        let (reader_impl, parallel_readers) = data_source.borrow().construct_reader(
            py,
            &data_format.borrow(),
            &self_.borrow(),
            &properties,
        )?;
        assert_eq!(parallel_readers, 1); // python connector that has parallel_readers == 1 has to be used

        let parser_impl = data_format.borrow().construct_parser(py)?;
        // Optional commit interval, given in milliseconds by the connector properties.
        let commit_duration = properties
            .commit_duration_ms
            .map(time::Duration::from_millis);
        let column_properties = properties.column_properties();

        let table_handle = self_.borrow().graph.async_transformer(
            table.handle,
            column_paths,
            callbacks,
            reader_impl,
            parser_impl,
            commit_duration,
            Arc::new(EngineTableProperties::flat(column_properties)),
            skip_errors,
        )?;
        Table::new(self_, table_handle)
    }

    // Applies the graph's `table_to_stream` operation to `table` and wraps the new handle.
    pub fn table_to_stream(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let source = table.handle;
        let engine_properties = table_properties.0;
        let stream_handle = self_
            .borrow()
            .graph
            .table_to_stream(source, engine_properties)?;
        Table::new(self_, stream_handle)
    }

    // Applies the graph's `stream_to_table` operation to `stream`, passing the column
    // path `is_upsert_path` along, and wraps the new handle.
    pub fn stream_to_table(
        self_: &Bound<Self>,
        stream: PyRef<Table>,
        is_upsert_path: ColumnPath,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let source = stream.handle;
        let engine_properties = table_properties.0;
        let materialized_handle =
            self_
                .borrow()
                .graph
                .stream_to_table(source, is_upsert_path, engine_properties)?;
        Table::new(self_, materialized_handle)
    }

    // Combines an insertions stream and a deletions stream through the graph's
    // `merge_streams_to_table` operation and wraps the resulting handle.
    pub fn merge_streams_to_table(
        self_: &Bound<Self>,
        insertions_stream_handle: PyRef<Table>,
        deletions_stream_handle: PyRef<Table>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let insertions = insertions_stream_handle.handle;
        let deletions = deletions_stream_handle.handle;
        let engine_properties = table_properties.0;
        let merged_handle = self_.borrow().graph.merge_streams_to_table(
            insertions,
            deletions,
            engine_properties,
        )?;
        Table::new(self_, merged_handle)
    }

    // Applies the graph's `assert_append_only` operation to the given table and
    // column paths, and wraps the resulting handle.
    fn assert_append_only(
        self_: &Bound<Self>,
        table_handle: PyRef<Table>,
        #[pyo3(from_py_with = from_py_iterable)] column_paths: Vec<ColumnPath>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let source = table_handle.handle;
        let engine_properties = table_properties.0;
        let checked_handle =
            self_
                .borrow()
                .graph
                .assert_append_only(source, column_paths, engine_properties)?;
        Table::new(self_, checked_handle)
    }

    // Applies the graph's `unpack_snapshots` operation to `table` and wraps the new handle.
    pub fn unpack_snapshots(
        self_: &Bound<Self>,
        table: PyRef<Table>,
        table_properties: TableProperties,
    ) -> PyResult<Py<Table>> {
        let source = table.handle;
        let engine_properties = table_properties.0;
        let unpacked_handle = self_
            .borrow()
            .graph
            .unpack_snapshots(source, engine_properties)?;
        Table::new(self_, unpacked_handle)
    }
}

/// Wraps three Python callables into engine-side [`SubscribeCallbacks`].
///
/// * `on_change` receives `(key, values_tuple, time, diff)` for every data event.
/// * `on_time_end` receives the new time.
/// * `on_end` is called without arguments.
///
/// When `event_loop` is provided, `on_change` is started as an async task on that
/// loop via `start_async_task` and the resulting future is awaited; otherwise it is
/// called directly while holding the GIL.
fn build_subscribe_callback(
    on_change: Py<PyAny>,
    on_time_end: Py<PyAny>,
    on_end: Py<PyAny>,
    event_loop: Option<Py<PyAny>>,
) -> SubscribeCallbacks {
    let builder = SubscribeCallbacksBuilder::new();

    let builder = if let Some(event_loop) = event_loop {
        // Asynchronous observer: the GIL is held only while the task is scheduled,
        // not while the returned future is awaited.
        builder.on_data_async(Box::new(move |key, values, time, diff| {
            let future = Python::with_gil(|py| {
                let args = (key, PyTuple::new(py, values)?, time, diff).into_pyobject(py)?;
                start_async_task(&event_loop, &on_change, args)
            });

            Box::pin(async {
                // The first `?` surfaces scheduling errors, the second one errors of the task.
                future?.await?;
                Ok(())
            })
        }))
    } else {
        // Synchronous observer: batches are wrapped with `BatchWrapper::WithGil`.
        builder
            .wrapper(BatchWrapper::WithGil)
            .on_data(Box::new(move |key, values, time, diff| {
                Python::with_gil(|py| {
                    on_change.call1(py, (key, PyTuple::new(py, values)?, time, diff))?;
                    Ok(())
                })
            }))
    };

    builder
        .on_time_end(Box::new(move |new_time| {
            Python::with_gil(|py| {
                on_time_end.call1(py, (new_time,))?;
                Ok(())
            })
        }))
        .on_end(Box::new(move || {
            Python::with_gil(|py| {
                on_end.call0(py)?;
                Ok(())
            })
        }))
        .build()
}

/// Reports whether Python's `inspect.iscoroutinefunction` considers `func` a coroutine function.
fn is_async_function(py: Python<'_>, func: Py<PyAny>) -> PyResult<bool> {
    let checker = py.import("inspect")?.getattr("iscoroutinefunction")?;
    let verdict = checker.call1((func,))?;
    verdict.extract()
}

/// Shared, lock-protected buffer of rows captured from one subscribed table.
type CapturedTableData = Arc<Mutex<Vec<DataRow>>>;

/// Subscribes to `table` and records every data event it emits.
///
/// The returned buffer is filled by the subscription callback; each event is stored
/// as a `DataRow` built from its key, values, time and diff. The subscription skips
/// nothing (persisted batches, errors and pending values are all delivered).
fn capture_table_data(
    graph: &dyn Graph,
    table: PyRef<Table>,
    column_paths: Vec<ColumnPath>,
) -> PyResult<CapturedTableData> {
    let captured: CapturedTableData = Arc::new(Mutex::new(Vec::new()));
    // The callback owns its own reference to the buffer.
    let sink = Arc::clone(&captured);
    let callbacks = SubscribeCallbacksBuilder::new()
        .on_data(Box::new(move |key, values, time, diff| {
            let row = DataRow::from_engine(key, Vec::from(values), time, diff);
            sink.lock().unwrap().push(row);
            Ok(())
        }))
        .build();
    let config = SubscribeConfig {
        skip_persisted_batch: false,
        skip_errors: false,
        skip_pending: false,
    };
    graph.subscribe_table(table.handle, column_paths, callbacks, config, None, None)?;
    Ok(captured)
}

/// Drains every buffer in `table_data`, in order, into a single list of rows.
///
/// # Panics
///
/// Panics if one of the mutexes is poisoned.
pub fn make_captured_table(table_data: Vec<CapturedTableData>) -> Vec<DataRow> {
    table_data
        .into_iter()
        .flat_map(|shared_rows| take(&mut *shared_rows.lock().unwrap()))
        .collect()
}

// Runs `logic` against a freshly created dataflow graph and returns the rows captured
// from every table that `logic` returns.
//
// `logic` is called with a new `Scope` and must return an iterable of
// `(table, column_paths)` pairs. The result contains one list of rows per returned table.
#[pyfunction]
#[allow(clippy::too_many_arguments)]
#[pyo3(signature = (
    logic,
    event_loop,
    *,
    stats_monitor = None,
    ignore_asserts = false,
    monitoring_level = MonitoringLevel::None,
    with_http_server = false,
    persistence_config = None,
    license_key = None,
    telemetry_config = TelemetryConfig::default(),
    terminate_on_error = true,
    max_expression_batch_size = 1024,
))]
pub fn run_with_new_graph(
    py: Python,
    logic: PyObject,
    event_loop: PyObject,
    stats_monitor: Option<PyObject>,
    ignore_asserts: bool,
    monitoring_level: MonitoringLevel,
    with_http_server: bool,
    persistence_config: Option<PersistenceConfig>,
    license_key: Option<String>,
    telemetry_config: TelemetryConfig,
    terminate_on_error: bool,
    max_expression_batch_size: usize,
) -> PyResult<Vec<Vec<DataRow>>> {
    LOGGING_RESET_HANDLE.reset();
    // Flush the logger on every exit path of this function.
    defer! {
        log::logger().flush();
    }
    // Environment configuration errors are reported with the engine error type.
    let config = Config::from_env()
        .map_err(|msg| PyErr::from_type(ENGINE_ERROR_TYPE.bind(py).clone(), msg.to_string()))?;
    let license = License::new(license_key)?;
    // A persistence configuration, when given, is prepared and validated against the license.
    let persistence_config = {
        if let Some(persistence_config) = persistence_config {
            let persistence_config = persistence_config.prepare()?;
            persistence_config.validate(&license)?;
            Some(persistence_config)
        } else {
            None
        }
    };
    let is_persisted = persistence_config.is_some();
    let timestamp_at_start = Timestamp::new_from_current_time();

    let telemetry_config = EngineTelemetryConfig::create(
        &license,
        Some(telemetry_config.run_id),
        telemetry_config.monitoring_server,
        telemetry_config.detailed_metrics_dir,
        telemetry_config.trace_parent,
        telemetry_config.metrics_reader_interval_secs,
        telemetry_config.graph,
    )?;

    let results: Vec<Vec<_>> = run_with_wakeup_receiver(py, |wakeup_receiver| {
        let scope_license = license.clone();
        // The GIL is released while the dataflow runs; the graph-building closure
        // below re-acquires it when it needs to call into Python.
        py.allow_threads(|| {
            run_with_new_dataflow_graph(
                move |graph| {
                    let thread_state = PythonThreadState::new();

                    let captured_tables = Python::with_gil(|py| {
                        let our_scope = &Bound::new(
                            py,
                            Scope::new(
                                None,
                                event_loop.clone_ref(py),
                                Some(scope_license.clone()),
                                is_persisted,
                                timestamp_at_start,
                            ),
                        )?;
                        // Build the user's computation with `graph` installed as the scope's graph.
                        let tables: Vec<(PyRef<Table>, Vec<ColumnPath>)> =
                            our_scope.borrow().graph.scoped(graph, || {
                                from_py_iterable(&logic.bind(py).call1((our_scope,))?)
                            })?;
                        our_scope.borrow().clear_caches(py);
                        // Subscribe to every returned table so that its rows get recorded.
                        tables
                            .into_iter()
                            .map(|(table, paths)| capture_table_data(graph, table, paths))
                            .try_collect()
                    })?;
                    Ok((thread_state, captured_tables))
                },
                |(_thread_state, captured_tables)| captured_tables,
                config,
                wakeup_receiver,
                stats_monitor,
                ignore_asserts,
                monitoring_level,
                with_http_server,
                persistence_config,
                &license,
                telemetry_config,
                terminate_on_error,
                max_expression_batch_size,
            )
        })
    })??;
    // Transpose: `results` holds one list of buffers per graph instance (presumably one
    // per worker — confirm), while the output needs one list of buffers per table.
    let mut captured_tables = Vec::new();
    for result in results {
        captured_tables.resize_with(result.len(), Vec::new);
        for (i, table) in result.into_iter().enumerate() {
            captured_tables[i].push(table);
        }
    }
    // Merge the buffers that belong to the same table into a single list of rows.
    Ok(captured_tables
        .into_iter()
        .map(make_captured_table)
        .collect())
}

#[pyfunction]
#[pyo3(signature = (*values, optional = false))]
pub fn ref_scalar(values: &Bound<PyTuple>, optional: bool) -> PyResult<Option<Key>> {
    if optional && values.iter().any(|v| v.is_none()) {
        return Ok(None);
    }
    let key = Key::for_values(&from_py_iterable(values)?);
    Ok(Some(key))
}

// Computes a key from the values followed by `instance`, using
// `ShardPolicy::LastKeyColumn`. With `optional = true`, a `None` among the values
// or a `None` instance yields `None` instead.
#[pyfunction]
#[pyo3(signature = (*values, instance, optional = false))]
pub fn ref_scalar_with_instance(
    values: &Bound<PyTuple>,
    instance: Value,
    optional: bool,
) -> PyResult<Option<Key>> {
    let anything_missing =
        || values.iter().any(|v| v.is_none()) || matches!(instance, Value::None);
    if optional && anything_missing() {
        return Ok(None);
    }
    let mut key_parts: Vec<Value> = from_py_iterable(values)?;
    // The instance goes last, which is the column `LastKeyColumn` refers to.
    key_parts.push(instance);
    Ok(Some(ShardPolicy::LastKeyColumn.generate_key(&key_parts)))
}

// Wraps a raw `KeyImpl` value into a `Key` as is, without deriving it from any values.
#[pyfunction]
pub fn unsafe_make_pointer(value: KeyImpl) -> Key {
    Key(value)
}

// Encodes `value` with bincode and returns the result as Python `bytes`.
// Encoding failures are reported as `ValueError`.
#[pyfunction]
#[pyo3(signature = (value), name="serialize")]
pub fn serialize(py: Python, value: Value) -> PyResult<Py<PyBytes>> {
    bincode::serialize(&value)
        .map(|encoded| PyBytes::new(py, &encoded).unbind())
        .map_err(|e| PyValueError::new_err(format!("failed to serialize: {e}")))
}

// Decodes a `Value` from bincode-encoded bytes.
// Decoding failures are reported as `ValueError`.
#[pyfunction]
#[pyo3(signature = (bytes), name="deserialize")]
pub fn deserialize(bytes: &[u8]) -> PyResult<Value> {
    bincode::deserialize(bytes)
        .map_err(|e| PyValueError::new_err(format!("failed to deserialize: {e}")))
}

// Account, access key (stored as `password`) and container of an Azure Blob Storage location.
#[derive(Clone, Debug)]
#[pyclass(module = "pathway.engine", frozen)]
pub struct AzureBlobStorageSettings {
    account: String,
    password: String,
    container: String,
}

#[pymethods]
impl AzureBlobStorageSettings {
    // Stores the three settings as given; nothing is validated here.
    #[new]
    #[pyo3(signature = (account, password, container))]
    fn new(account: String, password: String, container: String) -> Self {
        AzureBlobStorageSettings {
            account,
            password,
            container,
        }
    }
}

impl AzureBlobStorageSettings {
    /// Builds access-key credentials from the stored account name and password.
    fn credentials(&self) -> AzureStorageCredentials {
        let account = self.account.clone();
        let access_key = self.password.clone();
        AzureStorageCredentials::access_key(account, access_key)
    }
}

// Connection settings for an S3 bucket. `region` already holds the resolved
// `s3::region::Region`, computed from the `region` / `endpoint` constructor arguments.
#[pyclass(module = "pathway.engine", frozen)]
#[derive(Clone, Debug)]
pub struct AwsS3Settings {
    bucket_name: Option<String>,
    region: s3::region::Region,
    access_key: Option<String>,
    secret_access_key: Option<String>,
    with_path_style: bool,
    profile: Option<String>,
    session_token: Option<String>,
}

#[pymethods]
impl AwsS3Settings {
    // Fails with `ValueError` when the region cannot be resolved (see `aws_region`).
    #[new]
    #[allow(clippy::too_many_arguments)]
    #[pyo3(signature = (
        bucket_name = None,
        access_key = None,
        secret_access_key = None,
        with_path_style = false,
        region = None,
        endpoint = None,
        profile = None,
        session_token = None,
    ))]
    fn new(
        bucket_name: Option<String>,
        access_key: Option<String>,
        secret_access_key: Option<String>,
        with_path_style: bool,
        region: Option<String>,
        endpoint: Option<String>,
        profile: Option<String>,
        session_token: Option<String>,
    ) -> PyResult<Self> {
        let region = Self::aws_region(region, endpoint)?;
        Ok(Self {
            bucket_name,
            region,
            access_key,
            secret_access_key,
            with_path_style,
            profile,
            session_token,
        })
    }
}

impl AwsS3Settings {
    fn aws_region(
        region: Option<String>,
        endpoint: Option<String>,
    ) -> PyResult<s3::region::Region> {
        if let Some(endpoint) = endpoint {
            Ok(s3::region::Region::Custom {
                region: region.unwrap_or(endpoint.clone()),
                endpoint,
            })
        } else if let Some(region) = region {
            region
                .parse()
                .map_err(|_| PyValueError::new_err("Incorrect AWS region"))
        } else {
            Err(PyValueError::new_err(
                "At least one of { region, endpoint } must be defined",
            ))
        }
    }

    fn final_bucket_name(&self, deduced_name: Option<&str>) -> PyResult<String> {
        if let Some(bucket_name) = &self.bucket_name {
            Ok(bucket_name.clone())
        } else if let Some(bucket_name) = deduced_name {
            Ok(bucket_name.to_string())
        } else {
            Err(PyRuntimeError::new_err(
                "bucket_name not specified and isn't in the s3 path",
            ))
        }
    }

    fn construct_private_bucket(&self, deduced_name: Option<&str>) -> PyResult<S3Bucket> {
        let credentials = AwsCredentials::new(
            Some(&self.access_key.clone().ok_or(PyRuntimeError::new_err(
                "access key must be specified for a private bucket",
            ))?),
            Some(
                &self
                    .secret_access_key
                    .clone()
                    .ok_or(PyRuntimeError::new_err(
                        "secret access key must be specified for a private bucket",
                    ))?,
            ),
            None,
            None,
            None,
        )
        .map_err(|err| {
            PyRuntimeError::new_err(format!("Unable to form credentials to AWS storage: {err}"))
        })?;

        self.construct_bucket_with_credentials(credentials, deduced_name)
    }

    fn construct_bucket_with_credentials(
        &self,
        credentials: AwsCredentials,
        deduced_name: Option<&str>,
    ) -> PyResult<S3Bucket> {
        S3Bucket::new(
            &self.final_bucket_name(deduced_name)?,
            self.region.clone(),
            credentials,
        )
        .map_err(|err| {
            PyRuntimeError::new_err(format!("Failed to connect to private AWS bucket: {err}"))
        })
    }

    fn construct_public_bucket(&self, deduced_name: Option<&str>) -> PyResult<S3Bucket> {
        S3Bucket::new_public(&self.final_bucket_name(deduced_name)?, self.region.clone()).map_err(
            |err| PyRuntimeError::new_err(format!("Failed to connect to public AWS bucket: {err}")),
        )
    }

    fn construct_bucket(&self, name_override: Option<&str>) -> PyResult<S3Bucket> {
        let has_access_key = self.access_key.is_some();
        let has_secret_access_key = self.secret_access_key.is_some();
        if has_access_key != has_secret_access_key {
            warn!("Only one of access_key and secret_access_key is specified. Trying to connect to a public bucket.");
        }

        let mut bucket = {
            if has_access_key && has_secret_access_key {
                self.construct_private_bucket(name_override)?
            } else {
                let aws_credentials = AwsCredentials::from_sts_env("aws-creds")
                    .or_else(|_| AwsCredentials::from_env())
                    // The rust-s3 crate doesn't check the AWS_PROFILE env var, so we do it by ourselves
                    .or_else(|_| {
                        AwsCredentials::from_profile(
                            self.profile
                                .clone()
                                .or_else(|| std::env::var("AWS_PROFILE").ok())
                                .as_deref(),
                        )
                    })
                    .or_else(|_| AwsCredentials::from_instance_metadata());

                // first, try to deduce credentials from various sources
                if let Ok(credentials) = aws_credentials {
                    self.construct_bucket_with_credentials(credentials, name_override)?
                } else {
                    // if there are no credentials, treat the bucket as a public
                    self.construct_public_bucket(name_override)?
                }
            }
        };

        if self.with_path_style {
            bucket = bucket.with_path_style();
        }

        Ok(bucket)
    }
}

// Authentication settings for Elasticsearch. `auth_type` selects which of the
// optional fields are required: "basic" (username, password), "bearer" (bearer)
// or "apikey" (apikey_id, apikey).
#[pyclass(module = "pathway.engine", frozen)]
pub struct ElasticSearchAuth {
    auth_type: String,
    username: Option<String>,
    password: Option<String>,
    bearer: Option<String>,
    apikey_id: Option<String>,
    apikey: Option<String>,
}

#[pymethods]
impl ElasticSearchAuth {
    // Stores the settings as given; they are validated only in `as_client_auth`.
    #[new]
    #[pyo3(signature = (
        auth_type,
        username = None,
        password = None,
        bearer = None,
        apikey_id = None,
        apikey = None,
    ))]
    fn new(
        auth_type: String,
        username: Option<String>,
        password: Option<String>,
        bearer: Option<String>,
        apikey_id: Option<String>,
        apikey: Option<String>,
    ) -> Self {
        Self {
            auth_type,
            username,
            password,
            bearer,
            apikey_id,
            apikey,
        }
    }
}

impl ElasticSearchAuth {
    /// Converts the settings into client credentials.
    ///
    /// Returns a `ValueError` when `auth_type` is not supported or when a field
    /// required by the selected type is missing.
    fn as_client_auth(&self) -> PyResult<ESCredentials> {
        /// Returns an owned copy of a mandatory setting, or a `ValueError` with `message`.
        fn required(setting: Option<&String>, message: &'static str) -> PyResult<String> {
            setting
                .cloned()
                .ok_or_else(|| PyValueError::new_err(message))
        }

        match self.auth_type.as_str() {
            "basic" => {
                let username = required(
                    self.username.as_ref(),
                    "For basic auth username should be specified",
                )?;
                let password = required(
                    self.password.as_ref(),
                    "For basic auth password should be specified",
                )?;
                Ok(ESCredentials::Basic(username, password))
            }
            "bearer" => {
                let bearer = required(
                    self.bearer.as_ref(),
                    "For bearer auth bearer should be specified",
                )?;
                Ok(ESCredentials::Bearer(bearer))
            }
            "apikey" => {
                let apikey_id = required(
                    self.apikey_id.as_ref(),
                    "For API Key auth apikey_id should be specified",
                )?;
                let apikey = required(
                    self.apikey.as_ref(),
                    "For API Key auth apikey should be specified",
                )?;
                Ok(ESCredentials::ApiKey(apikey_id, apikey))
            }
            _ => Err(PyValueError::new_err("Unsupported type of auth")),
        }
    }
}

// Target of an Elasticsearch connection: node URL (`host`), index name and
// authentication settings.
#[pyclass(module = "pathway.engine", frozen)]
#[derive(Debug)]
pub struct ElasticSearchParams {
    host: String,
    index_name: String,
    auth: Py<ElasticSearchAuth>,
}

#[pymethods]
impl ElasticSearchParams {
    // Stores the parameters as given; the URL is parsed only in `client`.
    #[new]
    fn new(host: String, index_name: String, auth: Py<ElasticSearchAuth>) -> Self {
        Self {
            host,
            index_name,
            auth,
        }
    }
}

impl ElasticSearchParams {
    /// Creates an Elasticsearch client for the single node at `host`, with the
    /// configured credentials and proxy usage disabled.
    ///
    /// Returns a `ValueError` when the credentials are incomplete, the URL cannot
    /// be parsed or the transport cannot be built.
    fn client(&self, py: pyo3::Python) -> PyResult<Elasticsearch> {
        // Credentials are resolved first so that their errors take precedence.
        let credentials = self.auth.borrow(py).as_client_auth()?;

        let node_url = Url::parse(&self.host)
            .map_err(|e| PyValueError::new_err(format!("Failed to parse node URL: {e:?}")))?;

        TransportBuilder::new(SingleNodeConnectionPool::new(node_url))
            .auth(credentials)
            .disable_proxy()
            .build()
            .map(Elasticsearch::new)
            .map_err(|e| {
                PyValueError::new_err(format!(
                    "Failed to build ES transfer with the given params: {e:?}"
                ))
            })
    }
}

// Python-facing description of a `DeltaOptimizerRule`. The retention period is kept
// as a `chrono::TimeDelta`, converted once in the constructor.
#[derive(Clone, Debug)]
#[pyclass(module = "pathway.engine", frozen, name = "DeltaOptimizerRule")]
pub struct PyDeltaOptimizerRule {
    field_name: String,
    time_format: String,
    quick_access_window: std::time::Duration,
    compression_frequency: std::time::Duration,
    retention_period: chrono::TimeDelta,
}

#[pymethods]
impl PyDeltaOptimizerRule {
    // Fails with `ValueError` when `retention_period` cannot be represented as a
    // `chrono::TimeDelta`.
    #[new]
    #[pyo3(signature = (
        field_name,
        time_format,
        quick_access_window,
        compression_frequency,
        retention_period,
    ))]
    pub fn new(
        field_name: String,
        time_format: String,
        quick_access_window: std::time::Duration,
        compression_frequency: std::time::Duration,
        retention_period: std::time::Duration,
    ) -> PyResult<Self> {
        let retention_period = chrono::TimeDelta::from_std(retention_period).map_err(|e| {
            PyValueError::new_err(format!("Failed to parse retention_period: {e}"))
        })?;
        Ok(Self {
            field_name,
            time_format,
            quick_access_window,
            compression_frequency,
            retention_period,
        })
    }
}

impl PyDeltaOptimizerRule {
    /// Converts the wrapper into the engine's `DeltaOptimizerRule`.
    fn into_inner(self) -> DeltaOptimizerRule {
        let Self {
            field_name,
            time_format,
            quick_access_window,
            compression_frequency,
            retention_period,
        } = self;
        DeltaOptimizerRule::new(
            field_name,
            time_format,
            quick_access_window,
            compression_frequency,
            retention_period,
        )
    }
}

// MQTT publishing settings: quality-of-service level and the retain flag.
#[derive(Clone, Debug)]
#[pyclass(module = "pathway.engine", frozen, name = "MqttSettings")]
pub struct MqttSettings {
    qos: MqttQoS,
    retain: bool,
}

#[pymethods]
impl MqttSettings {
    // Maps the numeric QoS level (0, 1 or 2) onto `MqttQoS`; any other value is
    // rejected with `ValueError`.
    #[new]
    #[pyo3(signature = (
        qos,
        retain,
    ))]
    pub fn new(qos: usize, retain: bool) -> PyResult<Self> {
        let level = match qos {
            0 => Ok(MqttQoS::AtMostOnce),
            1 => Ok(MqttQoS::AtLeastOnce),
            2 => Ok(MqttQoS::ExactlyOnce),
            _ => Err(PyValueError::new_err(format!(
                "MQTT QoS can only be 0, 1 or 2. Specified value: {qos}"
            ))),
        }?;
        Ok(Self { qos: level, retain })
    }
}

// Python-facing description of a data storage (input, output or persistence backend).
// `storage_type` names the kind of storage; most other fields are optional settings.
// NOTE(review): which optional fields are meaningful presumably depends on
// `storage_type` — the code that interprets them is outside this part of the file.
// Only `mode` is readable from Python (`#[pyo3(get)]`).
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Debug)]
#[pyclass(module = "pathway.engine", frozen)]
pub struct DataStorage {
    storage_type: String,
    path: Option<String>,
    rdkafka_settings: Option<HashMap<String, String>>,
    topic: Option<String>,
    connection_string: Option<String>,
    csv_parser_settings: Option<CsvParserSettings>,
    #[pyo3(get)]
    mode: ConnectorMode,
    read_method: ReadMethod,
    snapshot_maintenance_on_output: bool,
    aws_s3_settings: Option<Arc<Py<AwsS3Settings>>>,
    elasticsearch_params: Option<Arc<Py<ElasticSearchParams>>>,
    parallel_readers: Option<usize>,
    python_subject: Option<Arc<Py<PythonSubject>>>,
    unique_name: Option<UniqueName>,
    max_batch_size: Option<usize>,
    object_pattern: String,
    mock_events: Option<HashMap<(UniqueName, usize), Vec<SnapshotEvent>>>,
    table_name: Option<String>,
    header_fields: Vec<(String, usize)>,
    key_field_index: Option<usize>,
    min_commit_frequency: Option<u64>,
    downloader_threads_count: Option<usize>,
    database: Option<String>,
    start_from_timestamp_ms: Option<i64>,
    namespace: Option<Vec<String>>,
    table_writer_init_mode: TableWriterInitMode,
    topic_name_index: Option<usize>,
    partition_columns: Option<Vec<String>>,
    backfilling_thresholds: Option<Vec<BackfillingThreshold>>,
    azure_blob_storage_settings: Option<AzureBlobStorageSettings>,
    delta_optimizer_rule: Option<PyDeltaOptimizerRule>,
    mqtt_settings: Option<MqttSettings>,
    only_provide_metadata: bool,
    sort_key_index: Option<usize>,
    legacy_mode: bool,
    js_stream_name: Option<String>,
    durable_consumer_name: Option<String>,
    iceberg_catalog: Option<IcebergCatalogSettings>,
}

// Python class `PersistenceMode`: a thin wrapper around the engine's `PersistenceMode`.
// The doc comment below is written for Python users (pyo3 uses it as the class docstring),
// hence the reStructuredText-style markup and the `doc_markdown` allowance.
#[allow(clippy::doc_markdown)]
/// Specifies the type of persistence used by Pathway.
///
/// Attributes:
///     PERSISTING: Persists all data and state necessary to fully restore the computation.
///     UDF_CACHING: Only the results of UDFs, for which `cache_strategy` is set, are persisted, however, the data
///         needs to be read again by the input connectors.
///
/// Notes:
///     ``pw.PersistenceMode.UDF_CACHING`` currently works either when the File System
///     is used as the backend for persistent storage, or, if another backend is used, a
///     temporary directory will be created for writing the cache. In the latter case,
///     persistence guarantees are not provided.
///     By default, ``pw.PersistenceMode.UDF_CACHING`` does not persist data from input
///     sources. This means that if the program restarts, it will re-read all input streams
///     from the beginning. However, this behavior can be overridden by assigning names
///     to specific input sources. If an input connector has a name parameter, the input
///     stream for this source will also be persisted. Upon restart, the program will
///     resume reading from the point where it previously stopped.
#[pyclass(module = "pathway.engine", frozen, name = "PersistenceMode")]
pub struct PyPersistenceMode(PersistenceMode);

// One class attribute per engine variant. Note that the class docstring above
// describes only PERSISTING and UDF_CACHING.
#[pymethods]
impl PyPersistenceMode {
    #[classattr]
    pub const REALTIME_REPLAY: PersistenceMode = PersistenceMode::RealtimeReplay;
    #[classattr]
    pub const SPEEDRUN_REPLAY: PersistenceMode = PersistenceMode::SpeedrunReplay;
    #[classattr]
    pub const BATCH: PersistenceMode = PersistenceMode::Batch;
    #[classattr]
    pub const PERSISTING: PersistenceMode = PersistenceMode::Persisting;
    #[classattr]
    pub const SELECTIVE_PERSISTING: PersistenceMode = PersistenceMode::SelectivePersisting;
    #[classattr]
    pub const UDF_CACHING: PersistenceMode = PersistenceMode::UdfCaching;
    #[classattr]
    pub const OPERATOR_PERSISTING: PersistenceMode = PersistenceMode::OperatorPersisting;
}

/// Extracts the engine value from a Python `PersistenceMode` object.
impl<'py> FromPyObject<'py> for PersistenceMode {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper = ob.extract::<PyRef<PyPersistenceMode>>()?;
        let mode = wrapper.0;
        Ok(mode)
    }
}

/// Converts the engine value into a Python `PersistenceMode` object.
impl<'py> IntoPyObject<'py> for PersistenceMode {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PyPersistenceMode(self);
        wrapper.into_bound_py_any(py)
    }
}

// Python class `SnapshotAccess`: a thin wrapper around the engine's `SnapshotAccess`.
#[pyclass(module = "pathway.engine", frozen, name = "SnapshotAccess")]
pub struct PySnapshotAccess(SnapshotAccess);

// One class attribute per engine variant.
#[pymethods]
impl PySnapshotAccess {
    #[classattr]
    pub const REPLAY: SnapshotAccess = SnapshotAccess::Replay;
    #[classattr]
    pub const RECORD: SnapshotAccess = SnapshotAccess::Record;
    #[classattr]
    pub const FULL: SnapshotAccess = SnapshotAccess::Full;
    #[classattr]
    pub const OFFSETS_ONLY: SnapshotAccess = SnapshotAccess::OffsetsOnly;
}

/// Extracts the engine value from a Python `SnapshotAccess` object.
impl<'py> FromPyObject<'py> for SnapshotAccess {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper = ob.extract::<PyRef<PySnapshotAccess>>()?;
        let access = wrapper.0;
        Ok(access)
    }
}

/// Converts the engine value into a Python `SnapshotAccess` object.
impl<'py> IntoPyObject<'py> for SnapshotAccess {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PySnapshotAccess(self);
        wrapper.into_bound_py_any(py)
    }
}
#[derive(Clone, Debug)]
#[pyclass(module = "pathway.engine", frozen)]
pub struct PersistenceConfig {
    snapshot_interval: ::std::time::Duration,
    backend: DataStorage,
    snapshot_access: SnapshotAccess,
    persistence_mode: PersistenceMode,
    continue_after_replay: bool,
}

#[pymethods]
impl PersistenceConfig {
    #[new]
    #[pyo3(signature = (
        *,
        snapshot_interval_ms,
        backend,
        snapshot_access = SnapshotAccess::Full,
        persistence_mode = PersistenceMode::Batch,
        continue_after_replay = true,
    ))]
    fn new(
        snapshot_interval_ms: u64,
        backend: DataStorage,
        snapshot_access: SnapshotAccess,
        persistence_mode: PersistenceMode,
        continue_after_replay: bool,
    ) -> Self {
        Self {
            snapshot_interval: ::std::time::Duration::from_millis(snapshot_interval_ms),
            backend,
            snapshot_access,
            persistence_mode,
            continue_after_replay,
        }
    }
}

impl PersistenceConfig {
    // Consumes the Python-side settings and builds a `PersistenceManagerOuterConfig`.
    //
    // The only fallible step is converting the storage backend; its error is returned as is.
    fn prepare(self) -> PyResult<PersistenceManagerOuterConfig> {
        let Self {
            snapshot_interval,
            backend,
            snapshot_access,
            persistence_mode,
            continue_after_replay,
        } = self;
        let storage_config = backend.construct_persistent_storage_config()?;
        let outer_config = PersistenceManagerOuterConfig::new(
            snapshot_interval,
            storage_config,
            snapshot_access,
            persistence_mode,
            continue_after_replay,
        );
        Ok(outer_config)
    }
}

// Telemetry settings exposed to Python. Because of `get_all`, every field below is
// readable as an attribute of the Python object; `frozen` means they are not modified
// in place (see `with_trace_parent`, which returns a modified copy instead).
//
// The `Default` value (all options `None`, all lists empty, empty `run_id`) is what
// a disabled engine telemetry configuration is converted into.
#[derive(Clone, Debug, Default)]
#[pyclass(module = "pathway.engine", frozen, get_all)]
pub struct TelemetryConfig {
    monitoring_server: Option<String>,
    detailed_metrics_dir: Option<String>,
    logging_servers: Vec<String>,
    tracing_servers: Vec<String>,
    metrics_servers: Vec<String>,
    service_name: Option<String>,
    service_version: Option<String>,
    service_namespace: Option<String>,
    service_instance_id: Option<String>,
    run_id: String,
    license_key: Option<String>,
    trace_parent: Option<String>,
    // Whole seconds of the engine's `periodic_reader_interval`.
    metrics_reader_interval_secs: Option<u64>,
    graph: Option<String>,
}

#[pymethods]
impl TelemetryConfig {
    #[staticmethod]
    #[pyo3(signature = (
        *,
        run_id = None,
        license_key = None,
        monitoring_server = None,
        detailed_metrics_dir = None,
        metrics_reader_interval_secs = None,
        graph = None,
    ))]
    fn create(
        run_id: Option<String>,
        license_key: Option<String>,
        monitoring_server: Option<String>,
        detailed_metrics_dir: Option<String>,
        metrics_reader_interval_secs: Option<u64>,
        graph: Option<String>,
    ) -> PyResult<TelemetryConfig> {
        let license = License::new(license_key)?;
        let config = EngineTelemetryConfig::create(
            &license,
            run_id,
            monitoring_server,
            detailed_metrics_dir,
            None,
            metrics_reader_interval_secs,
            graph,
        )?;
        Ok(config.into())
    }

    #[pyo3(signature = (trace_parent))]
    fn with_trace_parent(&self, trace_parent: String) -> Self {
        let mut new_config = self.clone();
        new_config.trace_parent = Some(trace_parent);
        new_config
    }
}

// Converts the engine-side telemetry configuration into its Python-visible counterpart.
impl From<EngineTelemetryConfig> for TelemetryConfig {
    fn from(config: EngineTelemetryConfig) -> Self {
        let enabled = match config {
            // Disabled telemetry is represented by the all-empty default value.
            EngineTelemetryConfig::Disabled => return Self::default(),
            EngineTelemetryConfig::Enabled(enabled) => enabled,
        };

        Self {
            // Endpoints and output locations are carried over unchanged.
            monitoring_server: enabled.monitoring_server,
            detailed_metrics_dir: enabled.detailed_metrics_dir,
            logging_servers: enabled.logging_servers,
            tracing_servers: enabled.tracing_servers,
            metrics_servers: enabled.metrics_servers,
            // These are always present on the engine side but optional on the Python side.
            service_name: Some(enabled.service_name),
            service_version: Some(enabled.service_version),
            service_namespace: Some(enabled.service_namespace),
            service_instance_id: Some(enabled.service_instance_id),
            run_id: enabled.run_id,
            license_key: Some(enabled.license_key),
            trace_parent: enabled.trace_parent,
            // Only whole seconds of the interval are kept.
            metrics_reader_interval_secs: Some(enabled.periodic_reader_interval.as_secs()),
            graph: enabled.graph,
        }
    }
}

// Extraction from Python: the object must be a `PySnapshotEvent` wrapper; the wrapped
// event is cloned out of it.
impl<'py> FromPyObject<'py> for SnapshotEvent {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let wrapper: PyRef<'_, PySnapshotEvent> = ob.extract()?;
        Ok(wrapper.0.clone())
    }
}

// Conversion to Python: the event is placed into a `PySnapshotEvent` wrapper, which is
// then turned into a generic Python object.
impl<'py> IntoPyObject<'py> for SnapshotEvent {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let wrapper = PySnapshotEvent(self);
        wrapper.into_bound_py_any(py)
    }
}
// Python-facing wrapper around the engine's `SnapshotEvent`, registered in the
// `pathway.engine` module under the name `SnapshotEvent`.
#[pyclass(module = "pathway.engine", frozen, name = "SnapshotEvent")]
pub struct PySnapshotEvent(SnapshotEvent);

// Static constructors and constants through which Python code creates snapshot events.
#[pymethods]
impl PySnapshotEvent {
    // Creates an `Insert` event for the given key and row values.
    #[staticmethod]
    pub fn insert(key: Key, values: Vec<Value>) -> SnapshotEvent {
        SnapshotEvent::Insert(key, values)
    }
    // Creates a `Delete` event for the given key and row values.
    #[staticmethod]
    pub fn delete(key: Key, values: Vec<Value>) -> SnapshotEvent {
        SnapshotEvent::Delete(key, values)
    }
    // Creates an `AdvanceTime` event for the given timestamp. Offsets cannot be passed
    // from Python, so the event always carries a freshly created `OffsetAntichain`.
    #[staticmethod]
    pub fn advance_time(timestamp: Timestamp) -> SnapshotEvent {
        let offsets = OffsetAntichain::new();
        SnapshotEvent::AdvanceTime(timestamp, offsets)
    }
    // The `Finished` event, exposed as a class attribute.
    #[classattr]
    pub const FINISHED: SnapshotEvent = SnapshotEvent::Finished;
}

// A bundle of Python objects plus two flags, passed from Python as one value.
// NOTE(review): `start`, `read`, `seek`, `on_persisted_run` and `end` are presumably
// callables invoked by a Python-backed connector; how they are called is not visible
// in this part of the file, so confirm against the code that uses this struct.
#[pyclass(module = "pathway.engine", frozen)]
#[derive(Debug)]
pub struct PythonSubject {
    pub start: Py<PyAny>,
    pub read: Py<PyAny>,
    pub seek: Py<PyAny>,
    pub on_persisted_run: Py<PyAny>,
    pub end: Py<PyAny>,
    pub is_internal: bool,
    pub deletions_enabled: bool,
}

#[pymethods]
impl PythonSubject {
    #[new]
    #[pyo3(signature = (start, read, seek, on_persisted_run, end, is_internal, deletions_enabled))]
    fn new(
        start: Py<PyAny>,
        read: Py<PyAny>,
        seek: Py<PyAny>,
        on_persisted_run: Py<PyAny>,
        end: Py<PyAny>,
        is_internal: bool,
        deletions_enabled: bool,
    ) -> Self {
        Self {
            start,
            read,
            seek,
            on_persisted_run,
            end,
            is_internal,
            deletions_enabled,
        }
    }
}

// Description of a single value field. Every field of this struct is readable from
// Python (`#[pyo3(get)]`). The class is not `frozen`: `set_default` and `set_metadata`
// modify an existing instance.
#[pyclass(module = "pathway.engine")]
#[derive(Clone)]
pub struct ValueField {
    #[pyo3(get)]
    pub name: String,
    #[pyo3(get)]
    pub type_: Type,
    // Forced to `FieldSource::Metadata` by the constructor when `name` equals
    // `METADATA_FIELD_NAME`.
    #[pyo3(get)]
    pub source: FieldSource,
    // `None` until `set_default` is called.
    #[pyo3(get)]
    pub default: Option<Value>,
    // `None` until `set_metadata` is called.
    #[pyo3(get)]
    pub metadata: Option<String>,
}

impl ValueField {
    // Builds an `InnerSchemaField` from clones of this field's type and default value.
    // The name, source and metadata are not part of the result.
    fn as_inner_schema_field(&self) -> InnerSchemaField {
        let type_ = self.type_.clone();
        let default = self.default.clone();
        InnerSchemaField::new(type_, default)
    }
}

#[pymethods]
impl ValueField {
    // Python constructor. The new field has neither a default value nor metadata.
    //
    // A field named `METADATA_FIELD_NAME` always gets `FieldSource::Metadata`, whatever
    // `source` was passed; any other field keeps the requested source.
    #[new]
    #[pyo3(signature = (name, type_, source = FieldSource::Payload))]
    pub fn new(name: String, type_: Type, source: FieldSource) -> Self {
        let source = if name == METADATA_FIELD_NAME {
            FieldSource::Metadata
        } else {
            source
        };

        Self {
            name,
            type_,
            source,
            default: None,
            metadata: None,
        }
    }

    // Converts `ob` into a `Value` according to this field's type and stores it as the
    // default. If the conversion fails, the error is returned and the previous default
    // is kept.
    fn set_default(&mut self, ob: &Bound<PyAny>) -> PyResult<()> {
        let value = extract_value(ob, &self.type_)?;
        self.default = Some(value);
        Ok(())
    }

    // Stores the contents of the given Python string as this field's metadata.
    fn set_metadata(&mut self, ob: &Bound<PyString>) -> PyResult<()> {
        let text: String = ob.extract()?;
        self.metadata = Some(text);
        Ok(())
    }
}

// A comparison of one field against a threshold value. All three fields are readable
// from Python (`get_all`).
#[derive(Clone, Debug)]
#[pyclass(module = "pathway.engine", frozen, get_all)]
pub struct BackfillingThreshold {
    pub field: String,
    pub threshold: Value,
    // One of ">", "<", ">=", "<=", "==", "!="; this is checked by the constructor.
    pub comparison_op: String, // TODO: enum?
}

#[pymethods]
impl BackfillingThreshold {
    // Python constructor.
    //
    // Fails with a `ValueError` listing the supported operators when `comparison_op`
    // is not one of them; otherwise stores the three arguments unchanged.
    #[new]
    #[pyo3(signature = (field, threshold, comparison_op))]
    fn new(field: String, threshold: Value, comparison_op: String) -> PyResult<Self> {
        const ALLOWED_COMPARISON_OPS: [&str; 6] = [">", "<", ">=", "<=", "==", "!="];

        if ALLOWED_COMPARISON_OPS.contains(&comparison_op.as_str()) {
            return Ok(Self {
                field,
                threshold,
                comparison_op,
            });
        }

        Err(PyValueError::new_err(format!(
            "Unknown 'comparison_op': only {} are supported, but '{comparison_op}' was specified.",
            ALLOWED_COMPARISON_OPS
                .iter()
                .map(|op| format!("'{op}'"))
                .format(", ")
        )))
    }
}

// Connection parameters for a schema registry, as received from Python (exposed as
// `pathway.engine.SchemaRegistrySettings`). The struct only stores the parameters;
// the actual `SchemaRegistrySettings` object is built later by `create_settings`.
#[derive(Clone, Debug)]
#[pyclass(module = "pathway.engine", frozen, name = "SchemaRegistrySettings")]
pub struct PySchemaRegistrySettings {
    // Non-empty: enforced by the constructor.
    urls: Vec<String>,
    token_authorization: Option<String>,
    // Always present when `password` is present: enforced by the constructor.
    username: Option<String>,
    password: Option<String>,
    // (name, value) pairs, each added with `add_header` when the settings are built.
    headers: Vec<(String, String)>,
    proxy: Option<String>,
    timeout: Option<std::time::Duration>,
}

#[pymethods]
impl PySchemaRegistrySettings {
    // Python constructor: validates the arguments and stores them without contacting
    // the registry.
    //
    // Fails with a `ValueError` when `urls` is empty, or when a password is given
    // without a username.
    #[new]
    #[pyo3(signature = (
        urls,
        token_authorization = None,
        username = None,
        password = None,
        headers = Vec::new(),
        proxy = None,
        timeout = None,
    ))]
    pub fn new(
        urls: Vec<String>,
        token_authorization: Option<String>,
        username: Option<String>,
        password: Option<String>,
        headers: Vec<(String, String)>,
        proxy: Option<String>,
        timeout: Option<std::time::Duration>,
    ) -> PyResult<Self> {
        // Why the client is not created here:
        // the Confluent Schema Registry client wraps the registry's HTTP API and issues
        // requests through Reqwest's `reqwest::blocking::get` and `reqwest::blocking::post`.
        // The blocking client internally starts its own async runtime on a separate thread
        // and relies on thread-local storage to manage tasks. That design conflicts with
        // PyO3 and the Python GIL: the client is created successfully, but its requests
        // effectively time out. Client creation is therefore moved into a separate method
        // that is executed outside the GIL.
        if urls.is_empty() {
            return Err(PyValueError::new_err("The list of URLs must be non-empty."));
        }

        let password_without_username = password.is_some() && username.is_none();
        if password_without_username {
            return Err(PyValueError::new_err(
                "Username can't be empty, if password is specified.",
            ));
        }

        let settings = Self {
            urls,
            token_authorization,
            username,
            password,
            headers,
            proxy,
            timeout,
        };
        Ok(settings)
    }
}

impl PySchemaRegistrySettings {
    // Turns the stored parameters into a `SchemaRegistrySettings` object.
    //
    // The first URL seeds the builder and the remaining ones are added afterwards; the
    // constructor guarantees that at least one URL is present. A failure of the final
    // `build` call is reported as a `ValueError`.
    fn create_settings(self) -> PyResult<SchemaRegistrySettings> {
        let mut builder = SchemaRegistrySettings::new_builder(self.urls[0].clone());
        for extra_url in self.urls.iter().skip(1) {
            builder.add_url(extra_url.clone());
        }

        // Authorization: a token and/or basic credentials, whichever were provided.
        if let Some(token) = self.token_authorization.as_deref() {
            builder.set_token_authorization(token);
        }
        if let Some(username) = self.username.as_deref() {
            builder.set_basic_authorization(username, self.password.as_deref());
        }

        // Transport-level options.
        if let Some(proxy) = self.proxy.as_deref() {
            builder.set_proxy(proxy);
        }
        for (header_name, header_value) in &self.headers {
            builder.add_header(header_name, header_value);
        }
        if let Some(timeout) = self.timeout {
            builder.set_timeout(timeout);
        }

        match builder.build() {
            Ok(settings) => Ok(settings),
            Err(e) => Err(PyValueError::new_err(format!(
                "Failed to connect to the schema registry: {e}"
            ))),
        }
    }

    // Builds the registry settings and wraps them into a `RegistryJsonDecoder`.
    pub fn build_decoder(self) -> PyResult<RegistryJsonDecoder> {
        let settings = self.create_settings()?;
        Ok(RegistryJsonDecoder::new(settings))
    }

    // Builds the registry settings and wraps them into a `RegistryJsonEncoder`.
    pub fn build_encoder(self) -> PyResult<RegistryJsonEncoder> {
        let settings = self.create_settings()?;
        Ok(RegistryJsonEncoder::new(settings))
    }
}

// Description of a data format as configured from Python. Every field is readable as
// a Python attribute (`get_all`), and the class is `frozen`.
// The constructor stores its arguments here one-to-one; interpreting them (for example
// the meaning of a particular `format_type` string) happens outside this struct.
#[pyclass(module = "pathway.engine", frozen, get_all)]
pub struct DataFormat {
    format_type: String,
    key_field_names: Option<Vec<String>>,
    value_fields: Vec<Py<ValueField>>,
    delimiter: Option<char>,
    table_name: Option<String>,
    column_paths: Option<HashMap<String, String>>,
    // Defaults to `true` in the Python constructor.
    field_absence_is_error: bool,
    // Defaults to `true` in the Python constructor.
    parse_utf8: bool,
    // Defaults to `DebeziumDBType::Postgres` in the Python constructor.
    debezium_db_type: DebeziumDBType,
    // Defaults to `SessionType::Native`; see `is_native_session_used`.
    session_type: SessionType,
    value_field_index: Option<usize>,
    // Defaults to `KeyGenerationPolicy::PreferMessageKey` in the Python constructor.
    key_generation_policy: KeyGenerationPolicy,
    schema_registry_settings: Option<PySchemaRegistrySettings>,
    subject: Option<String>,
    designated_timestamp_policy: Option<String>,
    external_diff_column_index: Option<usize>,
    timestamp_unit: Option<String>,
    message_queue_key_field: Option<String>,
}

#[pymethods]
impl DataStorage {
    #[new]
    #[pyo3(signature = (
        storage_type,
        path = None,
        rdkafka_settings = None,
        topic = None,
        connection_string = None,
        csv_parser_settings = None,
        mode = ConnectorMode::Streaming,
        read_method = ReadMethod::ByLine,
        snapshot_maintenance_on_output = false,
        aws_s3_settings = None,
        elasticsearch_params = None,
        parallel_readers = None,
        python_subject = None,
        unique_name = None,
        max_batch_size = None,
        object_pattern = "*".to_string(),
        mock_events = None,
        table_name = None,
        header_fields = None,
        key_field_index = None,
        min_commit_frequency = None,
        downloader_threads_count = None,
        database = None,
        start_from_timestamp_ms = None,
        namespace = None,
        table_writer_init_mode = TableWriterInitMode::Default,
        topic_name_index = None,
        partition_columns = None,
        backfilling_thresholds = None,
        azure_blob_storage_settings = None,
        delta_optimizer_rule = None,
        mqtt_settings = None,
        only_provide_metadata = false,
        sort_key_index = None,
        legacy_mode = false,
        js_stream_name = None,
        durable_consumer_name = None,
        iceberg_catalog = None,
    ))]
    #[allow(clippy::too_many_arguments)]
    #[allow(clippy::fn_params_excessive_bools)]
    fn new(
        storage_type: String,
        path: Option<String>,
        rdkafka_settings: Option<HashMap<String, String>>,
        topic: Option<String>,
        connection_string: Option<String>,
        csv_parser_settings: Option<CsvParserSettings>,
        mode: ConnectorMode,
        read_method: ReadMethod,
        snapshot_maintenance_on_output: bool,
        aws_s3_settings: Option<Py<AwsS3Settings>>,
        elasticsearch_params: Option<Py<ElasticSearchParams>>,
        parallel_readers: Option<usize>,
        python_subject: Option<Py<PythonSubject>>,
        unique_name: Option<UniqueName>,
        max_batch_size: Option<usize>,
        object_pattern: String,
        mock_events: Option<HashMap<(UniqueName, usize), Vec<SnapshotEvent>>>,
        table_name: Option<String>,
        header_fields: Option<Vec<(String, usize)>>,
        key_field_index: Option<usize>,
        min_commit_frequency: Option<u64>,
        downloader_threads_count: Option<usize>,
        database: Option<String>,
        start_from_timestamp_ms: Option<i64>,
        namespace: Option<Vec<String>>,
        table_writer_init_mode: TableWriterInitMode,
        topic_name_index: Option<usize>,
        partition_columns: Option<Vec<String>>,
        backfilling_thresholds: Option<Vec<BackfillingThreshold>>,
        azure_blob_storage_settings: Option<AzureBlobStorageSettings>,
        delta_optimizer_rule: Option<PyDeltaOptimizerRule>,
        mqtt_settings: Option<MqttSettings>,
        only_provide_metadata: bool,
        sort_key_index: Option<usize>,
        legacy_mode: bool,
        js_stream_name: Option<String>,
        durable_consumer_name: Option<String>,
        iceberg_catalog: Option<IcebergCatalogSettings>,
    ) -> Self {
        DataStorage {
            storage_type,
            path,
            rdkafka_settings,
            topic,
            connection_string,
            csv_parser_settings,
            mode,
            read_method,
            snapshot_maintenance_on_output,
            aws_s3_settings: aws_s3_settings.map(Into::into),
            elasticsearch_params: elasticsearch_params.map(Into::into),
            parallel_readers,
            python_subject: python_subject.map(Into::into),
            unique_name,
            max_batch_size,
            object_pattern,
            mock_events,
            table_name,
            header_fields: header_fields.unwrap_or_default(),
            key_field_index,
            min_commit_frequency,
            downloader_threads_count,
            database,
            start_from_timestamp_ms,
            namespace,
            table_writer_init_mode,
            topic_name_index,
            partition_columns,
            backfilling_thresholds,
            azure_blob_storage_settings,
            delta_optimizer_rule,
            mqtt_settings,
            only_provide_metadata,
            sort_key_index,
            legacy_mode,
            js_stream_name,
            durable_consumer_name,
            iceberg_catalog,
        }
    }

    #[pyo3(signature = ())]
    fn delta_s3_storage_options(&self) -> PyResult<HashMap<String, String>> {
        let (bucket_name, _) = S3Scanner::deduce_bucket_and_path(self.path()?);
        let s3_settings_py: &Py<_> = self
            .aws_s3_settings
            .as_ref()
            .ok_or_else(|| {
                PyValueError::new_err("S3 connection settings weren't specified for S3 data source")
            })?
            .borrow();
        let s3_settings = s3_settings_py.get();

        let mut storage_options = HashMap::new();
        storage_options.insert("AWS_S3_ALLOW_UNSAFE_RENAME".to_string(), "True".to_string());

        let virtual_hosted_style_request_flag = {
            // Virtually hosted-style requests are mutually exclusive with path-style requests
            if s3_settings.with_path_style {
                "False".to_string()
            } else {
                "True".to_string()
            }
        };
        storage_options.insert(
            "AWS_VIRTUAL_HOSTED_STYLE_REQUEST".to_string(),
            virtual_hosted_style_request_flag,
        );
        storage_options.insert(
            "AWS_BUCKET_NAME".to_string(),
            s3_settings.final_bucket_name(bucket_name.as_deref())?,
        );

        if let Some(access_key) = &s3_settings.access_key {
            storage_options.insert("AWS_ACCESS_KEY_ID".to_string(), access_key.clone());
        }
        if let Some(secret_access_key) = &s3_settings.secret_access_key {
            storage_options.insert(
                "AWS_SECRET_ACCESS_KEY".to_string(),
                secret_access_key.clone(),
            );
        }
        if let Some(session_token) = &s3_settings.session_token {
            storage_options.insert("AWS_SESSION_TOKEN".to_string(), session_token.clone());
        }
        if let Some(profile) = &s3_settings.profile {
            storage_options.insert("AWS_PROFILE".to_string(), profile.clone());
        }

        if let s3::Region::Custom { endpoint, region } = &s3_settings.region {
            if endpoint.starts_with("https://") || endpoint.starts_with("http://") {
                storage_options.insert("AWS_ENDPOINT_URL".to_string(), endpoint.clone());
            } else {
                storage_options.insert(
                    "AWS_ENDPOINT_URL".to_string(),
                    format!("https://{endpoint}"),
                );
            }
            storage_options.insert("AWS_ALLOW_HTTP".to_string(), "True".to_string());
            storage_options.insert("AWS_STORAGE_ALLOW_HTTP".to_string(), "True".to_string());
            if region != endpoint {
                storage_options.insert("AWS_REGION".to_string(), region.clone());
            }
        } else {
            storage_options.insert("AWS_REGION".to_string(), s3_settings.region.to_string());
        }

        Ok(storage_options)
    }
}

#[pymethods]
#[allow(clippy::needless_pass_by_value)]
impl DataFormat {
    // Python constructor. `format_type`, `key_field_names` and `value_fields` are
    // required; the remaining arguments have the defaults given in the signature.
    // Each argument is stored unchanged in the field of the same name.
    #[new]
    #[pyo3(signature = (
        format_type,
        key_field_names,
        value_fields,
        delimiter = None,
        table_name = None,
        column_paths = None,
        field_absence_is_error = true,
        parse_utf8 = true,
        debezium_db_type = DebeziumDBType::Postgres,
        session_type = SessionType::Native,
        value_field_index = None,
        key_generation_policy = KeyGenerationPolicy::PreferMessageKey,
        schema_registry_settings = None,
        subject = None,
        designated_timestamp_policy = None,
        external_diff_column_index = None,
        timestamp_unit = None,
        message_queue_key_field = None,
    ))]
    #[allow(clippy::too_many_arguments)]
    fn new(
        format_type: String,
        key_field_names: Option<Vec<String>>,
        value_fields: Vec<Py<ValueField>>,
        delimiter: Option<char>,
        table_name: Option<String>,
        column_paths: Option<HashMap<String, String>>,
        field_absence_is_error: bool,
        parse_utf8: bool,
        debezium_db_type: DebeziumDBType,
        session_type: SessionType,
        value_field_index: Option<usize>,
        key_generation_policy: KeyGenerationPolicy,
        schema_registry_settings: Option<PySchemaRegistrySettings>,
        subject: Option<String>,
        designated_timestamp_policy: Option<String>,
        external_diff_column_index: Option<usize>,
        timestamp_unit: Option<String>,
        message_queue_key_field: Option<String>,
    ) -> Self {
        Self {
            // required arguments
            format_type,
            key_field_names,
            value_fields,
            // optional arguments, in signature order
            delimiter,
            table_name,
            column_paths,
            field_absence_is_error,
            parse_utf8,
            debezium_db_type,
            session_type,
            value_field_index,
            key_generation_policy,
            schema_registry_settings,
            subject,
            designated_timestamp_policy,
            external_diff_column_index,
            timestamp_unit,
            message_queue_key_field,
        }
    }

    // Tells whether this format was configured with `SessionType::Native`.
    fn is_native_session_used(&self) -> bool {
        let session_type = &self.session_type;
        matches!(session_type, SessionType::Native)
    }
}

// Parameters of an Iceberg catalog as configured from Python. The fields are private
// and are consumed by `create`, which builds the actual catalog object.
#[derive(Clone, Debug)]
#[pyclass(module = "pathway.engine", frozen)]
pub struct IcebergCatalogSettings {
    // Catalog kind; `create` accepts "rest" and "glue".
    type_: String,
    // Required for a "rest" catalog, optional for a "glue" one.
    uri: Option<String>,
    // Required for a "glue" catalog, optional for a "rest" one.
    warehouse: Option<String>,
    // Only read when creating a "glue" catalog.
    catalog_id: Option<String>,
    // Only read when creating a "glue" catalog.
    aws_settings: Option<AwsS3Settings>,
}

#[pymethods]
impl IcebergCatalogSettings {
    #[new]
    #[pyo3(signature = (
        type_,
        uri = None,
        warehouse = None,
        catalog_id = None,
        aws_settings = None,
    ))]
    pub fn new(
        type_: String,
        uri: Option<String>,
        warehouse: Option<String>,
        catalog_id: Option<String>,
        aws_settings: Option<AwsS3Settings>,
    ) -> Self {
        Self {
            type_,
            uri,
            warehouse,
            catalog_id,
            aws_settings,
        }
    }
}

impl IcebergCatalogSettings {
    // Returns an owned copy of a required parameter, or a `ValueError` naming the
    // parameter when it is absent.
    //
    // The error is built lazily, so nothing is formatted when the parameter is present.
    fn expect_parameter(parameter: Option<&String>, name: &str) -> PyResult<String> {
        parameter
            .cloned()
            .ok_or_else(|| PyValueError::new_err(format!("expected parameter: '{name}'")))
    }

    // Adds `key` to `props` when `value` is present; does nothing otherwise.
    fn insert_if_some(
        props: &mut HashMap<String, String>,
        key: &str,
        value: Option<&impl ToString>,
    ) {
        if let Some(v) = value {
            props.insert(key.to_string(), v.to_string());
        }
    }

    // Creates a REST catalog. `uri` is required; `warehouse` is passed on when present.
    async fn create_rest_catalog(&self) -> PyResult<Box<dyn IcebergCatalog>> {
        let mut props = HashMap::from([(
            ::iceberg_catalog_rest::REST_CATALOG_PROP_URI.to_string(),
            Self::expect_parameter(self.uri.as_ref(), "uri")?,
        )]);
        Self::insert_if_some(
            &mut props,
            ::iceberg_catalog_rest::REST_CATALOG_PROP_WAREHOUSE,
            self.warehouse.as_ref(),
        );
        let catalog = ::iceberg_catalog_rest::RestCatalogBuilder::default()
            .load("rest", props)
            .await
            .map_err(|e| {
                PyValueError::new_err(format!("failed to initialize Iceberg catalog: {e}"))
            })?;
        Ok(Box::new(catalog))
    }

    // Creates a Glue catalog. `warehouse` is required; `uri`, `catalog_id` and the AWS
    // settings are passed on when present.
    async fn create_glue_catalog(&self) -> PyResult<Box<dyn IcebergCatalog>> {
        let mut props = HashMap::from([(
            ::iceberg_catalog_glue::GLUE_CATALOG_PROP_WAREHOUSE.to_string(),
            Self::expect_parameter(self.warehouse.as_ref(), "warehouse")?,
        )]);
        Self::insert_if_some(
            &mut props,
            ::iceberg_catalog_glue::GLUE_CATALOG_PROP_URI,
            self.uri.as_ref(),
        );
        Self::insert_if_some(
            &mut props,
            ::iceberg_catalog_glue::GLUE_CATALOG_PROP_CATALOG_ID,
            self.catalog_id.as_ref(),
        );
        if let Some(aws_settings) = &self.aws_settings {
            // The region is always set once AWS settings exist; credentials, profile and
            // session token are only set when configured.
            props.insert(
                ::iceberg_catalog_glue::AWS_REGION_NAME.to_string(),
                aws_settings.region.to_string(),
            );
            Self::insert_if_some(
                &mut props,
                ::iceberg_catalog_glue::AWS_ACCESS_KEY_ID,
                aws_settings.access_key.as_ref(),
            );
            Self::insert_if_some(
                &mut props,
                ::iceberg_catalog_glue::AWS_SECRET_ACCESS_KEY,
                aws_settings.secret_access_key.as_ref(),
            );
            Self::insert_if_some(
                &mut props,
                ::iceberg_catalog_glue::AWS_PROFILE_NAME,
                aws_settings.profile.as_ref(),
            );
            Self::insert_if_some(
                &mut props,
                ::iceberg_catalog_glue::AWS_SESSION_TOKEN,
                aws_settings.session_token.as_ref(),
            );
        }
        let catalog = ::iceberg_catalog_glue::GlueCatalogBuilder::default()
            .load("glue", props)
            .await
            .map_err(|e| {
                PyValueError::new_err(format!("failed to initialize Iceberg catalog: {e}"))
            })?;
        Ok(Box::new(catalog))
    }

    // Creates the catalog selected by `type_` ("rest" or "glue"), blocking on the given
    // runtime until the asynchronous initialization finishes.
    //
    // Returns a `ValueError` for an unknown catalog type, a missing required parameter,
    // or a failed catalog initialization.
    pub fn create(&self, runtime: &TokioRuntime) -> PyResult<Box<dyn IcebergCatalog>> {
        runtime.block_on(async {
            let catalog = match self.type_.as_str() {
                "rest" => self.create_rest_catalog().await,
                "glue" => self.create_glue_catalog().await,
                other => {
                    return Err(PyValueError::new_err(format!(
                        "unknown type of Iceberg catalog: {other}"
                    )))
                }
            };
            // NOTE(review): the helpers above already return a `ValueError`, and load
            // failures already carry this very prefix, so the final message repeats it.
            // The wrapping is kept as is because callers may match on the current text.
            let catalog: Box<dyn IcebergCatalog> = catalog.map_err(|e| {
                PyValueError::new_err(format!("failed to initialize Iceberg catalog: {e}"))
            })?;
            Ok(catalog)
        })
    }
}

// CSV dialect settings. The Python constructor receives characters and stores them
// here as single bytes, which is the form `build_csv_reader_builder` passes to the
// CSV reader builder.
#[derive(Clone, Debug)]
#[pyclass(module = "pathway.engine", frozen)]
pub struct CsvParserSettings {
    pub delimiter: u8,
    pub quote: u8,
    // `None` when no escape character was configured.
    pub escape: Option<u8>,
    pub enable_double_quote_escapes: bool,
    pub enable_quoting: bool,
    // `None` when no comment character was configured.
    pub comment_character: Option<u8>,
}

#[pymethods]
impl CsvParserSettings {
    // Python constructor. Every argument is optional and defaults to the value given
    // in the signature.
    //
    // All characters are stored as single bytes. A character that does not fit into
    // one byte is rejected with a `ValueError`; this includes the escape character,
    // which used to be truncated silently.
    #[new]
    #[pyo3(signature = (
        delimiter = ',',
        quote = '"', // "
        escape = None,
        enable_double_quote_escapes = true,
        enable_quoting = true,
        comment_character = None,
    ))]
    pub fn new(
        delimiter: char,
        quote: char,
        escape: Option<char>,
        enable_double_quote_escapes: bool,
        enable_quoting: bool,
        comment_character: Option<char>,
    ) -> PyResult<CsvParserSettings> {
        // Narrows a character to a single byte, failing with `message` when it does not fit.
        fn single_byte(character: char, message: &'static str) -> PyResult<u8> {
            u8::try_from(character).map_err(|_| PyValueError::new_err(message))
        }

        // The checks run in the same order as before: comment character, delimiter, quote.
        // The escape character check is new and runs last.
        let comment_character = comment_character
            .map(|character| {
                single_byte(
                    character,
                    "Comment character, if specified, should be an ASCII character",
                )
            })
            .transpose()?;
        let delimiter = single_byte(
            delimiter,
            "Delimiter, if specified, should be an ASCII character",
        )?;
        let quote = single_byte(quote, "Quote, if specified, should be an ASCII character")?;
        let escape = escape
            .map(|character| {
                single_byte(
                    character,
                    "Escape character, if specified, should be an ASCII character",
                )
            })
            .transpose()?;

        Ok(CsvParserSettings {
            delimiter,
            quote,
            escape,
            enable_double_quote_escapes,
            enable_quoting,
            comment_character,
        })
    }
}

impl CsvParserSettings {
    // Creates a CSV reader builder configured with these settings.
    //
    // Besides the configured dialect, the builder is always set to `flexible(true)` and
    // `has_headers(false)`.
    fn build_csv_reader_builder(&self) -> CsvReaderBuilder {
        let mut reader_builder = CsvReaderBuilder::new();

        // Dialect taken from the settings.
        reader_builder.delimiter(self.delimiter);
        reader_builder.quote(self.quote);
        reader_builder.escape(self.escape);
        reader_builder.double_quote(self.enable_double_quote_escapes);
        reader_builder.quoting(self.enable_quoting);
        reader_builder.comment(self.comment_character);

        // Fixed options, independent of the settings.
        reader_builder.flexible(true);
        reader_builder.has_headers(false);

        reader_builder
    }
}

impl DataStorage {
    /// Unwraps an optional string setting into a `&str`.
    ///
    /// Returns a `ValueError` carrying `error_message` when `field` is `None`.
    fn extract_string_field<'a>(
        field: Option<&'a String>,
        error_message: &'static str,
    ) -> PyResult<&'a str> {
        match field {
            Some(value) => Ok(value.as_str()),
            None => Err(PyValueError::new_err(error_message)),
        }
    }

    /// Returns the configured `path`, or a `ValueError` if it is not set.
    fn path(&self) -> PyResult<&str> {
        const MISSING_PATH: &str = "For fs/s3 storage, path must be specified";
        Self::extract_string_field(self.path.as_ref(), MISSING_PATH)
    }

    /// Returns the configured `table_name`, or a `ValueError` if it is not set.
    fn table_name(&self) -> PyResult<&str> {
        const MISSING_TABLE_NAME: &str =
            "For MongoDB or QuestDB, the 'table_name' field must be specified";
        Self::extract_string_field(self.table_name.as_ref(), MISSING_TABLE_NAME)
    }

    /// Returns the configured `database`, or a `ValueError` if it is not set.
    fn database(&self) -> PyResult<&str> {
        const MISSING_DATABASE: &str = "For MongoDB, the 'database' field must be specified";
        Self::extract_string_field(self.database.as_ref(), MISSING_DATABASE)
    }

    /// Returns the configured `connection_string`, or a `ValueError` if it is not set.
    fn connection_string(&self) -> PyResult<&str> {
        const MISSING_CONNECTION_STRING: &str =
            "For Postgres, MongoDB, and MySQL, the 'connection_string' field must be specified";
        Self::extract_string_field(self.connection_string.as_ref(), MISSING_CONNECTION_STRING)
    }

    /// Returns a copy of the Azure Blob Storage settings, or a `ValueError`
    /// if they were not provided.
    fn azure_blob_storage_settings(&self) -> PyResult<AzureBlobStorageSettings> {
        match self.azure_blob_storage_settings.as_ref() {
            Some(settings) => Ok(settings.clone()),
            None => Err(PyValueError::new_err(
                "For Azure Blob Storage, 'azure_blob_storage_settings' field must be specified",
            )),
        }
    }

    /// Constructs the S3 bucket described by `aws_s3_settings`.
    ///
    /// The bucket name deduced from `path` (if any) is forwarded to
    /// `construct_bucket`. Fails with a `ValueError` when either the path or
    /// the S3 settings are missing; the path is checked first.
    fn s3_bucket(&self) -> PyResult<S3Bucket> {
        let (bucket_name, _) = S3Scanner::deduce_bucket_and_path(self.path()?);
        let Some(settings) = self.aws_s3_settings.as_ref() else {
            return Err(PyValueError::new_err(
                "For AWS storage, aws_s3_settings must be specified",
            ));
        };
        let bucket_py: &Py<_> = settings.borrow();
        bucket_py.get().construct_bucket(bucket_name.as_deref())
    }

    /// Returns a copy of the MQTT settings, or a `ValueError` if they were not provided.
    fn mqtt_settings(&self) -> PyResult<MqttSettings> {
        match self.mqtt_settings.as_ref() {
            Some(settings) => Ok(settings.clone()),
            None => Err(PyValueError::new_err(
                "For MQTT, mqtt_settings must be specified",
            )),
        }
    }

    /// Returns the number of downloader threads to use.
    ///
    /// An explicitly configured `downloader_threads_count` wins. Otherwise the
    /// value reported by `std::thread::available_parallelism` is used and logged;
    /// a `RuntimeError` is returned if that estimate cannot be obtained.
    fn downloader_threads_count(&self) -> PyResult<usize> {
        if let Some(explicit_count) = self.downloader_threads_count {
            return Ok(explicit_count);
        }

        let estimated = std::thread::available_parallelism().map_err(|e| {
            PyRuntimeError::new_err(format!(
                "Failed to estimate the number of parallel downloaders to use: {e}"
            ))
        })?;
        info!("S3 downloader defaults to {estimated} threads");
        Ok(estimated.get())
    }

    /// Creates the Iceberg catalog described by the `iceberg_catalog` field.
    ///
    /// Returns a `ValueError` when no catalog configuration was provided;
    /// otherwise delegates to the configuration's `create`, passing `runtime`.
    fn create_iceberg_catalog(&self, runtime: &TokioRuntime) -> PyResult<Box<dyn IcebergCatalog>> {
        self
            .iceberg_catalog
            .as_ref()
            // Build the error lazily, like the other accessors in this impl do.
            .ok_or_else(|| {
                PyValueError::new_err(
                    "Iceberg catalog configuration must be specified in the 'iceberg_catalog' field of DataStorage",
                )
            })?
            .create(runtime)
    }

    /// Builds an rdkafka `ClientConfig` from `rdkafka_settings`.
    ///
    /// Returns a `ValueError` when `rdkafka_settings` is not set.
    fn kafka_client_config(&self) -> PyResult<ClientConfig> {
        let Some(rdkafka_settings) = self.rdkafka_settings.as_ref() else {
            return Err(PyValueError::new_err(
                "For kafka input, rdkafka_settings must be specified",
            ));
        };

        let mut client_config = ClientConfig::new();
        // Set before the user-provided settings are applied, so a user-provided
        // "ssl.ca.location" is written afterwards.
        client_config.set("ssl.ca.location", "probe");
        rdkafka_settings.iter().for_each(|(key, value)| {
            client_config.set(key, value);
        });

        // If the starting timestamp is given, the positions
        // within the topic partitions will be reset lazily
        let starts_from_timestamp = self.start_from_timestamp_ms.is_some();
        if starts_from_timestamp {
            client_config.set("auto.offset.reset", "earliest");
        }

        Ok(client_config)
    }

    /// Resolves the message queue topic from `topic` and `topic_name_index`.
    ///
    /// Exactly one of the two must be set: `topic` yields a fixed topic and
    /// `topic_name_index` a dynamic one. Any other combination is a `ValueError`.
    fn message_queue_topic(&self) -> PyResult<MessageQueueTopic> {
        match (&self.topic, self.topic_name_index) {
            (Some(_), Some(_)) => Err(PyValueError::new_err(
                "Either 'topic' or 'topic_name_index' must be defined, but not both",
            )),
            (Some(topic), None) => Ok(MessageQueueTopic::Fixed(topic.clone())),
            (None, Some(topic_name_index)) => Ok(MessageQueueTopic::Dynamic(topic_name_index)),
            (None, None) => Err(PyValueError::new_err(
                "Either 'topic' or 'topic_name_index' must be defined, but none is",
            )),
        }
    }

    /// Returns the name of the fixed topic.
    ///
    /// Propagates errors from `message_queue_topic` and returns a `ValueError`
    /// when the topic is dynamic.
    fn message_queue_fixed_topic(&self) -> PyResult<String> {
        if let MessageQueueTopic::Fixed(t) = self.message_queue_topic()? {
            Ok(t)
        } else {
            Err(PyValueError::new_err(
                "Dynamic topics aren't supported in the readers",
            ))
        }
    }

    /// Returns a CSV reader builder for this storage.
    ///
    /// Uses `csv_parser_settings` when present; otherwise falls back to a
    /// default builder with headers disabled.
    fn build_csv_parser_settings(&self) -> CsvReaderBuilder {
        self.csv_parser_settings.as_ref().map_or_else(
            || {
                let mut default_builder = CsvReaderBuilder::new();
                default_builder.has_headers(false);
                default_builder
            },
            |parser_settings| parser_settings.build_csv_reader_builder(),
        )
    }

    /// Picks the tokenizer for file-like sources based on the data format type.
    ///
    /// The `"dsv"` format gets a CSV tokenizer; every other format gets a
    /// buffered-reader tokenizer driven by `read_method`.
    fn build_tokenizer_for_posix_like_read(&self, data_format: &DataFormat) -> Box<dyn Tokenize> {
        let format_type: &str = data_format.format_type.as_ref();
        if format_type == "dsv" {
            Box::new(CsvTokenizer::new(self.build_csv_parser_settings()))
        } else {
            Box::new(BufReaderTokenizer::new(self.read_method))
        }
    }

    /// Builds a reader over the local filesystem.
    ///
    /// Returns the reader together with the number of parallel readers, which is
    /// always 1 here. Scanner and reader initialization failures are reported as
    /// `IOError`.
    fn construct_fs_reader(
        &self,
        scope: &Scope,
        data_format: &DataFormat,
    ) -> PyResult<(Box<dyn ReaderBuilder>, usize)> {
        let root_path = self.path()?;
        let scanner = FilesystemScanner::new(root_path, &self.object_pattern).map_err(|e| {
            PyIOError::new_err(format!("Failed to initialize Filesystem scanner: {e}"))
        })?;

        let tokenizer = self.build_tokenizer_for_posix_like_read(data_format);
        let reader = PosixLikeReader::new(
            Box::new(scanner),
            tokenizer,
            self.mode,
            self.only_provide_metadata,
            scope.is_persisted,
        )
        .map_err(|e| PyIOError::new_err(format!("Failed to initialize Filesystem reader: {e}")))?;

        Ok((Box::new(reader), 1))
    }

    /// Builds a reader over an S3 bucket.
    ///
    /// Returns the reader together with the number of parallel readers, which is
    /// always 1 here. Scanner failures are reported as `IOError`, reader failures
    /// as `RuntimeError`.
    fn construct_s3_reader(
        &self,
        scope: &Scope,
        data_format: &DataFormat,
    ) -> PyResult<(Box<dyn ReaderBuilder>, usize)> {
        let (_, deduced_path) = S3Scanner::deduce_bucket_and_path(self.path()?);
        let bucket = self.s3_bucket()?;
        let downloader_threads = self.downloader_threads_count()?;
        let scanner = S3Scanner::new(
            bucket,
            deduced_path,
            self.object_pattern.clone(),
            downloader_threads,
            self.mode.is_polling_enabled(),
        )
        .map_err(|e| PyIOError::new_err(format!("Failed to initialize S3 scanner: {e}")))?;

        let tokenizer = self.build_tokenizer_for_posix_like_read(data_format);
        let reader = PosixLikeReader::new(
            Box::new(scanner),
            tokenizer,
            self.mode,
            self.only_provide_metadata,
            scope.is_persisted,
        )
        .map_err(|e| PyRuntimeError::new_err(format!("Creating S3 reader failed: {e}")))?;

        Ok((Box::new(reader), 1))
    }

    /// Returns the total number of partitions for a Kafka topic.
    ///
    /// Fetches the metadata for `topic` and counts its partitions. Both a failed
    /// metadata request and a topic missing from the response are reported as
    /// `IOError`.
    fn total_partitions_for_topic(consumer: &BaseConsumer, topic: &str) -> PyResult<usize> {
        let metadata = consumer
            .fetch_metadata(Some(topic), KafkaReader::default_timeout())
            .map_err(|e| PyIOError::new_err(format!("Failed to fetch topic metadata: {e}")))?;

        metadata
            .topics()
            .iter()
            .find(|candidate| candidate.name() == topic)
            .map(|found| found.partitions().len())
            .ok_or_else(|| PyIOError::new_err(format!("Topic '{topic}' not found")))
    }

    /// Returns the watermarks of every partition of `topic`, indexed by partition.
    ///
    /// The result lets callers handle the case where a later `offsets_for_times`
    /// call reports `KafkaOffset::End` for some partitions. In static mode it is
    /// also used to identify the boundaries of the data chunk that needs to be read.
    ///
    /// A failed watermark request is reported as `IOError`.
    fn kafka_partition_watermarks(
        consumer: &BaseConsumer,
        topic: &str,
        total_partitions: usize,
    ) -> PyResult<Vec<RdkafkaWatermark>> {
        (0..total_partitions)
            .map(|partition_idx| -> PyResult<RdkafkaWatermark> {
                let (low, high) = consumer
                    .fetch_watermarks(
                        topic,
                        partition_idx.try_into().unwrap(),
                        KafkaReader::default_timeout(),
                    )
                    .map_err(|e| {
                        PyIOError::new_err(format!(
                            "Failed to fetch watermarks for ({topic}, {partition_idx}): {e}"
                        ))
                    })?;
                Ok(RdkafkaWatermark::new(low, high))
            })
            .collect()
    }

    fn kafka_seek_positions_for_timestamp(
        consumer: &BaseConsumer,
        topic: &str,
        total_partitions: usize,
        start_from_timestamp_ms: i64,
        watermarks: &[RdkafkaWatermark],
    ) -> PyResult<HashMap<i32, KafkaOffset>> {
        let mut seek_positions = HashMap::new();
        let mut tpl = TopicPartitionList::new();
        for partition_idx in 0..total_partitions {
            tpl.add_partition_offset(
                topic,
                partition_idx.try_into().unwrap(),
                KafkaOffset::Offset(start_from_timestamp_ms),
            )
            .expect("Failed to add partition offset");
        }

        let offsets = consumer
            .offsets_for_times(tpl, KafkaReader::default_timeout())
            .map_err(|e| {
                PyIOError::new_err(format!("Failed to fetch offsets for the timestamp: {e}"))
            })?;

        // We could have done a simple `consumer.assign` here, but it would damage the automatic consumer rebalance
        // So we act differently: we pass the seek positions to consumer, and it seeks lazily
        for element in offsets.elements() {
            assert_eq!(element.topic(), topic);
            let offset = match element.offset() {
                KafkaOffset::Invalid => {
                    return Err(PyRuntimeError::new_err(format!(
                        "rdkafka returned invalid offset, details: {offsets:?}"
                    )))
                }
                KafkaOffset::End => {
                    let partition_idx: usize = element.partition().try_into().unwrap();
                    info!("Partition {partition_idx} doesn't have messages with timestamp greater than {start_from_timestamp_ms}. All new messages will be read.");
                    KafkaOffset::Offset(watermarks[partition_idx].high)
                }
                offset => offset,
            };
            info!(
                "Adding a lazy seek position for ({topic}, {}) to ({:?})",
                element.partition(),
                offset
            );
            seek_positions.insert(element.partition(), offset);
        }
        Ok(seek_positions)
    }

    /// Builds a Kafka reader subscribed to the fixed topic of this storage.
    ///
    /// When `start_from_timestamp_ms` is set, seek positions for that timestamp
    /// are computed and handed to the reader. The returned reader count is the
    /// smaller of `properties.max_parallel_readers(scope)` and the configured
    /// `parallel_readers` (or `NO_PARALLEL_READERS_LIMIT` when it is unset).
    fn construct_kafka_reader(
        &self,
        scope: &Scope,
        properties: &ConnectorProperties,
    ) -> PyResult<(Box<dyn ReaderBuilder>, usize)> {
        let consumer: BaseConsumer = self
            .kafka_client_config()?
            .create()
            .map_err(|e| PyValueError::new_err(format!("Creating Kafka consumer failed: {e}")))?;

        let topic = self.message_queue_fixed_topic()?;
        consumer
            .subscribe(&[topic.as_str()])
            .map_err(|e| PyIOError::new_err(format!("Subscription to Kafka topic failed: {e}")))?;

        let total_partitions = Self::total_partitions_for_topic(&consumer, &topic)?;
        let watermarks = Self::kafka_partition_watermarks(&consumer, &topic, total_partitions)?;

        let seek_positions = match self.start_from_timestamp_ms {
            Some(start_from_timestamp_ms) => {
                let current_timestamp = current_unix_timestamp_ms();
                // A timestamp in the future is only warned about, not rejected.
                if start_from_timestamp_ms > current_timestamp.try_into().unwrap() {
                    warn!("The timestamp {start_from_timestamp_ms} is greater than the current timestamp {current_timestamp}. All new entries will be read.");
                }
                Self::kafka_seek_positions_for_timestamp(
                    &consumer,
                    &topic,
                    total_partitions,
                    start_from_timestamp_ms,
                    &watermarks,
                )?
            }
            None => HashMap::new(),
        };

        let reader = KafkaReader::new(consumer, topic, seek_positions, watermarks, self.mode);
        let parallel_readers = properties
            .max_parallel_readers(scope)
            .min(self.parallel_readers.unwrap_or(NO_PARALLEL_READERS_LIMIT));

        Ok((Box::new(reader), parallel_readers))
    }

    /// Builds a reader backed by a Python subject.
    ///
    /// Fails with a `ValueError` when `python_subject` is missing, or when the
    /// subject is marked internal while a `unique_name` is set. The returned
    /// reader count is always 1.
    fn construct_python_reader(
        &self,
        py: pyo3::Python,
        data_format: &DataFormat,
    ) -> PyResult<(Box<dyn ReaderBuilder>, usize)> {
        let subject = match self.python_subject.as_ref() {
            Some(python_subject) => python_subject.clone_ref(py),
            None => {
                return Err(PyValueError::new_err(
                    "For Python connector, python_subject should be specified",
                ))
            }
        };

        let is_internal = subject.borrow(py).is_internal;
        if is_internal && self.unique_name.is_some() {
            return Err(PyValueError::new_err(
                "Python connectors marked internal can't have unique names",
            ));
        }

        let value_fields = data_format.value_fields_type_map(py);
        Ok((Box::new(PythonReaderBuilder::new(subject, value_fields)), 1))
    }

    /// Builds a reader over a table of the SQLite database located at `path`.
    ///
    /// The connection is opened with the read-only and no-mutex flags, and it is
    /// opened before `table_name` is checked. The returned reader count is always 1.
    fn construct_sqlite_reader(
        &self,
        py: pyo3::Python,
        data_format: &DataFormat,
    ) -> PyResult<(Box<dyn ReaderBuilder>, usize)> {
        let open_flags =
            SqliteOpenFlags::SQLITE_OPEN_READ_ONLY | SqliteOpenFlags::SQLITE_OPEN_NO_MUTEX;
        let connection = SqliteConnection::open_with_flags(self.path()?, open_flags).map_err(
            |e| PyRuntimeError::new_err(format!("Failed to open Sqlite connection: {e}")),
        )?;

        let Some(table_name) = self.table_name.clone() else {
            return Err(PyValueError::new_err(
                "For Sqlite connector, table_name should be specified",
            ));
        };

        let reader = SqliteReader::new(
            connection,
            table_name,
            data_format.value_fields_type_map(py).into_iter().collect(),
        );
        Ok((Box::new(reader), 1))
    }

    /// Chooses how objects are downloaded: from S3 when `aws_s3_settings` is
    /// set, locally otherwise.
    fn object_downloader(&self) -> PyResult<ObjectDownloader> {
        if self.aws_s3_settings.is_none() {
            return Ok(ObjectDownloader::Local);
        }
        let bucket = self.s3_bucket()?;
        Ok(ObjectDownloader::S3(Box::new(bucket)))
    }

    /// Returns the storage options for Delta Lake: the S3 options when
    /// `aws_s3_settings` is set, an empty map otherwise.
    fn delta_storage_options(&self) -> PyResult<HashMap<String, String>> {
        match self.aws_s3_settings {
            Some(_) => self.delta_s3_storage_options(),
            None => Ok(HashMap::new()),
        }
    }

    /// Builds a Delta Lake reader for the table located at `path`.
    ///
    /// Checks the `deltalake` entitlement when a license is present, and rejects
    /// configurations that combine `start_from_timestamp_ms` with non-empty
    /// `backfilling_thresholds`. The returned reader count is always 1.
    fn construct_deltalake_reader(
        &self,
        py: pyo3::Python,
        data_format: &DataFormat,
        scope: &Scope,
    ) -> PyResult<(Box<dyn ReaderBuilder>, usize)> {
        if let Some(license) = scope.license.as_ref() {
            license.check_entitlements(["deltalake"])?;
        }

        let backfilling_thresholds = self.backfilling_thresholds.clone().unwrap_or_default();
        let has_conflicting_options =
            self.start_from_timestamp_ms.is_some() && !backfilling_thresholds.is_empty();
        if has_conflicting_options {
            return Err(PyValueError::new_err("The simultaneous use of 'start_from_timestamp_ms' and 'backfilling_thresholds' is not supported."));
        }

        let table_path = self.path()?;
        let downloader = self.object_downloader()?;
        let storage_options = self.delta_storage_options()?;
        let value_fields = data_format.value_fields_type_map(py);
        let has_primary_key = data_format.key_field_names.is_some();

        let reader = DeltaTableReader::new(
            table_path,
            downloader,
            storage_options,
            value_fields,
            self.mode,
            self.start_from_timestamp_ms,
            has_primary_key,
            backfilling_thresholds,
        )
        .map_err(|e| PyIOError::new_err(format!("Failed to connect to DeltaLake: {e}")))?;

        Ok((Box::new(reader), 1))
    }

    /// Builds a NATS reader for the fixed topic of this storage.
    ///
    /// `path` is used as the server URI. Two modes are supported:
    ///
    /// * When `js_stream_name` is set, messages are pulled from that JetStream
    ///   stream. The consumer is either the existing one named by
    ///   `durable_consumer_name`, or one obtained with `get_or_create_consumer`
    ///   under the readers group name.
    /// * Otherwise a queue subscription to the topic is made under the readers
    ///   group name.
    ///
    /// The readers group name is `pathway-reader-<N>`, where `<N>` is the current
    /// value of `scope.total_connectors`. Connection, lookup and subscription
    /// failures are reported as `IOError`. The returned reader count is
    /// `properties.max_parallel_readers(scope)`.
    fn construct_nats_reader(
        &self,
        py: pyo3::Python,
        scope: &Scope,
        properties: &ConnectorProperties,
    ) -> PyResult<(Box<dyn ReaderBuilder>, usize)> {
        let uri = self.path()?;
        // `message_queue_fixed_topic` already returns an owned `String`.
        let topic: String = self.message_queue_fixed_topic()?;
        let runtime = create_async_tokio_runtime()?;
        let connector_index = *scope.total_connectors.get(py).borrow();
        let readers_group_name = format!("pathway-reader-{connector_index}");

        let poller = if let Some(js_stream_name) = &self.js_stream_name {
            let messages = runtime.block_on(async {
                let client = nats_connect(uri)
                    .await
                    .map_err(|e| PyIOError::new_err(format!("Failed to connect: {e}")))?;

                let js = jetstream::new(client);
                let stream = js
                    .get_stream(js_stream_name)
                    .await
                    .map_err(|e| PyIOError::new_err(format!("JetStream stream not found: {e}")))?;

                let consumer = if let Some(durable_consumer_name) = &self.durable_consumer_name {
                    stream
                        .get_consumer(durable_consumer_name)
                        .await
                        .map_err(|e| {
                            PyIOError::new_err(
                                format!(
                                    "Failed to retrieve the specified pull consumer '{durable_consumer_name}': {e}"
                                )
                            )
                        })?
                } else {
                    stream
                        .get_or_create_consumer(
                            // The name is needed both as the lookup key and, by value,
                            // as the durable name of the config, hence the clone.
                            &readers_group_name.clone(),
                            async_nats::jetstream::consumer::pull::Config {
                                durable_name: Some(readers_group_name),
                                ..Default::default()
                            },
                        )
                        .await
                        .map_err(|e| {
                            PyIOError::new_err(format!("Failed to create JetStream consumer: {e}"))
                        })?
                };

                let messages = consumer.stream().messages().await.map_err(|e| {
                    PyIOError::new_err(format!("Failed to request JetStream messages: {e}"))
                })?;

                Ok::<_, PyErr>(messages)
            })?;
            nats::NatsPoller::JetStream(Box::new(messages))
        } else {
            let subscriber = runtime.block_on(async {
                let client = nats_connect(uri)
                    .await
                    .map_err(|e| PyIOError::new_err(format!("Failed to connect to NATS: {e}")))?;
                let subscriber = client
                    .queue_subscribe(topic.clone(), readers_group_name) // Kafka "consumer group" equivalent to enable parallel reads
                    .await
                    .map_err(|e| {
                        PyIOError::new_err(format!("Failed to subscribe to NATS topic: {e}"))
                    })?;
                Ok::<NatsSubscriber, PyErr>(subscriber)
            })?;
            nats::NatsPoller::Simple(subscriber)
        };
        let reader = NatsReader::new(runtime, poller, scope.worker_index(), topic);
        Ok((Box::new(reader), properties.max_parallel_readers(scope)))
    }

    /// Builds an Apache Iceberg reader for `namespace`/`table_name`.
    ///
    /// Requires primary key fields in `data_format`, checks the `iceberg`
    /// entitlement when a license is present, and needs both `table_name` and
    /// `namespace` to be set. The returned reader count is always 1.
    fn construct_iceberg_reader(
        &self,
        py: pyo3::Python,
        data_format: &DataFormat,
        scope: &Scope,
    ) -> PyResult<(Box<dyn ReaderBuilder>, usize)> {
        let has_primary_key = data_format.key_field_names.is_some();
        if !has_primary_key {
            return Err(PyValueError::new_err(
                "Iceberg reader requires explicit primary key fields specification",
            ));
        }

        if let Some(license) = scope.license.as_ref() {
            license.check_entitlements(["iceberg"])?;
        }

        let table_name = self.table_name()?;
        let Some(namespace) = self.namespace.clone() else {
            return Err(PyValueError::new_err("Namespace must be specified"));
        };
        let value_fields = data_format.value_fields_vec(py);

        let runtime = create_async_tokio_runtime()
            .map_err(|e| PyRuntimeError::new_err(format!("Failed to create async runtime: {e}")))?;
        let catalog = self.create_iceberg_catalog(&runtime)?;

        // The time unit is used only for schema generation, so it can be arbitrary here.
        let schema_time_unit = ArrowTimeUnit::Microsecond;
        let table_params =
            IcebergTableParams::new(table_name.to_owned(), &value_fields, schema_time_unit)
                .map_err(|e| {
                    PyIOError::new_err(format!(
                        "Unable to create table params for Iceberg reader: {e}"
                    ))
                })?;

        let value_fields_types = data_format.value_fields_type_map(py);
        let reader = IcebergReader::new(
            runtime,
            catalog,
            &namespace,
            &table_params,
            value_fields_types,
            self.mode,
        )
        .map_err(|e| {
            PyIOError::new_err(format!("Unable to start data lake input connector: {e}"))
        })?;

        Ok((Box::new(reader), 1))
    }

    /// Builds an MQTT reader subscribed to the fixed topic of this storage.
    ///
    /// `path` is parsed as the broker URI. After the subscription request is
    /// sent, connection events are consumed until a `SubAck` packet arrives, so
    /// the reader is returned only once the subscription is acknowledged.
    ///
    /// # Errors
    ///
    /// * `ValueError` if the URI cannot be parsed.
    /// * `IOError` if the subscription request fails.
    /// * `RuntimeError` if no event arrives within one second, or if the
    ///   connection reports an error while waiting for the acknowledgement.
    ///
    /// The returned reader count is always 1.
    fn construct_mqtt_reader(&self) -> PyResult<(Box<dyn ReaderBuilder>, usize)> {
        let uri = self.path()?;
        let settings = self.mqtt_settings()?;
        // `message_queue_fixed_topic` already returns an owned `String`.
        let topic: String = self.message_queue_fixed_topic()?;
        let connection_options = MqttOptions::parse_url(uri)
            .map_err(|e| PyValueError::new_err(format!("Incorrect MQTT URI: {e}")))?;
        let (client, mut connection) =
            MqttClient::new(connection_options, MQTT_CLIENT_MAX_CHANNEL_SIZE);
        client.subscribe(topic, settings.qos).map_err(|e| {
            PyIOError::new_err(format!(
                "Failed to establish connection with MQTT broker: {e}"
            ))
        })?;

        // Wait for the subscription acknowledgement from the broker
        loop {
            // The outer error means nothing was received within the timeout.
            let maybe_event = connection
                .recv_timeout(std::time::Duration::from_secs(1))
                .map_err(|e| {
                    PyRuntimeError::new_err(format!("MQTT broker connection timeout: {e:?}"))
                })?;
            // The inner error is reported by the connection itself.
            let event = maybe_event.map_err(|e| {
                PyRuntimeError::new_err(format!(
                    "Failed to receive subscription confirmation from MQTT broker: {e:?}"
                ))
            })?;
            if matches!(event, MqttEvent::Incoming(MqttPacket::SubAck(_))) {
                break;
            }
        }

        Ok((Box::new(MqttReader::new(connection)), 1))
    }

    /// Builds a Kinesis reader for the fixed topic of this storage.
    ///
    /// Checks the `kinesis` entitlement when a license is present, loads the AWS
    /// configuration from the environment and requires
    /// `properties.commit_duration_ms` to be set; that value becomes the refresh
    /// duration passed to the reader. The returned reader count is
    /// `properties.max_parallel_readers(scope)`.
    fn construct_kinesis_reader(
        &self,
        scope: &Scope,
        properties: &ConnectorProperties,
    ) -> PyResult<(Box<dyn ReaderBuilder>, usize)> {
        if let Some(license) = scope.license.as_ref() {
            license.check_entitlements(["kinesis"])?;
        }

        let runtime = create_async_tokio_runtime()
            .map_err(|e| PyRuntimeError::new_err(format!("Failed to create async runtime: {e}")))?;
        let aws_config = runtime.block_on(::aws_config::load_from_env());
        let topic = self.message_queue_fixed_topic()?;
        let client = KinesisClient::new(&aws_config);

        let refresh_duration = properties
            .commit_duration_ms
            .as_ref()
            .map(|refresh_duration| ::std::time::Duration::from_millis(*refresh_duration))
            .ok_or_else(|| {
                PyValueError::new_err(
                    "autocommit_duration_ms must be set for Kinesis input connector",
                )
            })?;

        let reader = KinesisReader::new(
            runtime,
            client,
            topic,
            scope.worker_index(),
            scope.worker_count(),
            refresh_duration,
        )
        .map_err(|e| PyRuntimeError::new_err(format!("Failed to create Kinesis reader: {e}")))?;

        let parallel_readers = properties.max_parallel_readers(scope);
        Ok((Box::new(reader), parallel_readers))
    }

    /// Dispatches to the reader constructor matching `storage_type`.
    ///
    /// Returns the reader builder together with the number of parallel readers
    /// reported by the chosen constructor. An unrecognized storage type is a
    /// `ValueError`.
    fn construct_reader(
        &self,
        py: pyo3::Python,
        data_format: &DataFormat,
        scope: &Scope,
        properties: &ConnectorProperties,
    ) -> PyResult<(Box<dyn ReaderBuilder>, usize)> {
        let storage_type: &str = self.storage_type.as_ref();
        match storage_type {
            // File-like sources
            "fs" => self.construct_fs_reader(scope, data_format),
            "s3" => self.construct_s3_reader(scope, data_format),
            // Message queues and streams
            "kafka" => self.construct_kafka_reader(scope, properties),
            "nats" => self.construct_nats_reader(py, scope, properties),
            "mqtt" => self.construct_mqtt_reader(),
            "kinesis" => self.construct_kinesis_reader(scope, properties),
            // Tables and data lakes
            "sqlite" => self.construct_sqlite_reader(py, data_format),
            "deltalake" => self.construct_deltalake_reader(py, data_format, scope),
            "iceberg" => self.construct_iceberg_reader(py, data_format, scope),
            // Python-defined subjects
            "python" => self.construct_python_reader(py, data_format),
            other => Err(PyValueError::new_err(format!(
                "Unknown data source {other:?}"
            ))),
        }
    }

    /// Builds the persistent storage configuration matching `storage_type`.
    ///
    /// Supported types are `"fs"`, `"s3"`, `"azure"` and `"mock"`.
    ///
    /// # Errors
    ///
    /// Returns a `ValueError` when the storage type is not supported, when a
    /// field required by the chosen type is missing, or when `"mock"` is selected
    /// without `mock_events`.
    fn construct_persistent_storage_config(&self) -> PyResult<PersistentStorageConfig> {
        match self.storage_type.as_ref() {
            "fs" => Ok(PersistentStorageConfig::Filesystem(self.path()?.into())),
            "s3" => {
                let bucket = self.s3_bucket()?;
                let path = self.path()?;
                Ok(PersistentStorageConfig::S3 {
                    bucket: Box::new(bucket),
                    root_path: path.into(),
                })
            }
            "azure" => {
                let path = self.path()?;
                let azure_settings = self.azure_blob_storage_settings()?;
                Ok(PersistentStorageConfig::Azure {
                    credentials: azure_settings.credentials(),
                    account: azure_settings.account,
                    container: azure_settings.container,
                    root_path: path.into(),
                })
            }
            "mock" => {
                // A missing `mock_events` is a configuration error, not a reason to panic.
                let mock_events = self.mock_events.as_ref().ok_or_else(|| {
                    PyValueError::new_err(
                        "For mock persistent storage, 'mock_events' must be specified",
                    )
                })?;
                let mut events = HashMap::<ConnectorWorkerPair, Vec<SnapshotEvent>>::new();
                for ((unique_name, worker_id), es) in mock_events {
                    let internal_persistent_id = unique_name.clone().into_persistent_id();
                    events.insert((internal_persistent_id, *worker_id), es.clone());
                }
                Ok(PersistentStorageConfig::Mock(events))
            }
            other => Err(PyValueError::new_err(format!(
                "Unsupported persistent storage format: {other:?}"
            ))),
        }
    }

    /// Builds a writer that outputs to the file at `path`.
    ///
    /// The file is created with `File::create` and wrapped in a `BufWriter`.
    /// A creation failure is reported as `IOError`.
    fn construct_fs_writer(&self) -> PyResult<Box<dyn Writer>> {
        let path = self.path()?;
        let file = File::create(path).map_err(|e| {
            PyIOError::new_err(format!("Filesystem operation (create) failed: {e}"))
        })?;
        let writer = FileWriter::new(BufWriter::new(file), path.to_string());
        Ok(Box::new(writer))
    }

    /// Builds a Kafka writer for the topic configured on this storage.
    ///
    /// The topic may be fixed or dynamic. A producer creation failure is
    /// reported as `IOError`.
    fn construct_kafka_writer(&self) -> PyResult<Box<dyn Writer>> {
        let producer: ThreadedProducer<DefaultProducerContext> = self
            .kafka_client_config()?
            .create()
            .map_err(|e| PyIOError::new_err(format!("Producer creation failed: {e}")))?;

        let topic = self.message_queue_topic()?;
        let header_fields = self.header_fields.clone();

        Ok(Box::new(KafkaWriter::new(
            producer,
            topic,
            header_fields,
            self.key_field_index,
        )))
    }

    /// Builds a PostgreSQL writer for `table_name`.
    ///
    /// Connects with `connection_string` without TLS. A connection failure is
    /// reported as `IOError`; a failure to initialize the writer as `ValueError`.
    /// The connection is established before `table_name` is checked.
    fn construct_postgres_writer(
        &self,
        py: pyo3::Python,
        data_format: &DataFormat,
    ) -> PyResult<Box<dyn Writer>> {
        let connection_string = self.connection_string()?;
        let client = Client::connect(connection_string, NoTls).map_err(|e| {
            PyIOError::new_err(format!(
                "Failed to establish PostgreSQL connection: {e:?}"
            ))
        })?;

        let writer = PsqlWriter::new(
            client,
            self.max_batch_size,
            self.snapshot_maintenance_on_output,
            self.table_name()?,
            &data_format.value_fields_vec(py),
            data_format.key_field_names.as_deref(),
            self.table_writer_init_mode,
            self.legacy_mode,
        )
        .map_err(|e| {
            PyValueError::new_err(format!("Unable to initialize PostgreSQL table: {e}"))
        })?;

        Ok(Box::new(writer))
    }

    /// Builds an Elasticsearch writer from `elasticsearch_params`.
    ///
    /// Checks the `elasticsearch` entitlement when a license is given and fails
    /// with a `ValueError` when the parameters are missing.
    fn construct_elasticsearch_writer(
        &self,
        py: pyo3::Python,
        license: Option<&License>,
    ) -> PyResult<Box<dyn Writer>> {
        if let Some(license) = license {
            license.check_entitlements(["elasticsearch"])?;
        }

        let Some(params_holder) = self.elasticsearch_params.as_ref() else {
            return Err(PyValueError::new_err(
                "For elastic search output, elasticsearch_params section must be specified",
            ));
        };
        let elasticsearch_client_params_py: &Py<_> = params_holder.borrow();
        let params = elasticsearch_client_params_py.get();

        let client = params.client(py)?;
        let index_name = params.index_name.clone();

        Ok(Box::new(ElasticSearchWriter::new(
            client,
            index_name,
            self.max_batch_size,
        )))
    }

    /// Builds a Delta Lake writer for the table located at `path`.
    ///
    /// Checks the `deltalake` entitlement when a license is given. With
    /// `snapshot_maintenance_on_output` set, the table is maintained in snapshot
    /// mode with a snapshot column buffer; otherwise it is written as a stream of
    /// changes through an append-only buffer.
    ///
    /// A schema mismatch reported while creating the batch writer is raised as
    /// `TypeError`; the remaining failures are raised as `IOError`.
    fn construct_deltalake_writer(
        &self,
        py: pyo3::Python,
        data_format: &DataFormat,
        license: Option<&License>,
    ) -> PyResult<Box<dyn Writer>> {
        if let Some(license) = license {
            license.check_entitlements(["deltalake"])?;
        }

        let path = self.path()?;
        let value_fields = data_format.value_fields_vec(py);
        let partition_columns = self.partition_columns.clone().unwrap_or_default();

        let snapshot_mode = self.snapshot_maintenance_on_output;
        let table_type = if snapshot_mode {
            MaintenanceMode::Snapshot
        } else {
            MaintenanceMode::StreamOfChanges
        };

        let storage_options = self.delta_storage_options()?;
        let optimizer_rule = self
            .delta_optimizer_rule
            .clone()
            .map(PyDeltaOptimizerRule::into_inner);
        let batch_writer = DeltaBatchWriter::new(
            path,
            &value_fields,
            storage_options,
            partition_columns,
            table_type,
            optimizer_rule,
        )
        .map_err(|e| {
            let error_text = format!("Unable to create DeltaTable writer: {e}");
            if matches!(e, WriteError::DeltaTableSchemaMismatch(_)) {
                PyTypeError::new_err(error_text)
            } else {
                PyIOError::new_err(error_text)
            }
        })?;

        let schema = Arc::new(
            construct_arrow_schema(&value_fields, &batch_writer, table_type).map_err(|e| {
                PyIOError::new_err(format!("Failed to construct table schema: {e}"))
            })?,
        );

        let buffer: Box<dyn ColumnBuffer> = if snapshot_mode {
            let snapshot_buffer = SnapshotColumnBuffer::new_for_delta_table(
                path,
                self.delta_storage_options()?,
                &value_fields,
                schema,
            )
            .map_err(|e| PyIOError::new_err(format!("Failed to create snapshot writer: {e}")))?;
            Box::new(snapshot_buffer)
        } else {
            Box::new(AppendOnlyColumnBuffer::new(schema))
        };

        let min_commit_frequency = self.min_commit_frequency.map(time::Duration::from_millis);
        Ok(Box::new(LakeWriter::new(
            Box::new(batch_writer),
            buffer,
            min_commit_frequency,
        )))
    }

    /// Builds the Apache Iceberg sink: a `LakeWriter` on top of an `IcebergBatchWriter`.
    ///
    /// When a license is supplied, the `"iceberg"` entitlement is checked first.
    /// Only the stream-of-changes mode is supported, so rows are always buffered
    /// in an `AppendOnlyColumnBuffer`.
    ///
    /// # Errors
    ///
    /// * `PyNotImplementedError` when `snapshot_maintenance_on_output` is set.
    /// * `PyValueError` when no namespace is configured.
    /// * `PyRuntimeError` when the async runtime cannot be created.
    /// * `PyIOError` when table params, the batch writer or the schema cannot be built.
    fn construct_iceberg_writer(
        &self,
        py: pyo3::Python,
        data_format: &DataFormat,
        license: Option<&License>,
    ) -> PyResult<Box<dyn Writer>> {
        if let Some(license) = license {
            license.check_entitlements(["iceberg"])?;
        }

        if self.snapshot_maintenance_on_output {
            return Err(PyNotImplementedError::new_err(
                "Snapshot mode is not implemented for Apache Iceberg output",
            ));
        }

        let table_name = self.table_name()?;
        let Some(namespace) = self.namespace.clone() else {
            return Err(PyValueError::new_err("Namespace must be specified"));
        };
        let value_fields = data_format.value_fields_vec(py);

        let runtime = create_async_tokio_runtime()
            .map_err(|e| PyRuntimeError::new_err(format!("Failed to create async runtime: {e}")))?;
        let catalog = self.create_iceberg_catalog(&runtime)?;

        let time_unit = data_format.arrow_time_unit()?;
        let table_params = IcebergTableParams::new(table_name.to_string(), &value_fields, time_unit)
            .map_err(|e| {
                PyIOError::new_err(format!(
                    "Unable to create table params for Iceberg writer: {e}"
                ))
            })?;
        let batch_writer = IcebergBatchWriter::new(runtime, catalog, &namespace, &table_params)
            .map_err(|e| {
                PyIOError::new_err(format!(
                    "Unable to create batch writer for Iceberg writer: {e}"
                ))
            })?;

        // Snapshot mode is not implemented for Iceberg
        let maintenance_mode = MaintenanceMode::StreamOfChanges;
        let schema = construct_arrow_schema(&value_fields, &batch_writer, maintenance_mode)
            .map_err(|e| PyIOError::new_err(format!("Failed to construct table schema: {e}")))?;

        let buffer = Box::new(AppendOnlyColumnBuffer::new(Arc::new(schema)));
        let min_commit_frequency = self.min_commit_frequency.map(time::Duration::from_millis);
        Ok(Box::new(LakeWriter::new(
            Box::new(batch_writer),
            buffer,
            min_commit_frequency,
        )))
    }

    /// Builds the NATS sink.
    ///
    /// Connects to the server at `path()` on a freshly created async runtime and
    /// publishes through a `JetStreamWriteAccessor` when `js_stream_name` is set,
    /// or through a `SimpleWriteAccessor` otherwise.
    ///
    /// # Errors
    ///
    /// * `PyIOError` when the connection to NATS fails.
    /// * Whatever `path()`, the runtime creation or `message_queue_topic()` return.
    fn construct_nats_writer(&self) -> PyResult<Box<dyn Writer>> {
        let uri = self.path()?;
        let runtime = create_async_tokio_runtime()?;
        let client: NatsClient = runtime
            .block_on(nats_connect(uri))
            .map_err(|e| PyIOError::new_err(format!("Failed to connect to NATS: {e}")))?;

        let accessor: Box<dyn nats::WriteAccessor> = match &self.js_stream_name {
            Some(_) => Box::new(nats::JetStreamWriteAccessor::new(client)),
            None => Box::new(nats::SimpleWriteAccessor::new(client)),
        };

        let topic = self.message_queue_topic()?;
        let header_fields = self.header_fields.clone();
        Ok(Box::new(NatsWriter::new(
            runtime,
            accessor,
            topic,
            header_fields,
        )))
    }

    /// Builds the MongoDB sink writing into the collection named by `table_name()`
    /// inside the database named by `database()`.
    ///
    /// # Errors
    ///
    /// * `PyIOError` when the client cannot be created from the connection string.
    /// * Whatever `connection_string()`, `database()` or `table_name()` return.
    fn construct_mongodb_writer(&self) -> PyResult<Box<dyn Writer>> {
        let client = MongoClient::with_uri_str(self.connection_string()?)
            .map_err(|e| PyIOError::new_err(format!("Failed to connect to MongoDB: {e}")))?;
        let database_name = self.database()?;
        let database = client.database(database_name);
        let collection_name = self.table_name()?;
        let collection = database.collection(collection_name);
        Ok(Box::new(MongoWriter::new(collection, self.max_batch_size)))
    }

    /// Builds the MQTT sink publishing to `message_queue_topic()` with the QoS and
    /// retain flag taken from `mqtt_settings()`.
    ///
    /// # Errors
    ///
    /// * `PyValueError` when the URI returned by `path()` cannot be parsed.
    /// * Whatever `path()`, `message_queue_topic()` or `mqtt_settings()` return.
    fn construct_mqtt_writer(&self) -> PyResult<Box<dyn Writer>> {
        let uri = self.path()?;
        let topic = self.message_queue_topic()?;
        let settings = self.mqtt_settings()?;

        let connection_options = match MqttOptions::parse_url(uri) {
            Ok(options) => options,
            Err(e) => return Err(PyValueError::new_err(format!("Incorrect MQTT URI: {e}"))),
        };
        let (client, eventloop) = MqttClient::new(connection_options, MQTT_CLIENT_MAX_CHANNEL_SIZE);

        Ok(Box::new(MqttWriter::new(
            client,
            eventloop,
            topic,
            settings.qos,
            settings.retain,
        )))
    }

    /// Builds the QuestDB sink.
    ///
    /// When a license is supplied, the `"questdb"` entitlement is checked first.
    /// The sender is configured from `path()`, and the designated timestamp is
    /// chosen by `data_format.designated_timestamp_policy`:
    ///
    /// * `"use_now"` maps to `QuestDBAtColumnPolicy::UseNow`.
    /// * `"use_pathway_time"` maps to `QuestDBAtColumnPolicy::UsePathwayTime`.
    /// * `"use_column"` maps to `QuestDBAtColumnPolicy::UseColumn` with the column
    ///   at `key_field_index`, which must have type `Type::DateTimeNaive` or
    ///   `Type::DateTimeUtc`.
    ///
    /// # Errors
    ///
    /// `PyValueError` when the sender or the writer cannot be created, when the
    /// policy is missing or unknown, or when the `"use_column"` column index is
    /// missing, out of range or does not refer to a timestamp column.
    fn construct_questdb_writer(
        &self,
        py: pyo3::Python,
        data_format: &DataFormat,
        license: Option<&License>,
    ) -> PyResult<Box<dyn Writer>> {
        if let Some(license) = license {
            license.check_entitlements(["questdb"])?;
        }

        let uri = self.path()?;
        let sender = QuestDBSender::from_conf(uri)
            .map_err(|e| PyValueError::new_err(format!("Failed to create QuestDB sender: {e}")))?;
        let table_name = self.table_name()?;
        let value_fields: Vec<_> = data_format.value_field_names(py);
        let designated_timestamp_policy_name = data_format
            .designated_timestamp_policy
            .clone()
            .ok_or_else(|| {
                PyValueError::new_err(
                    "For QuestDB, 'designated_timestamp_policy' must be specified",
                )
            })?;
        let designated_timestamp_policy = match designated_timestamp_policy_name.as_str() {
            "use_now" => QuestDBAtColumnPolicy::UseNow,
            "use_pathway_time" => QuestDBAtColumnPolicy::UsePathwayTime,
            "use_column" => {
                // The message names the setting this value is actually read from.
                let column_id = self
                    .key_field_index
                    .ok_or_else(
                        || PyValueError::new_err(
                            "For QuestDB, if 'designated_timestamp_policy' is 'use_column', the 'key_field_index' must be specified"
                        ))?;
                // Checked lookup: the range validation and the access are one step,
                // so there is no separate indexing that could panic.
                let field = data_format.value_fields.get(column_id).ok_or_else(|| {
                    PyValueError::new_err(format!(
                        "Column index {column_id} is outside the total count: {}",
                        value_fields.len()
                    ))
                })?;
                let field = field.borrow(py);
                let designated_timestamp_type = &field.type_;
                if !matches!(
                    designated_timestamp_type,
                    Type::DateTimeNaive | Type::DateTimeUtc
                ) {
                    return Err(PyValueError::new_err(format!(
                        "at column doesn't have the timestamp type: {designated_timestamp_type}"
                    )));
                }
                QuestDBAtColumnPolicy::UseColumn(column_id)
            }
            _ => {
                return Err(PyValueError::new_err(format!(
                    "Unknown 'designated_timestamp_policy': {designated_timestamp_policy_name}"
                )))
            }
        };
        let writer = QuestDBWriter::new(
            sender,
            table_name.to_string(),
            value_fields,
            designated_timestamp_policy,
        )
        .map_err(|e| PyValueError::new_err(format!("Failed to create QuestDB writer: {e}")))?;
        Ok(Box::new(writer))
    }

    /// Builds the DynamoDB sink for the table named by `table_name()`.
    ///
    /// When a license is supplied, the `"dynamodb"` entitlement is checked first.
    /// The AWS configuration is loaded from the environment on a freshly created
    /// async runtime, which is then handed over to the writer.
    ///
    /// # Errors
    ///
    /// * `PyRuntimeError` when the async runtime cannot be created.
    /// * `PyValueError` when `key_field_index` is not set or the writer cannot be created.
    /// * Whatever `table_name()` or the entitlement check return.
    fn construct_dynamodb_writer(
        &self,
        py: pyo3::Python,
        data_format: &DataFormat,
        license: Option<&License>,
    ) -> PyResult<Box<dyn Writer>> {
        if let Some(license) = license {
            license.check_entitlements(["dynamodb"])?;
        }

        let runtime = match create_async_tokio_runtime() {
            Ok(runtime) => runtime,
            Err(e) => {
                return Err(PyRuntimeError::new_err(format!(
                    "Failed to create async runtime: {e}"
                )))
            }
        };
        let config = runtime.block_on(::aws_config::load_from_env());
        let table_name = self.table_name()?;
        let client = DynamoDBClient::new(&config);

        let value_fields = data_format.value_fields_vec(py);
        let Some(key_field_index) = self.key_field_index else {
            return Err(PyValueError::new_err("'key_field_index' must be specified"));
        };

        let writer = DynamoDBWriter::new(
            runtime,
            client,
            table_name.to_string(),
            value_fields,
            key_field_index,
            self.sort_key_index,
            self.table_writer_init_mode,
        )
        .map_err(|e| PyValueError::new_err(format!("Failed to create DynamoDB writer: {e}")))?;

        Ok(Box::new(writer))
    }

    /// Builds the Kinesis sink for the stream named by `message_queue_topic()`.
    ///
    /// When a license is supplied, the `"kinesis"` entitlement is checked first.
    /// The AWS configuration is loaded from the environment on a freshly created
    /// async runtime, which is then handed over to the writer.
    ///
    /// # Errors
    ///
    /// * `PyRuntimeError` when the async runtime cannot be created.
    /// * `PyValueError` when the writer cannot be created.
    /// * Whatever `message_queue_topic()` or the entitlement check return.
    fn construct_kinesis_writer(&self, license: Option<&License>) -> PyResult<Box<dyn Writer>> {
        if let Some(license) = license {
            license.check_entitlements(["kinesis"])?;
        }

        let runtime = match create_async_tokio_runtime() {
            Ok(runtime) => runtime,
            Err(e) => {
                return Err(PyRuntimeError::new_err(format!(
                    "Failed to create async runtime: {e}"
                )))
            }
        };
        let config = runtime.block_on(::aws_config::load_from_env());
        let client = KinesisClient::new(&config);
        let stream_name = self.message_queue_topic()?;

        match KinesisWriter::new(runtime, client, stream_name, self.key_field_index) {
            Ok(writer) => Ok(Box::new(writer)),
            Err(e) => Err(PyValueError::new_err(format!(
                "Failed to create Kinesis writer: {e}"
            ))),
        }
    }

    /// Builds the MySQL sink for the table named by `table_name()`.
    ///
    /// When a license is supplied, the `"mysql"` entitlement is checked first.
    /// A connection pool is created from `connection_string()` and passed to the
    /// writer together with the batch size, snapshot flag, value fields, key field
    /// names and table initialization mode.
    ///
    /// # Errors
    ///
    /// * `PyRuntimeError` when the connection pool cannot be created.
    /// * `PyValueError` when the writer cannot be initialized.
    /// * Whatever `connection_string()`, `table_name()` or the entitlement check return.
    fn construct_mysql_writer(
        &self,
        py: pyo3::Python,
        data_format: &DataFormat,
        license: Option<&License>,
    ) -> PyResult<Box<dyn Writer>> {
        if let Some(license) = license {
            license.check_entitlements(["mysql"])?;
        }

        let pool = MysqlConnectionPool::new(self.connection_string()?).map_err(|e| {
            PyRuntimeError::new_err(format!("Failed to create MySQL connection pool: {e}"))
        })?;
        let table_name = self.table_name()?;
        let value_fields = data_format.value_fields_vec(py);
        let key_field_names = data_format.key_field_names.as_deref();

        let writer = MysqlWriter::new(
            pool,
            self.max_batch_size,
            self.snapshot_maintenance_on_output,
            table_name,
            &value_fields,
            key_field_names,
            self.table_writer_init_mode,
        )
        .map_err(|e| PyValueError::new_err(format!("Failed to initialize MySQL writer: {e}")))?;

        Ok(Box::new(writer))
    }

    /// Creates the writer that matches `storage_type`.
    ///
    /// `py`, `data_format` and `license` are forwarded only to the sink
    /// constructors that take them.
    ///
    /// # Errors
    ///
    /// `PyValueError` for an unrecognized `storage_type`; otherwise whatever the
    /// selected constructor returns.
    fn construct_writer(
        &self,
        py: pyo3::Python,
        data_format: &DataFormat,
        license: Option<&License>,
    ) -> PyResult<Box<dyn Writer>> {
        let sink: &str = self.storage_type.as_ref();
        match sink {
            // Sinks built from the storage settings alone.
            "null" => Ok(Box::new(NullWriter::new())),
            "fs" => self.construct_fs_writer(),
            "kafka" => self.construct_kafka_writer(),
            "mongodb" => self.construct_mongodb_writer(),
            "mqtt" => self.construct_mqtt_writer(),
            "nats" => self.construct_nats_writer(),

            // Sinks that additionally take the data format and/or the license.
            "postgres" => self.construct_postgres_writer(py, data_format),
            "elasticsearch" => self.construct_elasticsearch_writer(py, license),
            "kinesis" => self.construct_kinesis_writer(license),
            "deltalake" => self.construct_deltalake_writer(py, data_format, license),
            "dynamodb" => self.construct_dynamodb_writer(py, data_format, license),
            "iceberg" => self.construct_iceberg_writer(py, data_format, license),
            "mysql" => self.construct_mysql_writer(py, data_format, license),
            "questdb" => self.construct_questdb_writer(py, data_format, license),

            other => Err(PyValueError::new_err(format!(
                "Unknown data sink {other:?}"
            ))),
        }
    }
}

impl DataFormat {
    /// Returns a map from each value field's name to its type.
    ///
    /// If several fields share a name, the last one wins.
    pub fn value_fields_type_map(&self, py: pyo3::Python) -> HashMap<String, Type> {
        let mut type_by_name = HashMap::new();
        for field in &self.value_fields {
            let field = field.borrow(py);
            type_by_name.insert(field.name.clone(), field.type_.clone());
        }
        type_by_name
    }

    /// Returns owned copies of all value fields, in declaration order.
    fn value_fields_vec(&self, py: pyo3::Python) -> Vec<ValueField> {
        let mut fields = Vec::with_capacity(self.value_fields.len());
        for field in &self.value_fields {
            fields.push(field.borrow(py).clone());
        }
        fields
    }

    /// Returns the names of all value fields, in declaration order.
    fn value_field_names(&self, py: pyo3::Python) -> Vec<String> {
        let mut names = Vec::with_capacity(self.value_fields.len());
        for field in &self.value_fields {
            names.push(field.borrow(py).name.clone());
        }
        names
    }

    /// Builds the `DsvSettings` from the key field names, value field names and
    /// the configured delimiter.
    ///
    /// # Errors
    ///
    /// `PyValueError` when no delimiter is configured.
    fn construct_dsv_settings(&self, py: pyo3::Python) -> PyResult<DsvSettings> {
        let delimiter = self
            .delimiter
            .ok_or_else(|| PyValueError::new_err("For dsv format, delimiter must be specified"))?;

        let key_field_names = self.key_field_names.clone();
        let value_field_names = self.value_field_names(py);
        Ok(DsvSettings::new(
            key_field_names,
            value_field_names,
            delimiter,
        ))
    }

    /// Returns the schema as a map from value field name to its `InnerSchemaField`.
    ///
    /// # Errors
    ///
    /// `PyValueError` when a name listed in `key_field_names` is not one of the
    /// value fields; the first such name is reported.
    fn schema(&self, py: pyo3::Python) -> PyResult<HashMap<String, InnerSchemaField>> {
        let types: HashMap<String, InnerSchemaField> = self
            .value_fields
            .iter()
            .map(|field| {
                let field = field.borrow(py);
                (field.name.clone(), field.as_inner_schema_field())
            })
            .collect();

        let missing_key = self
            .key_field_names
            .iter()
            .flatten()
            .find(|name| !types.contains_key(*name));
        match missing_key {
            Some(name) => Err(PyValueError::new_err(format!(
                "key field {name} not found in schema"
            ))),
            None => Ok(types),
        }
    }

    /// Translates the configured `timestamp_unit` into an Arrow time unit.
    ///
    /// `"us"` maps to `ArrowTimeUnit::Microsecond` and `"ns"` to
    /// `ArrowTimeUnit::Nanosecond`.
    ///
    /// # Errors
    ///
    /// `PyValueError` when the unit is not set or is any other string.
    fn arrow_time_unit(&self) -> PyResult<ArrowTimeUnit> {
        // `ok_or_else` keeps the error construction off the success path.
        let unit = self.timestamp_unit.as_deref().ok_or_else(|| {
            PyValueError::new_err("expected time unit, but it is not specified")
        })?;
        match unit {
            "us" => Ok(ArrowTimeUnit::Microsecond),
            "ns" => Ok(ArrowTimeUnit::Nanosecond),
            other => Err(PyValueError::new_err(format!(
                "unknown time unit: '{other}'"
            ))),
        }
    }

    /// Creates the input parser that matches `format_type`.
    ///
    /// Supported formats are `"dsv"`, `"debezium"`, `"jsonlines"`, `"identity"`
    /// and `"transparent"`.
    ///
    /// # Errors
    ///
    /// `PyValueError` for any other format; otherwise whatever the settings,
    /// schema, schema registry decoder or parser construction return.
    fn construct_parser(&self, py: pyo3::Python) -> PyResult<Box<dyn Parser>> {
        let format_type: &str = self.format_type.as_ref();
        match format_type {
            "dsv" => {
                let settings = self.construct_dsv_settings(py)?;
                let schema = self.schema(py)?;
                let parser = settings.parser(schema)?;
                Ok(parser)
            }
            "debezium" => Ok(Box::new(DebeziumMessageParser::new(
                self.key_field_names.clone(),
                self.value_field_names(py),
                DebeziumMessageParser::standard_separator(),
                self.debezium_db_type,
            ))),
            "jsonlines" => {
                let value_fields = self.value_fields_vec(py);
                let column_paths = self.column_paths.clone().unwrap_or_default();
                // The schema is validated before the registry decoder is built.
                let schema = self.schema(py)?;
                let schema_registry_decoder = self
                    .schema_registry_settings
                    .clone()
                    .map(PySchemaRegistrySettings::build_decoder)
                    .transpose()?;
                let parser = JsonLinesParser::new(
                    self.key_field_names.as_deref(),
                    value_fields,
                    column_paths,
                    self.field_absence_is_error,
                    schema,
                    self.session_type,
                    schema_registry_decoder,
                )?;
                Ok(Box::new(parser))
            }
            "identity" => {
                let value_field_names = self.value_field_names(py);
                let parser = IdentityParser::new(
                    value_field_names.as_slice(),
                    self.parse_utf8,
                    self.message_queue_key_field.as_ref(),
                    self.key_generation_policy,
                    self.session_type,
                );
                Ok(Box::new(parser))
            }
            "transparent" => {
                let key_field_names = self.key_field_names.clone();
                let value_field_names = self.value_field_names(py);
                let schema = self.schema(py)?;
                let parser = TransparentParser::new(
                    key_field_names,
                    value_field_names,
                    schema,
                    self.session_type,
                )?;
                Ok(Box::new(parser))
            }
            _ => Err(PyValueError::new_err("Unknown data format")),
        }
    }

    /// Creates the output formatter that matches `format_type`.
    ///
    /// Supported formats are `"dsv"`, `"jsonlines"`, `"null"`, `"single_column"`,
    /// `"identity"` and `"bson"`.
    ///
    /// # Errors
    ///
    /// `PyValueError` when the format is unknown, when `"jsonlines"` has
    /// `schema_registry_settings` without a `subject`, or when `"single_column"`
    /// has no `value_field_index`; otherwise whatever the DSV settings or the
    /// registry encoder construction return.
    fn construct_formatter(&self, py: pyo3::Python) -> PyResult<Box<dyn Formatter>> {
        let format_type: &str = self.format_type.as_ref();
        match format_type {
            "dsv" => Ok(self.construct_dsv_settings(py)?.formatter()),
            "jsonlines" => {
                let registry_encoder = match &self.schema_registry_settings {
                    Some(settings) => {
                        // The subject is required before the encoder is built.
                        let Some(subject) = self.subject.clone() else {
                            return Err(PyValueError::new_err(
                                "If a data formatter has 'schema_registry_settings' ".to_owned()
                                    + "specified, it must also have 'subject' set",
                            ));
                        };
                        let encoder = settings.clone().build_encoder()?;
                        Some(RegistryEncoderWrapper::new(encoder, subject))
                    }
                    None => None,
                };
                Ok(Box::new(JsonLinesFormatter::new(
                    self.value_field_names(py),
                    registry_encoder,
                )))
            }
            "null" => Ok(Box::new(NullFormatter::new())),
            "single_column" => {
                let Some(index) = self.value_field_index else {
                    return Err(PyValueError::new_err("Payload column not specified"));
                };
                Ok(Box::new(SingleColumnFormatter::new(index)))
            }
            "identity" => Ok(Box::new(IdentityFormatter::new(
                self.external_diff_column_index,
            ))),
            "bson" => Ok(Box::new(BsonFormatter::new(self.value_field_names(py)))),
            _ => Err(PyValueError::new_err("Unknown data format")),
        }
    }
}

#[pyclass(module = "pathway.engine", frozen)]
#[derive(Clone)]
pub struct ColumnProperties(Arc<EngineColumnProperties>);

#[pymethods]
impl ColumnProperties {
    #[new]
    #[pyo3(signature = (
        dtype,
        trace = None,
        append_only = false
    ))]
    fn new(
        py: Python,
        dtype: Py<PyAny>,
        trace: Option<Py<Trace>>,
        append_only: bool,
    ) -> PyResult<Py<Self>> {
        let trace = trace.map_or(Ok(EngineTrace::Empty), |t| t.extract(py))?;
        let inner = Arc::new(EngineColumnProperties {
            append_only,
            dtype: dtype.extract(py)?,
            trace: Arc::new(trace),
        });
        let res = Py::new(py, Self(inner))?;
        Ok(res)
    }
}

// Python-visible, immutable and subclassable handle to shared engine table properties.
#[pyclass(module = "pathway.engine", frozen, subclass)]
#[derive(Clone)]
pub struct TableProperties(Arc<EngineTableProperties>);

impl TableProperties {
    /// Wraps already-built engine table properties into a new Python object.
    fn new(py: Python, inner: Arc<EngineTableProperties>) -> PyResult<Py<Self>> {
        Py::new(py, Self(inner))
    }
}

#[pymethods]
impl TableProperties {
    // Builds table properties consisting of a single column.
    #[staticmethod]
    fn column(py: Python, column_properties: ColumnProperties) -> PyResult<Py<Self>> {
        let inner = Arc::new(EngineTableProperties::Column(column_properties.0));
        TableProperties::new(py, inner)
    }

    // Builds table properties from `(path, column properties)` pairs through
    // `EngineTableProperties::from_paths`. `trace` defaults to an empty engine
    // trace. Errors from the trace conversion or from `from_paths` are propagated.
    #[staticmethod]
    #[pyo3(signature = (column_properties, trace=None))]
    fn from_column_properties(
        py: Python,
        #[pyo3(from_py_with = from_py_iterable)] column_properties: Vec<(
            ColumnPath,
            ColumnProperties,
        )>,
        trace: Option<Py<Trace>>,
    ) -> PyResult<Py<Self>> {
        let trace = trace.map_or(Ok(EngineTrace::Empty), |t| t.extract(py))?;
        // Unwrap and re-wrap each column in one pass, without an intermediate Vec.
        let table_properties = EngineTableProperties::from_paths(
            column_properties
                .into_iter()
                .map(|(path, properties)| (path, EngineTableProperties::Column(properties.0)))
                .collect(),
            &Arc::new(trace),
        )?;

        TableProperties::new(py, Arc::new(table_properties))
    }
}

// Immutable bundle of connector settings; every field is readable from Python
// through a generated getter.
#[pyclass(module = "pathway.engine", frozen)]
#[derive(Clone)]
pub struct ConnectorProperties {
    // NOTE(review): presumably the commit interval in milliseconds — confirm
    // against the code that consumes it.
    #[pyo3(get)]
    commit_duration_ms: Option<u64>,
    // Not read anywhere in the visible Rust code, hence the lint suppression;
    // it is still exposed to Python.
    #[allow(unused)]
    #[pyo3(get)]
    unsafe_trusted_ids: bool,
    #[pyo3(get)]
    column_properties: Vec<ColumnProperties>,
    #[pyo3(get)]
    unique_name: Option<UniqueName>,
    // When set, `max_parallel_readers` restricts the number of parallel readers.
    #[pyo3(get)]
    synchronization_group: Option<ConnectorGroupDescriptor>,
    #[pyo3(get)]
    max_backlog_size: Option<usize>,
}

#[pymethods]
impl ConnectorProperties {
    // Python constructor; every argument is optional and is stored unchanged.
    // Defaults: `None` for the optional values, `false` for `unsafe_trusted_ids`
    // and an empty list for `column_properties`.
    #[new]
    #[pyo3(signature = (
        commit_duration_ms = None,
        unsafe_trusted_ids = false,
        column_properties = vec![],
        unique_name = None,
        synchronization_group = None,
        max_backlog_size = None,
    ))]
    fn new(
        commit_duration_ms: Option<u64>,
        unsafe_trusted_ids: bool,
        #[pyo3(from_py_with = from_py_iterable)] column_properties: Vec<ColumnProperties>,
        unique_name: Option<String>,
        synchronization_group: Option<ConnectorGroupDescriptor>,
        max_backlog_size: Option<usize>,
    ) -> Self {
        Self {
            commit_duration_ms,
            unsafe_trusted_ids,
            column_properties,
            unique_name,
            synchronization_group,
            max_backlog_size,
        }
    }
}

/// Value returned by `ConnectorProperties::max_parallel_readers` when the
/// connector has no synchronization group, i.e. "no limit".
/// Greater than any allowed worker number.
const NO_PARALLEL_READERS_LIMIT: usize = 256;

impl ConnectorProperties {
    /// Returns the shared engine-side properties of every column, in order.
    fn column_properties(&self) -> Vec<Arc<EngineColumnProperties>> {
        self.column_properties
            .iter()
            .map(|properties| Arc::clone(&properties.0))
            .collect()
    }

    /// Returns the upper bound on the number of workers that may read in parallel.
    ///
    /// Without a synchronization group this is `NO_PARALLEL_READERS_LIMIT`;
    /// with one, it is the worker count divided by the process count.
    fn max_parallel_readers(&self, scope: &Scope) -> usize {
        if self.synchronization_group.is_none() {
            return NO_PARALLEL_READERS_LIMIT;
        }

        // Synchronization groups rely on a mutex-guarded structure that must be queried
        // after each read to ensure that values stay within the defined range.
        // When reads occur from multiple processes, maintaining this synchronization
        // would require inter-process communication to align the data streams
        // across processes on the shared time dimension.
        //
        // To avoid such overhead, the number of parallel readers is limited
        // to the number of workers in the first process. If this setup appears
        // to be too slow, the initial reading stage can be replicated across
        // multiple workers within the first process.
        let total_workers = scope.worker_count();
        let processes = scope.process_count();
        total_workers / processes
    }
}

// Immutable description of a single source location; every field is readable
// from Python through a generated getter. Converted to and from
// `EngineTrace::Frame` by the trait impls below.
#[pyclass(module = "pathway.engine", frozen)]
#[derive(Clone)]
pub struct Trace {
    // NOTE(review): presumably the text of the source line — confirm with the caller.
    #[pyo3(get)]
    line: String,
    #[pyo3(get)]
    file_name: String,
    #[pyo3(get)]
    line_number: u32,
    #[pyo3(get)]
    function: String,
}

#[pymethods]
impl Trace {
    // Python constructor; all four arguments are required and stored unchanged.
    #[new]
    #[pyo3(signature = (
        line,
        file_name,
        line_number,
        function,
    ))]
    fn new(line: String, file_name: String, line_number: u32, function: String) -> Self {
        Self {
            line,
            file_name,
            line_number,
            function,
        }
    }
}

/// Converts a Python `Trace` object into `EngineTrace::Frame`.
///
/// Fails with the extraction error when the object is not a `Trace`.
impl<'py> FromPyObject<'py> for EngineTrace {
    fn extract_bound(obj: &Bound<'py, PyAny>) -> PyResult<Self> {
        let trace: Trace = obj.extract()?;
        Ok(Self::Frame {
            line: trace.line,
            file_name: trace.file_name,
            line_number: trace.line_number,
            function: trace.function,
        })
    }
}

/// Converts an engine trace into Python: `Empty` becomes `None`, a `Frame`
/// becomes a `Trace` object with the same fields.
impl<'py> IntoPyObject<'py> for EngineTrace {
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;

    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        let Self::Frame {
            line,
            file_name,
            line_number,
            function,
        } = self
        else {
            return Ok(py.None().into_bound(py));
        };

        let trace = Trace {
            line,
            file_name,
            line_number,
            function,
        };
        trace.into_bound_py_any(py)
    }
}

// Kept in its own submodule so that no other code can create instances of
// `Done`: its only field has a type that is private to this module.
mod done {
    use once_cell::sync::Lazy;
    use pyo3::prelude::*;

    // Private marker; being unnameable outside this module is what prevents
    // constructing `Done` elsewhere.
    struct InnerDone;

    #[pyclass(module = "pathway.engine", frozen)]
    pub struct Done(InnerDone);

    // The single `Done` object, created under the GIL on first access.
    pub static DONE: Lazy<Py<Done>> = Lazy::new(|| {
        Python::with_gil(|py| {
            let instance = Py::new(py, Done(InnerDone));
            instance.expect("creating DONE should not fail")
        })
    });
}
use done::{Done, DONE};

#[pymethods]
impl Done {
    // Hash derived from the object's address.
    pub fn __hash__(self_: &Bound<Self>) -> usize {
        // mimic the default Python hash
        let address = self_.as_ptr() as usize;
        address.rotate_right(4)
    }

    // Rich comparison: `Done` is equal to itself and greater than any Python
    // int; for every other type `NotImplemented` is returned.
    pub fn __richcmp__(self_: &Bound<Self>, other: &Bound<PyAny>, op: CompareOp) -> Py<PyAny> {
        let py = other.py();
        let ordering = if other.is_instance_of::<Self>() {
            // Another `Done` must be this very object.
            assert!(self_.is(other));
            Ordering::Equal
        } else if other.is_instance_of::<PyInt>() {
            Ordering::Greater
        } else {
            return py.NotImplemented();
        };
        op.matches(ordering).into_py_any(py).unwrap()
    }
}

/// Extracts a frontier from Python: a `Done` instance becomes
/// `TotalFrontier::Done`, anything else is extracted as `T` and wrapped in
/// `TotalFrontier::At`.
impl<'py, T> FromPyObject<'py> for TotalFrontier<T>
where
    T: FromPyObject<'py>,
{
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        if ob.is_instance_of::<Done>() {
            return Ok(TotalFrontier::Done);
        }
        ob.extract().map(TotalFrontier::At)
    }
}

/// Converts a frontier into Python: `Done` becomes the shared `DONE` object,
/// `At(value)` becomes the converted `value`.
impl<'py, T> IntoPyObject<'py> for TotalFrontier<T>
where
    T: IntoPyObject<'py>,
{
    type Target = PyAny;
    type Output = Bound<'py, Self::Target>;
    type Error = PyErr;
    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
        match self {
            Self::Done => {
                let done = DONE.clone_ref(py);
                done.into_bound_py_any(py)
            }
            Self::At(value) => value.into_bound_py_any(py),
        }
    }
}

// Immutable Python wrapper, exposed as `ExportedTable`, around a shared
// `ExportedTable` trait object.
#[pyclass(module = "pathway.engine", frozen, name = "ExportedTable")]
pub struct PyExportedTable {
    inner: Arc<dyn ExportedTable>,
}

impl PyExportedTable {
    /// Wraps the given exported table.
    fn new(inner: Arc<dyn ExportedTable>) -> Self {
        Self { inner }
    }
}

// Every Python-visible method forwards to the wrapped table unchanged.
#[pymethods]
impl PyExportedTable {
    // Returns the wrapped table's current frontier.
    fn frontier(&self) -> TotalFrontier<Timestamp> {
        self.inner.frontier()
    }

    // Returns the wrapped table's snapshot for `frontier` as `(key, values)` rows.
    fn snapshot_at(&self, frontier: TotalFrontier<Timestamp>) -> Vec<(Key, Vec<Value>)> {
        self.inner.snapshot_at(frontier)
    }

    // Returns whether the wrapped table reports failure.
    fn failed(&self) -> bool {
        self.inner.failed()
    }
}

/// Guard that installs a file descriptor as Python's signal wakeup fd and
/// restores the previous value when dropped.
#[allow(clippy::struct_field_names)]
struct WakeupHandler<'py> {
    /// Owned so that the descriptor stays open for as long as the guard lives.
    _fd: OwnedFd,
    /// The `signal.set_wakeup_fd` callable, kept for the restore in `drop`.
    set_wakeup_fd: Bound<'py, PyAny>,
    /// Whatever `set_wakeup_fd` returned when `_fd` was installed.
    old_wakeup_fd: Bound<'py, PyAny>,
}

impl<'py> WakeupHandler<'py> {
    /// Installs `fd` through `signal.set_wakeup_fd`.
    ///
    /// Returns `Ok(None)` when Python raises `ValueError`, in which case `fd` is
    /// dropped on return. Any other Python error is propagated, as is an error
    /// from the signal check performed right after installation.
    fn new(py: Python<'py>, fd: OwnedFd) -> PyResult<Option<Self>> {
        let signal_module = py.import(intern!(py, "signal"))?;
        let set_wakeup_fd = signal_module.getattr(intern!(py, "set_wakeup_fd"))?;
        let old_wakeup_fd = set_wakeup_fd.call1((fd.as_raw_fd(),));
        if let Err(ref error) = old_wakeup_fd {
            if error.is_instance_of::<PyValueError>(py) {
                // We are not the main thread. This means we can ignore signal handling.
                return Ok(None);
            }
        }
        let old_wakeup_fd = old_wakeup_fd?;
        // The guard is built before the signal check so that a failing check
        // drops it and thereby restores the previous wakeup fd.
        let res = Some(Self {
            _fd: fd,
            set_wakeup_fd,
            old_wakeup_fd,
        });
        py.check_signals()?;
        Ok(res)
    }
}

impl Drop for WakeupHandler<'_> {
    /// Reinstalls the previous wakeup fd; panics if Python rejects the call.
    fn drop(&mut self) {
        self.set_wakeup_fd
            .call1((&self.old_wakeup_fd,))
            .expect("restoring the wakeup fd should not fail");
    }
}

/// Runs `logic` with a channel that delivers a "check Python signals" callback
/// each time data arrives on the signal wakeup pipe.
///
/// A pipe is created and its writer end is installed as Python's wakeup fd via
/// `WakeupHandler`. A helper thread reads from the pipe and, for every non-empty
/// read, sends a closure that runs `check_signals` under the GIL. The thread
/// stops when a read returns zero bytes.
///
/// `logic` always receives `Some(receiver)`. Its result is returned wrapped in
/// `Ok`; errors come only from creating the pipe, installing the handler or
/// spawning the thread.
fn run_with_wakeup_receiver<R>(
    py: Python,
    logic: impl FnOnce(Option<WakeupReceiver>) -> R,
) -> PyResult<R> {
    let wakeup_pipe = pipe(ReaderType::Blocking, WriterType::NonBlocking)?;
    // The writer end is moved into the handler (or dropped by `new` when it
    // returns `None`).
    let wakeup_handler = WakeupHandler::new(py, wakeup_pipe.writer)?;
    let mut wakeup_reader = File::from(wakeup_pipe.reader);
    let (wakeup_sender, wakeup_receiver): (_, WakeupReceiver) = crossbeam_channel::unbounded();
    let wakeup_thread = thread::Builder::new()
        .name("pathway:signal_wakeup".to_string())
        .spawn(move || loop {
            // The bytes themselves are discarded; only the fact that something
            // arrived matters.
            let amount = wakeup_reader
                .read(&mut [0; 1024])
                .expect("reading from the wakeup pipe should not fail");
            if amount == 0 {
                break;
            }

            // A failed send is deliberately ignored.
            #[allow(clippy::redundant_closure_for_method_calls)]
            wakeup_sender
                .send(Box::new(|| {
                    Python::with_gil(|py| py.check_signals()).map_err(DynError::from)
                }))
                .unwrap_or(());
        })?;
    // Cleanup is deferred, so it runs after `logic` has finished.
    defer! {
        // Drop the handler first to close the writer end of the pipe.
        drop(wakeup_handler);
        wakeup_thread.join().unwrap()
    }
    Ok(logic(Some(wakeup_receiver)))
}

/// Handle returned by `logging::init`, which runs once on first access.
static LOGGING_RESET_HANDLE: Lazy<ResetHandle> = Lazy::new(logging::init);

impl From<LicenseError> for PyErr {
    fn from(error: LicenseError) -> Self {
        let message = error.to_string();
        PyRuntimeError::new_err(message)
    }
}

// Python entry point: builds a license from `license_key` and checks that it
// grants all of `entitlements`. Both arguments are keyword-only. License
// errors are raised through the `From<LicenseError> for PyErr` conversion.
#[pyfunction]
#[pyo3(signature = (
    *,
    license_key,
    entitlements,
))]
fn check_entitlements(license_key: Option<String>, entitlements: Vec<String>) -> PyResult<()> {
    let license = License::new(license_key)?;
    license.check_entitlements(entitlements)?;
    Ok(())
}

// Initializer of the Python extension module exposed under the name `engine`.
// It forces logging initialization, registers the DeltaLake AWS handlers and then
// adds classes, functions and module-level attributes to `m`. Any registration
// failure is propagated to the caller as a `PyErr`.
#[pymodule]
#[pyo3(name = "engine")]
fn engine(_py: Python<'_>, m: &Bound<PyModule>) -> PyResult<()> {
    // Initialize the logging
    let _ = Lazy::force(&LOGGING_RESET_HANDLE);

    // Enable S3 support in DeltaLake library
    deltalake::aws::register_handlers(None);

    // Classes exposed to Python.
    m.add_class::<Pointer>()?;
    m.add_class::<PyObjectWrapper>()?;
    m.add_class::<PyReducer>()?;
    m.add_class::<PyReducerData>()?;
    m.add_class::<PyUnaryOperator>()?;
    m.add_class::<PyBinaryOperator>()?;
    m.add_class::<PyExpression>()?;
    m.add_class::<PyExpressionData>()?;
    m.add_class::<PathwayType>()?;
    m.add_class::<PyConnectorMode>()?;
    m.add_class::<PySessionType>()?;
    m.add_class::<PyPythonConnectorEventType>()?;
    m.add_class::<PyDebeziumDBType>()?;
    m.add_class::<PyKeyGenerationPolicy>()?;
    m.add_class::<PyReadMethod>()?;
    m.add_class::<PyFieldSource>()?;
    m.add_class::<PyMonitoringLevel>()?;
    m.add_class::<PyTableWriterInitMode>()?;
    m.add_class::<Universe>()?;
    m.add_class::<Column>()?;
    m.add_class::<LegacyTable>()?;
    m.add_class::<Table>()?;
    m.add_class::<DataRow>()?;
    m.add_class::<Computer>()?;
    m.add_class::<Scope>()?;
    m.add_class::<Context>()?;

    // Settings/configuration-style classes (grouping inferred from the type names).
    m.add_class::<AwsS3Settings>()?;
    m.add_class::<AzureBlobStorageSettings>()?;
    m.add_class::<ElasticSearchParams>()?;
    m.add_class::<ElasticSearchAuth>()?;
    m.add_class::<CsvParserSettings>()?;
    m.add_class::<ValueField>()?;
    m.add_class::<DataStorage>()?;
    m.add_class::<DataFormat>()?;
    m.add_class::<PersistenceConfig>()?;
    m.add_class::<PythonSubject>()?;
    m.add_class::<PyPersistenceMode>()?;
    m.add_class::<PySnapshotAccess>()?;
    m.add_class::<PySnapshotEvent>()?;
    m.add_class::<PyConnectorGroupDescriptor>()?;
    m.add_class::<TelemetryConfig>()?;
    m.add_class::<BackfillingThreshold>()?;
    m.add_class::<PyDeltaOptimizerRule>()?;
    m.add_class::<MqttSettings>()?;
    m.add_class::<PySchemaRegistrySettings>()?;
    m.add_class::<IcebergCatalogSettings>()?;

    m.add_class::<ConnectorProperties>()?;
    m.add_class::<ColumnProperties>()?;
    m.add_class::<TableProperties>()?;
    m.add_class::<Trace>()?;
    m.add_class::<Done>()?;
    m.add_class::<PyExportedTable>()?;
    m.add_class::<Error>()?;
    m.add_class::<Pending>()?;

    // External index related classes.
    m.add_class::<PyExternalIndexFactory>()?;
    m.add_class::<PyExternalIndexData>()?;
    m.add_class::<PyExternalIndexQuery>()?;
    m.add_class::<PyUSearchMetricKind>()?;
    m.add_class::<PyBruteForceKnnMetricKind>()?;

    // Module-level functions.
    m.add_function(wrap_pyfunction!(run_with_new_graph, m)?)?;
    m.add_function(wrap_pyfunction!(ref_scalar, m)?)?;
    m.add_function(wrap_pyfunction!(ref_scalar_with_instance, m)?)?;
    #[allow(clippy::unsafe_removed_from_name)] // false positive
    m.add_function(wrap_pyfunction!(unsafe_make_pointer, m)?)?;
    m.add_function(wrap_pyfunction!(check_entitlements, m)?)?;
    m.add_function(wrap_pyfunction!(deserialize, m)?)?;
    m.add_function(wrap_pyfunction!(serialize, m)?)?;

    // Error objects exported as module attributes
    // (presumably Python exception classes; confirm at the statics' definitions).
    m.add("MissingValueError", &*MISSING_VALUE_ERROR_TYPE)?;
    m.add("EngineError", &*ENGINE_ERROR_TYPE)?;
    m.add("EngineErrorWithTrace", &*ENGINE_ERROR_WITH_TRACE_TYPE)?;
    m.add("OtherWorkerError", &*OTHER_WORKER_ERROR)?;

    // Module-level constants `DONE`, `ERROR` and `PENDING`.
    m.add("DONE", &*DONE)?;
    m.add("ERROR", &*ERROR)?;
    m.add("PENDING", &*PENDING)?;

    Ok(())
}
