# secondo-py/pysecondo/algebras/relation.py

"""
RelationAlgebra - Relational data operators

This algebra provides:
- Data manipulation: create, update
- Stream processing: feed, consume
- Filtering: filter
- Aggregation: count

Stream processing model:
  relation --feed--> stream --filter--> stream --consume--> relation

In SECONDO, streams are represented as streams of tuples.
For simplicity in PySECONDO, we use nested lists directly.
"""

from typing import List
from pysecondo.core.types import BaseType, Type, TupleType, RelationType, Attribute
from pysecondo.core.nested_list import NestedList, atom, list_nl
from pysecondo.storage.memory import MemoryStorage
from pysecondo.algebras.base import Algebra, Operator


class Stream:
    """
    Eager, list-backed stand-in for a SECONDO tuple stream

    Real SECONDO implements streams as C++ iterators; PySECONDO keeps
    things simple and stores the elements in a plain Python list
    together with the tuple type describing them.
    """

    def __init__(self, tuples: List[NestedList], tuple_type: TupleType):
        """
        Initialize a stream

        Args:
            tuples: Tuple values carried by the stream
            tuple_type: Type of the tuples in the stream
        """
        # Element type of the stream
        self.tuple_type = tuple_type
        # Tuple values, stored as given (the list is not copied)
        self.tuples = tuples


class RelationAlgebra(Algebra):
    """
    Relation algebra for data manipulation

    Registers the operators ``create``, ``update``, ``feed``, ``consume``,
    ``filter`` and ``count``.

    Streams have no dedicated type in this algebra: at the type level a
    stream is represented by its ``TupleType``, and at the value level it
    is the same nested list as the relation it was fed from.

    This algebra requires access to storage to manage relations.
    """

    def __init__(self, storage: MemoryStorage):
        """
        Initialize relation algebra

        Args:
            storage: Storage backend for managing relations
        """
        # Assigned before delegating to the base initializer.
        # NOTE(review): presumably Algebra.__init__ triggers init(), which
        # is why storage must already be set here - confirm in base class.
        self.storage = storage
        super().__init__()

    def init(self) -> None:
        """Register all relation operators"""
        operators = (
            # create/update have no value map: they are handled specially
            # by the query processor.
            ("create", self.type_map_create, None),
            ("update", self.type_map_update, None),
            ("feed", self.type_map_feed, self.value_map_feed),
            ("consume", self.type_map_consume, self.value_map_consume),
            ("filter", self.type_map_filter, self.value_map_filter),
            ("count", self.type_map_count, self.value_map_count),
        )
        for name, type_map, value_map in operators:
            self.register_operator(Operator(name, type_map, value_map))

    # Internal helpers

    @staticmethod
    def _require_value_args(op_name: str, args: List[NestedList],
                            expected: int) -> None:
        """
        Ensure a value map received at least ``expected`` arguments

        Raises:
            TypeError: If fewer than ``expected`` arguments were passed
        """
        if len(args) < expected:
            noun = "argument" if expected == 1 else "arguments"
            raise TypeError(
                f"{op_name} expects {expected} {noun}, got {len(args)}")

    @staticmethod
    def _is_attribute_access(predicate: NestedList) -> bool:
        """Return True if predicate has the shape ("." "AttrName")"""
        if not (predicate.is_list() and len(predicate) == 2):
            return False
        marker = predicate[0]
        return marker.is_atom() and marker.value == "."

    # Type mapping functions

    def type_map_create(self, args: List[Type]) -> Type:
        """
        Type map for create operator

        Syntax: create identifier : type
        This is handled specially by the query processor.

        Raises:
            TypeError: Always; this type map must not be called directly
        """
        raise TypeError("create is handled by query processor")

    def type_map_update(self, args: List[Type]) -> Type:
        """
        Type map for update operator

        Syntax: update identifier := value
        This is handled specially by the query processor.

        Raises:
            TypeError: Always; this type map must not be called directly
        """
        raise TypeError("update is handled by query processor")

    def type_map_feed(self, args: List[Type]) -> Type:
        """
        Type map for feed operator

        Input: (rel (tuple (...)))
        Output: (stream (tuple (...)))

        In PySECONDO there is no separate stream type; the result is the
        relation's tuple type, which stands for "stream of that tuple".

        Raises:
            TypeError: If the argument count is not 1 or the argument is
                not a relation type
        """
        if len(args) != 1:
            raise TypeError(f"feed expects 1 argument, got {len(args)}")

        if not isinstance(args[0], RelationType):
            raise TypeError(f"feed requires relation type, got {args[0]}")

        # Return the tuple type (stream element type)
        return args[0].tuple_type

    def type_map_consume(self, args: List[Type]) -> Type:
        """
        Type map for consume operator

        Input: (stream (tuple (...)))
        Output: (rel (tuple (...)))

        Raises:
            TypeError: If the argument count is not 1 or the argument is
                not a tuple type (the stand-in for a stream)
        """
        if len(args) != 1:
            raise TypeError(f"consume expects 1 argument, got {len(args)}")

        # consume takes a stream and returns a relation
        # For simplicity, we treat streams as their tuple type
        if not isinstance(args[0], TupleType):
            raise TypeError(
                f"consume requires tuple/stream type, got {args[0]}")

        return RelationType(args[0])

    def type_map_filter(self, args: List[Type]) -> Type:
        """
        Type map for filter operator

        Syntax: stream filter[fun]
        Input: (stream T, (tuple -> bool) function)
        Output: (stream T)

        Raises:
            TypeError: If the argument count is not 2 or the first
                argument is not a tuple type (the stand-in for a stream)
        """
        if len(args) != 2:
            raise TypeError(f"filter expects 2 arguments, got {len(args)}")

        # The function type is deliberately not inspected here; real
        # SECONDO would verify that it maps a tuple to bool.
        stream_type, _func_type = args

        # Stream should be a tuple type
        if not isinstance(stream_type, TupleType):
            raise TypeError(f"filter requires tuple stream, got {stream_type}")

        return stream_type  # Output same type as input stream

    def type_map_count(self, args: List[Type]) -> Type:
        """
        Type map for count operator

        Input: (stream T)
        Output: int

        Only the number of arguments is checked; the argument's type is
        not inspected.

        Raises:
            TypeError: If the argument count is not 1
        """
        if len(args) != 1:
            raise TypeError(f"count expects 1 argument, got {len(args)}")

        return BaseType.INT

    # Value mapping functions

    def value_map_feed(self, args: List[NestedList]) -> NestedList:
        """
        Convert relation to stream

        In PySECONDO a stream is the relation's own list of tuples, so the
        argument is returned as is (no copy).

        Raises:
            TypeError: If no argument is given or it is not a list
        """
        self._require_value_args("feed", args, 1)
        relation = args[0]

        if not relation.is_list():
            raise TypeError("feed requires a relation (list)")

        # Return the list of tuples (stream)
        return relation

    def value_map_consume(self, args: List[NestedList]) -> NestedList:
        """
        Convert stream to relation

        In PySECONDO, streams are already lists, so the argument is
        returned as is (no copy).

        Raises:
            TypeError: If no argument is given or it is not a list
        """
        self._require_value_args("consume", args, 1)
        stream = args[0]

        if not stream.is_list():
            raise TypeError("consume requires a stream (list)")

        return stream

    def value_map_filter(self, args: List[NestedList]) -> NestedList:
        """
        Filter stream based on predicate function

        Syntax: stream filter[predicate]

        The predicate is a nested list describing a function from tuple
        to bool. Only constant predicates are actually evaluated:

        - Constant bool: filter[true] returns the stream itself,
          filter[false] returns an empty list.
        - Attribute access ("." "AttrName"): NOT evaluated, because the
          value map has no schema to resolve the attribute; every tuple
          is passed through in a new list.
        - Comparison (op ("." "AttrName") value) and any other predicate:
          NOT evaluated; the stream is returned unchanged.

        Examples:
          stream filter[true]           # pass all
          stream filter[.Population > 1000000]  # currently passes all

        Raises:
            TypeError: If fewer than 2 arguments are given or the first
                argument is not a list
        """
        self._require_value_args("filter", args, 2)
        stream, predicate = args[0], args[1]

        if not stream.is_list():
            raise TypeError("filter requires a stream")

        # Case 1: Constant boolean
        if predicate.is_atom() and isinstance(predicate.value, bool):
            return stream if predicate.value else list_nl()

        # Case 2: Simple attribute access .AttrName
        # Evaluating it requires the tuple schema, which is not available
        # here, so all tuples pass (as a fresh list, as before).
        if self._is_attribute_access(predicate):
            return list_nl(*stream.value)

        # Case 3 and default: comparisons and every other predicate shape
        # are not evaluated yet; the stream is returned unchanged.
        # (In real SECONDO, this would evaluate the predicate)
        return stream

    def value_map_count(self, args: List[NestedList]) -> NestedList:
        """
        Count elements in stream

        Input: stream (list of tuples)
        Output: int (count), wrapped as an atom

        Raises:
            TypeError: If no argument is given or it is not a list
        """
        self._require_value_args("count", args, 1)
        stream = args[0]

        if not stream.is_list():
            raise TypeError("count requires a stream")

        return atom(len(stream.value))