Files
secondo-py/pysecondo/algebras/relation.py
2026-01-24 11:30:02 +08:00

291 lines
8.5 KiB
Python

"""
RelationAlgebra - Relational data operators
This algebra provides:
- Data manipulation: create, update
- Stream processing: feed, consume
- Filtering: filter
- Aggregation: count
Stream processing model:
relation --feed--> stream --filter--> stream --consume--> relation
In SECONDO, streams are iterators that pass tuples from one operator to the next.
For simplicity, PySECONDO represents a stream as a plain nested list of tuples.
"""
from typing import List
from pysecondo.core.types import BaseType, Type, TupleType, RelationType, Attribute
from pysecondo.core.nested_list import NestedList, atom, list_nl
from pysecondo.storage.memory import MemoryStorage
from pysecondo.algebras.base import Algebra, Operator
class Stream:
    """
    Minimal stream container for PySECONDO.

    Real SECONDO implements streams as C++ iterators; this class simply
    pairs an already-materialized Python list of tuple values with the
    tuple type that describes them.

    Attributes:
        tuples: The tuple values carried by the stream, in order.
        tuple_type: The schema (tuple type) shared by every element.
    """

    def __init__(self, tuples: List[NestedList], tuple_type: TupleType):
        """
        Store the tuple values together with their schema.

        Args:
            tuples: List of tuple values making up the stream.
            tuple_type: Tuple type describing each element of ``tuples``.
        """
        # The list is kept by reference, not copied.
        self.tuples, self.tuple_type = tuples, tuple_type
class RelationAlgebra(Algebra):
    """
    Relation algebra for data manipulation.

    Registers the operators ``create``, ``update``, ``feed``, ``consume``,
    ``filter`` and ``count``. Streams are not a separate runtime type here:
    on the type level a stream is represented by its tuple type, and on the
    value level by the nested list of tuples itself.

    This algebra keeps a reference to the storage backend used to manage
    relations.
    """

    # Comparison operators recognised in a ``(op lhs rhs)`` filter predicate.
    # Kept as an explicit tuple: testing membership in a *string* such as
    # "><=>" is a substring test, which would accept "" or "><" and
    # reject ">=".
    _COMPARISON_OPS = (">", "<", "=", ">=", "<=")

    def __init__(self, storage: MemoryStorage):
        """
        Initialize relation algebra.

        Args:
            storage: Storage backend for managing relations.
        """
        # Assigned before the base initializer runs, so the attribute is
        # already available if the base class triggers init() from there
        # (NOTE(review): presumably it does - confirm in Algebra.__init__).
        self.storage = storage
        super().__init__()

    def init(self) -> None:
        """Register all relation operators."""
        # (name, type map, value map). A value map of None marks operators
        # that are handled specially by the query processor.
        operator_table = (
            ("create", self.type_map_create, None),
            ("update", self.type_map_update, None),
            ("feed", self.type_map_feed, self.value_map_feed),
            ("consume", self.type_map_consume, self.value_map_consume),
            ("filter", self.type_map_filter, self.value_map_filter),
            ("count", self.type_map_count, self.value_map_count),
        )
        for name, type_map, value_map in operator_table:
            self.register_operator(Operator(name, type_map, value_map))

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _require_args(op_name: str, args: List[NestedList], minimum: int) -> None:
        """
        Ensure a value map received at least ``minimum`` arguments.

        Raises:
            TypeError: If fewer than ``minimum`` arguments were supplied.
                Without this guard the value maps would fail with a bare
                IndexError when indexing ``args``.
        """
        if len(args) < minimum:
            raise TypeError(
                f"{op_name} requires at least {minimum} argument(s), "
                f"got {len(args)}"
            )

    # ------------------------------------------------------------------
    # Type mapping functions
    # ------------------------------------------------------------------

    def type_map_create(self, args: List[Type]) -> Type:
        """
        Type map for create operator.

        Syntax: create identifier : type

        This is handled specially by the query processor, so calling this
        type map directly is always an error.

        Raises:
            TypeError: Always.
        """
        raise TypeError("create is handled by query processor")

    def type_map_update(self, args: List[Type]) -> Type:
        """
        Type map for update operator.

        Syntax: update identifier := value

        This is handled specially by the query processor, so calling this
        type map directly is always an error.

        Raises:
            TypeError: Always.
        """
        raise TypeError("update is handled by query processor")

    def type_map_feed(self, args: List[Type]) -> Type:
        """
        Type map for feed operator.

        Input:  (rel (tuple (...)))
        Output: (stream (tuple (...)))

        In PySECONDO a stream type is represented by its tuple type, so the
        relation's tuple type is returned.

        Raises:
            TypeError: If the argument count is not 1 or the argument is
                not a relation type.
        """
        if len(args) != 1:
            raise TypeError(f"feed expects 1 argument, got {len(args)}")
        if not isinstance(args[0], RelationType):
            raise TypeError(f"feed requires relation type, got {args[0]}")
        # The tuple type doubles as the stream element type.
        return args[0].tuple_type

    def type_map_consume(self, args: List[Type]) -> Type:
        """
        Type map for consume operator.

        Input:  (stream (tuple (...)))
        Output: (rel (tuple (...)))

        Raises:
            TypeError: If the argument count is not 1 or the argument is
                not a tuple type (the stand-in for a stream type).
        """
        if len(args) != 1:
            raise TypeError(f"consume expects 1 argument, got {len(args)}")
        # Streams are represented by their tuple type, so that is what a
        # well-typed argument looks like here.
        if not isinstance(args[0], TupleType):
            raise TypeError(
                f"consume requires tuple/stream type, got {args[0]}")
        return RelationType(args[0])

    def type_map_filter(self, args: List[Type]) -> Type:
        """
        Type map for filter operator.

        Syntax: stream filter[fun]

        Input:  (stream T, (tuple -> bool) function)
        Output: (stream T)

        Only the stream argument is checked; the type of the predicate
        function is not inspected.

        Raises:
            TypeError: If the argument count is not 2 or the first argument
                is not a tuple type (the stand-in for a stream type).
        """
        if len(args) != 2:
            raise TypeError(f"filter expects 2 arguments, got {len(args)}")
        stream_type = args[0]
        if not isinstance(stream_type, TupleType):
            raise TypeError(f"filter requires tuple stream, got {stream_type}")
        # Filtering never changes the schema: output type == input type.
        return stream_type

    def type_map_count(self, args: List[Type]) -> Type:
        """
        Type map for count operator.

        Input:  (stream T)
        Output: int

        The type of the single argument is not checked.

        Raises:
            TypeError: If the argument count is not 1.
        """
        if len(args) != 1:
            raise TypeError(f"count expects 1 argument, got {len(args)}")
        return BaseType.INT

    # ------------------------------------------------------------------
    # Value mapping functions
    # ------------------------------------------------------------------

    def value_map_feed(self, args: List[NestedList]) -> NestedList:
        """
        Convert relation to stream.

        A relation value is already the list of its tuples, so the same
        nested list is returned unchanged.

        Raises:
            TypeError: If no argument is given or the argument is not a
                list.
        """
        self._require_args("feed", args, 1)
        relation = args[0]
        if not relation.is_list():
            raise TypeError("feed requires a relation (list)")
        return relation

    def value_map_consume(self, args: List[NestedList]) -> NestedList:
        """
        Convert stream to relation.

        Streams are already nested lists, so the same nested list is
        returned unchanged.

        Raises:
            TypeError: If no argument is given or the argument is not a
                list.
        """
        self._require_args("consume", args, 1)
        stream = args[0]
        if not stream.is_list():
            raise TypeError("consume requires a stream (list)")
        return stream

    def value_map_filter(self, args: List[NestedList]) -> NestedList:
        """
        Filter stream based on predicate function.

        Syntax: stream filter[predicate]

        Recognised predicate shapes:
        - Constant bool atom: ``true`` passes every tuple, ``false`` passes
          none.
        - Attribute access ``("." "AttrName")``: NOT evaluated yet; every
          tuple is passed through (as a fresh list).
        - Comparison ``(op ("." "AttrName") value)``: NOT evaluated yet;
          the stream is returned unchanged.

        Any other predicate also returns the stream unchanged. Evaluating
        attribute predicates needs the tuple schema, which a value map does
        not receive.

        Examples:
            stream filter[true]                    # pass all
            stream filter[.Population > 1000000]   # currently passes all

        Raises:
            TypeError: If fewer than two arguments are given or the first
                argument is not a list.
        """
        self._require_args("filter", args, 2)
        stream, predicate = args[0], args[1]
        if not stream.is_list():
            raise TypeError("filter requires a stream")

        # Case 1: constant boolean predicate.
        if predicate.is_atom() and isinstance(predicate.value, bool):
            return stream if predicate.value else list_nl()

        if predicate.is_list():
            # Case 2: simple attribute access, represented as ("." "AttrName").
            if len(predicate) == 2:
                marker = predicate[0]
                if marker.is_atom() and marker.value == ".":
                    # TODO: look the attribute up by name once the schema
                    # is available here; until then keep every tuple.
                    return list_nl(*stream.value)

            # Case 3: comparison, represented as (op ("." "AttrName") value).
            elif len(predicate) == 3:
                op = predicate[0]
                if (op.is_atom() and isinstance(op.value, str)
                        and op.value in self._COMPARISON_OPS):
                    # TODO: evaluate the comparison per tuple; until then
                    # the stream is passed through unchanged.
                    return stream

        # Default: unrecognised predicate, return stream unchanged.
        return stream

    def value_map_count(self, args: List[NestedList]) -> NestedList:
        """
        Count elements in stream.

        Input:  stream (list of tuples)
        Output: int atom holding the number of elements

        Raises:
            TypeError: If no argument is given or the argument is not a
                list.
        """
        self._require_args("count", args, 1)
        stream = args[0]
        if not stream.is_list():
            raise TypeError("count requires a stream")
        return atom(len(stream.value))