291 lines
8.5 KiB
Python
291 lines
8.5 KiB
Python
"""
|
|
RelationAlgebra - Relational data operators
|
|
|
|
This algebra provides:
|
|
- Data manipulation: create, update
|
|
- Stream processing: feed, consume
|
|
- Filtering: filter
|
|
- Aggregation: count
|
|
|
|
Stream processing model:
|
|
relation --feed--> stream --filter--> stream --consume--> relation
|
|
|
|
In SECONDO, streams are represented as streams of tuples.
|
|
For simplicity in PySECONDO, we use nested lists directly.
|
|
"""
|
|
|
|
from typing import List
|
|
from pysecondo.core.types import BaseType, Type, TupleType, RelationType, Attribute
|
|
from pysecondo.core.nested_list import NestedList, atom, list_nl
|
|
from pysecondo.storage.memory import MemoryStorage
|
|
from pysecondo.algebras.base import Algebra, Operator
|
|
|
|
|
|
class Stream:
    """
    Lightweight stand-in for a SECONDO tuple stream.

    Real SECONDO implements streams as C++ iterators; PySECONDO keeps things
    simple and holds the tuples in a plain Python list.
    """

    def __init__(self, tuples: List[NestedList], tuple_type: TupleType):
        # tuples: list of tuple values carried by the stream
        # tuple_type: the type supplied for those tuples
        self.tuples, self.tuple_type = tuples, tuple_type
|
|
|
|
|
|
class RelationAlgebra(Algebra):
    """
    Relation algebra for data manipulation.

    This algebra requires access to storage to manage relations.

    Six operators are registered: ``create`` and ``update`` (type map only;
    their value map is ``None`` because the query processor handles them),
    plus ``feed``, ``consume``, ``filter`` and ``count``.

    Streams have no dedicated representation here: at the type level a stream
    is described by its ``TupleType``, and at the value level it is the nested
    list of tuples itself.
    """

    def __init__(self, storage: MemoryStorage):
        """
        Initialize relation algebra.

        Args:
            storage: Storage backend for managing relations
        """
        # Stored before the base constructor runs.
        # NOTE(review): presumably Algebra.__init__ triggers init(); confirm
        # before reordering these two statements.
        self.storage = storage
        super().__init__()

    def init(self) -> None:
        """Register all relation operators."""
        # (name, type map, value map) in registration order.
        operator_table = (
            # create/update are handled specially by the query processor,
            # so they have no value map.
            ("create", self.type_map_create, None),
            ("update", self.type_map_update, None),
            ("feed", self.type_map_feed, self.value_map_feed),
            ("consume", self.type_map_consume, self.value_map_consume),
            ("filter", self.type_map_filter, self.value_map_filter),
            ("count", self.type_map_count, self.value_map_count),
        )
        for name, type_map, value_map in operator_table:
            self.register_operator(Operator(name, type_map, value_map))

    # Type mapping functions

    def type_map_create(self, args: List[Type]) -> Type:
        """
        Type map for create operator.

        Syntax: create identifier : type
        This is handled specially by the query processor.

        Raises:
            TypeError: always; this type map is never meant to be evaluated.
        """
        raise TypeError("create is handled by query processor")

    def type_map_update(self, args: List[Type]) -> Type:
        """
        Type map for update operator.

        Syntax: update identifier := value
        This is handled specially by the query processor.

        Raises:
            TypeError: always; this type map is never meant to be evaluated.
        """
        raise TypeError("update is handled by query processor")

    def type_map_feed(self, args: List[Type]) -> Type:
        """
        Type map for feed operator.

        Input:  (rel (tuple (...)))
        Output: (stream (tuple (...)))

        In PySECONDO a stream is typed by its tuple type, so the relation's
        tuple type is returned.

        Raises:
            TypeError: if not exactly one argument is given, or the argument
                is not a RelationType.
        """
        if len(args) != 1:
            raise TypeError(f"feed expects 1 argument, got {len(args)}")

        relation_type = args[0]
        if not isinstance(relation_type, RelationType):
            raise TypeError(f"feed requires relation type, got {relation_type}")

        # The tuple type doubles as the stream element type.
        return relation_type.tuple_type

    def type_map_consume(self, args: List[Type]) -> Type:
        """
        Type map for consume operator.

        Input:  (stream (tuple (...)))
        Output: (rel (tuple (...)))

        Raises:
            TypeError: if not exactly one argument is given, or the argument
                is not a TupleType.
        """
        if len(args) != 1:
            raise TypeError(f"consume expects 1 argument, got {len(args)}")

        # Streams are represented by their tuple type (see type_map_feed).
        stream_type = args[0]
        if not isinstance(stream_type, TupleType):
            raise TypeError(
                f"consume requires tuple/stream type, got {stream_type}")

        return RelationType(stream_type)

    def type_map_filter(self, args: List[Type]) -> Type:
        """
        Type map for filter operator.

        Syntax: stream filter[fun]
        Input:  (stream T, (tuple -> bool) function)
        Output: (stream T)

        Only the stream argument is validated; the function type is accepted
        as-is and not inspected.

        Raises:
            TypeError: if not exactly two arguments are given, or the first
                argument is not a TupleType.
        """
        if len(args) != 2:
            raise TypeError(f"filter expects 2 arguments, got {len(args)}")

        stream_type, _func_type = args

        if not isinstance(stream_type, TupleType):
            raise TypeError(f"filter requires tuple stream, got {stream_type}")

        # Filtering never changes the element type of the stream.
        return stream_type

    def type_map_count(self, args: List[Type]) -> Type:
        """
        Type map for count operator.

        Input:  (stream T)
        Output: int

        Raises:
            TypeError: if not exactly one argument is given.
        """
        if len(args) != 1:
            raise TypeError(f"count expects 1 argument, got {len(args)}")

        # NOTE(review): unlike the other type maps, the argument type itself
        # is not checked here; any single argument is accepted.
        return BaseType.INT

    # Value mapping functions

    def value_map_feed(self, args: List[NestedList]) -> NestedList:
        """
        Convert relation to stream.

        In PySECONDO the relation's nested list of tuples is the stream, so
        the argument is returned unchanged.

        Raises:
            TypeError: if no argument is given or it is not a list.
        """
        if not args:
            raise TypeError("feed expects 1 argument, got 0")

        relation = args[0]
        if not relation.is_list():
            raise TypeError("feed requires a relation (list)")

        return relation

    def value_map_consume(self, args: List[NestedList]) -> NestedList:
        """
        Convert stream to relation.

        In PySECONDO streams are already lists, so the argument is returned
        unchanged.

        Raises:
            TypeError: if no argument is given or it is not a list.
        """
        if not args:
            raise TypeError("consume expects 1 argument, got 0")

        stream = args[0]
        if not stream.is_list():
            raise TypeError("consume requires a stream (list)")

        return stream

    def value_map_filter(self, args: List[NestedList]) -> NestedList:
        """
        Filter stream based on predicate function.

        Syntax: stream filter[predicate]

        Only constant boolean predicates are actually evaluated:
        - filter[true]  returns the stream unchanged
        - filter[false] returns an empty list

        Every other predicate is currently a pass-through, because evaluating
        it would require the tuple schema, which is not available here:
        - Attribute access ("." "AttrName") returns a new list holding
          all tuples of the stream.
        - Anything else, including comparisons such as
          (">" ("." "AttrName") value), returns the stream unchanged.

        Raises:
            TypeError: if fewer than two arguments are given or the stream
                argument is not a list.
        """
        if len(args) < 2:
            raise TypeError(f"filter expects 2 arguments, got {len(args)}")

        stream, predicate = args[0], args[1]

        if not stream.is_list():
            raise TypeError("filter requires a stream")

        # Case 1: constant boolean -> pass all or pass none.
        if predicate.is_atom() and isinstance(predicate.value, bool):
            return stream if predicate.value else list_nl()

        # Case 2: attribute access ("." "AttrName").
        # The attribute cannot be resolved without the schema, so every tuple
        # is kept; the result is a fresh list rather than the input object.
        if (predicate.is_list() and len(predicate) == 2
                and predicate[0].is_atom() and predicate[0].value == "."):
            return list_nl(*stream.value)

        # Case 3 and default: comparison predicates and any other predicate
        # shape are not evaluated; the stream is returned unchanged.
        return stream

    def value_map_count(self, args: List[NestedList]) -> NestedList:
        """
        Count elements in stream.

        Input:  stream (list of tuples)
        Output: int (count), wrapped as an atom

        Raises:
            TypeError: if no argument is given or it is not a list.
        """
        if not args:
            raise TypeError("count expects 1 argument, got 0")

        stream = args[0]
        if not stream.is_list():
            raise TypeError("count requires a stream")

        return atom(len(stream.value))
|