first commit

This commit is contained in:
2026-01-24 11:30:02 +08:00
commit d99832f66b
26 changed files with 3456 additions and 0 deletions

10
.gitignore vendored Normal file
View File

@@ -0,0 +1,10 @@
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info
# Virtual environments
.venv

1
.python-version Normal file
View File

@@ -0,0 +1 @@
3.12

40
README.md Normal file
View File

@@ -0,0 +1,40 @@
# PySECONDO - A Minimal SECONDO Implementation in Python
一个简化的 SECONDO 数据库系统实现,用于学习和理解 SECONDO 的核心架构。
## 项目概述
PySECONDO 通过实现一个最小化的 SECONDO 版本,展示了 SECONDO 数据库系统的核心设计理念:
- **嵌套列表**(Nested List)作为核心数据结构
- **代数系统**(Algebra System)作为可扩展机制
- **类型系统**(Type System)用于数据验证
- **流式处理**(Stream Processing)用于数据操作
## 快速开始
### 运行演示
```bash
cd /home/db/secondo-py
python3 demo.py
```
### 交互式使用
```bash
python3 -m pysecondo.repl
```
## 功能特性
- ✅ 嵌套列表数据结构
- ✅ 完整类型系统
- ✅ 代数可扩展框架
- ✅ 关系代数操作
- ✅ 查询解析和执行
## 核心概念
详见项目文档和源码注释。
---
**版本**: 0.1.0

202
demo.py Normal file
View File

@@ -0,0 +1,202 @@
#!/usr/bin/env python3
"""
PySECONDO Demo - Interactive demonstration
This script demonstrates the complete PySECONDO system.
"""
from pysecondo.core.nested_list import atom, list_nl
from pysecondo.core.types import parse_type
from pysecondo.parser.evaluator import Evaluator
from pysecondo.parser.parser import Parser, QueryCommand
from pysecondo.algebras.relation import RelationAlgebra
from pysecondo.algebras.standard import StandardAlgebra
from pysecondo.algebras.base import AlgebraManager
from pysecondo.storage.memory import MemoryStorage
import sys
sys.path.insert(0, '.')
def print_separator():
    """Print a horizontal rule made of 60 '=' characters to stdout."""
    rule = "=" * 60
    print(rule)
def demo_basic_arithmetic():
    """Echo and evaluate a fixed list of arithmetic/comparison/boolean queries.

    Only StandardAlgebra is registered. Each statement is parsed; statements
    that do not parse to a QueryCommand are echoed but not evaluated.
    """
    banner = "» " * 30
    print("\n" + banner)
    print("DEMO: Basic Arithmetic Operations")
    print(banner)

    storage = MemoryStorage()
    manager = AlgebraManager()
    manager.register_algebra("StandardAlgebra", StandardAlgebra())
    query_parser = Parser()
    engine = Evaluator(manager, storage)

    statements = (
        "query 5 + 3",
        "query 10 - 4",
        "query 6 * 7",
        "query 20 / 4",
        "query 10 > 5",
        "query 5 = 5",
        "query true and false",
        "query not false",
    )
    for query in statements:
        print(f"\n> {query}")
        command = query_parser.parse(query)
        if not isinstance(command, QueryCommand):
            continue
        value, _ = engine.evaluate(
            query_parser.parse_expression(command.expression))
        print(f"Result: {value.to_python()}")
def demo_relation_operations():
    """Create an in-memory ``cities`` relation and run four stream queries.

    Registers StandardAlgebra and RelationAlgebra, stores five
    (Name, Population) tuples under the name ``cities``, then echoes and
    evaluates each query, printing the result to stdout.
    """
    banner = "» " * 30
    print("\n" + banner)
    print("DEMO: Relation Operations")
    print(banner)

    storage = MemoryStorage()
    manager = AlgebraManager()
    manager.register_algebra("StandardAlgebra", StandardAlgebra())
    manager.register_algebra("RelationAlgebra", RelationAlgebra(storage))
    query_parser = Parser()
    engine = Evaluator(manager, storage)

    # Create cities relation
    print("\n> create cities : (rel (tuple ((Name string)(Population int))))")
    schema = parse_type("(rel (tuple ((Name string)(Population int))))")
    rows = (
        ("Beijing", 21540000),
        ("Shanghai", 24280000),
        ("Guangzhou", 14040000),
        ("Shenzhen", 17560000),
        ("Hangzhou", 12200000),
    )
    storage.create_object(
        "cities", list_nl(*(list_nl(*row) for row in rows)), schema)
    print("Created: cities")

    # Each step: (echoed line, expression to evaluate, print via to_python()?)
    steps = (
        # Query all cities
        ("\n> query cities feed consume",
         "cities feed consume", False),
        # Count cities
        ("\n> query cities feed count",
         "cities feed count", True),
        # Filter with true (pass all)
        ("\n> query cities feed filter true consume",
         "cities feed filter true consume", False),
        # Filter with false (pass none)
        ("\n> query cities feed filter false count",
         "cities feed filter false count", True),
    )
    for echo, expression, as_python in steps:
        print(echo)
        value, _ = engine.evaluate(query_parser.parse_expression(expression))
        if as_python:
            print(f"Result: {value.to_python()}")
        else:
            print(f"Result: {value}")
def demo_complex_queries():
    """Create a ``products`` relation, count it, and evaluate one sum.

    The arithmetic query uses literal operands; it does not read values out
    of the stored relation.
    """
    banner = "» " * 30
    print("\n" + banner)
    print("DEMO: Complex Queries")
    print(banner)

    storage = MemoryStorage()
    manager = AlgebraManager()
    manager.register_algebra("StandardAlgebra", StandardAlgebra())
    manager.register_algebra("RelationAlgebra", RelationAlgebra(storage))
    query_parser = Parser()
    engine = Evaluator(manager, storage)

    # Create products relation
    print("\n> create products : (rel (tuple ((Name string)(Price int)(Stock int))))")
    schema = parse_type("(rel (tuple ((Name string)(Price int)(Stock int))))")
    rows = (
        ("Laptop", 1000, 50),
        ("Mouse", 25, 200),
        ("Keyboard", 75, 150),
        ("Monitor", 300, 75),
    )
    storage.create_object(
        "products", list_nl(*(list_nl(*row) for row in rows)), schema)
    print("Created: products")

    # Each step: (echoed line, expression to evaluate)
    steps = (
        # Count products
        ("\n> query products feed count", "products feed count"),
        # Arithmetic on stored values (not directly from relation)
        ("\n> query 1000 + 25", "1000 + 25"),
    )
    for echo, expression in steps:
        print(echo)
        value, _ = engine.evaluate(query_parser.parse_expression(expression))
        print(f"Result: {value.to_python()}")
def demo_system_capabilities():
    """List the registered algebras and every operator they provide.

    Operators are printed in sorted order together with the class name of
    the algebra that supplies them, followed by the operator total.
    """
    banner = "» " * 30
    print("\n" + banner)
    print("PySECONDO System Capabilities")
    print(banner)

    storage = MemoryStorage()
    manager = AlgebraManager()
    manager.register_algebra("StandardAlgebra", StandardAlgebra())
    manager.register_algebra("RelationAlgebra", RelationAlgebra(storage))

    print("\nRegistered Algebras:")
    for alg_name in manager.list_algebras():
        print(f" - {alg_name}")

    print("\nAvailable Operators:")
    for op_name in sorted(manager.list_operators()):
        alg = manager.get_algebra_for_operator(op_name)
        print(f" - {op_name:15s} (from {alg.__class__.__name__})")

    operator_total = len(manager.list_operators())
    print(f"\nTotal: {operator_total} operators")
def main():
    """Print the title header and run every demo in sequence.

    Any exception raised by a demo is reported on stdout together with its
    traceback instead of propagating; the remaining demos are then skipped.
    """
    print_separator()
    print(" PySECONDO - Complete Demonstration")
    print(" A minimal SECONDO implementation in Python")
    print_separator()
    try:
        for demo in (
            demo_basic_arithmetic,
            demo_relation_operations,
            demo_complex_queries,
            demo_system_capabilities,
        ):
            demo()
        banner = "» " * 30
        print("\n" + banner)
        print("All demos completed successfully!")
        print(banner)
        print("\nTo try the interactive REPL, run:")
        print(" python3 -m pysecondo.repl")
    except Exception as e:
        # Top-level boundary of a demo script: report and show the traceback.
        print(f"\nError during demo: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()

7
pyproject.toml Normal file
View File

@@ -0,0 +1,7 @@
[project]
name = "secondo-py"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = []

7
pysecondo/__init__.py Normal file
View File

@@ -0,0 +1,7 @@
"""
PySECONDO - A minimal implementation of SECONDO database system in Python
This is a learning project to understand the core architecture of SECONDO.
"""
__version__ = "0.1.0"

46
pysecondo/__main__.py Normal file
View File

@@ -0,0 +1,46 @@
"""
Main entry point for PySECONDO package
"""
from .repl import REPL
if __name__ == "__main__":
    import sys

    repl = REPL()
    # sys.argv[1:2] is [] when no argument was given, so this is the same
    # test as "at least one argument and the first one is --test".
    if sys.argv[1:2] != ["--test"]:
        # Default: start the interactive read-eval-print loop.
        repl.run()
    else:
        # Test mode: build a small system, load sample data, run one query.
        from pysecondo.storage.memory import MemoryStorage
        from pysecondo.algebras.base import AlgebraManager
        from pysecondo.algebras.standard import StandardAlgebra
        from pysecondo.algebras.relation import RelationAlgebra
        from pysecondo.parser.parser import Parser
        from pysecondo.parser.evaluator import Evaluator
        from pysecondo.core.types import parse_type
        from pysecondo.core.nested_list import list_nl

        storage = MemoryStorage()
        algebra_manager = AlgebraManager()
        algebra_manager.register_algebra("StandardAlgebra", StandardAlgebra())
        algebra_manager.register_algebra(
            "RelationAlgebra", RelationAlgebra(storage))
        parser = Parser()
        evaluator = Evaluator(algebra_manager, storage)

        # Create test data
        cities_type = parse_type(
            "(rel (tuple ((Name string)(Population int))))")
        cities_data = list_nl(
            list_nl("Beijing", 21540000),
            list_nl("Shanghai", 24280000),
            list_nl("Guangzhou", 14040000),
        )
        storage.create_object("cities", cities_data, cities_type)

        # Run test query
        print("Test: query cities feed count")
        tokens = parser.parse_expression("cities feed count")
        value, _ = evaluator.evaluate(tokens)
        print(f"Result: {value.to_python()}")

View File

@@ -0,0 +1,13 @@
"""
Algebra system for PySECONDO
The algebra system is SECONDO's core extensibility mechanism.
Each algebra defines operators that can be dynamically loaded.
"""
from .base import Algebra, Operator, AlgebraManager
from .standard import StandardAlgebra
from .relation import RelationAlgebra
__all__ = ["Algebra", "Operator", "AlgebraManager",
"StandardAlgebra", "RelationAlgebra"]

150
pysecondo/algebras/base.py Normal file
View File

@@ -0,0 +1,150 @@
"""
Base classes for Algebra system
SECONDO's extensibility is based on algebras - modules that define
operators for specific data types and operations.
"""
from typing import List, Callable, Optional, Any
from abc import ABC, abstractmethod
from pysecondo.core.types import Type
from pysecondo.core.nested_list import NestedList
class Operator:
    """
    Base class for all operators in SECONDO

    Each operator has:
    - name: Operator name (e.g., "+", "filter", "consume")
    - type_map: Function that checks if input types are valid
    - value_map: Function that executes the operator, or None
    - description: Optional free-text description

    The type_map function:
    - Takes list of input types
    - Returns output type if types are valid
    - Raises TypeError if types are invalid

    The value_map function:
    - Takes list of input values (NestedList)
    - Returns result value (NestedList)

    value_map may be None: RelationAlgebra registers "create" and "update"
    that way because those operators are handled by the query processor.
    Callers must therefore check for None before invoking it.
    """

    def __init__(
        self,
        name: str,
        type_map: Callable[[List[Type]], Type],
        value_map: Optional[Callable[[List[NestedList]], NestedList]],
        description: str = "",
    ):
        self.name = name
        self.type_map = type_map
        self.value_map = value_map
        self.description = description

    def __repr__(self) -> str:
        return f"Operator({self.name})"
class Algebra(ABC):
    """
    Abstract collection of related operators.

    The constructor creates an empty operator table and immediately calls
    init(), which concrete subclasses implement to register their operators.

    Example:
        class MyAlgebra(Algebra):
            def init(self):
                self.register_operator(Operator(
                    name="myop",
                    type_map=self.type_map_myop,
                    value_map=self.value_map_myop
                ))
    """

    def __init__(self):
        # Operator name -> Operator; filled by init() via register_operator().
        self.operators: dict[str, Operator] = {}
        self.init()

    @abstractmethod
    def init(self) -> None:
        """
        Register this algebra's operators.

        Implementations call self.register_operator() once per operator.
        """

    def register_operator(self, op: Operator) -> None:
        """Add ``op`` under its name; raise ValueError if the name is taken."""
        key = op.name
        if key in self.operators:
            raise ValueError(f"Operator {op.name} already registered")
        self.operators[key] = op

    def get_operator(self, name: str) -> Optional[Operator]:
        """Return the operator registered as ``name``, or None."""
        return self.operators.get(name)

    def list_operators(self) -> List[str]:
        """Return the registered operator names in registration order."""
        return [*self.operators]
class AlgebraManager:
    """
    Manager for all loaded algebras

    Keeps the registered algebras by name and an index from operator name
    to the (algebra, operator) pair that provides it. Operator names are
    global: two algebras may not define the same operator.
    """

    def __init__(self):
        self.algebras: dict[str, Algebra] = {}
        # Maps operator name -> (algebra, operator)
        self.operator_index: dict[str, tuple[Algebra, Operator]] = {}

    def register_algebra(self, name: str, algebra: Algebra) -> None:
        """
        Register a new algebra and index all of its operators.

        Registration is all-or-nothing: every conflict is detected before
        any state is changed, so a failed call leaves the manager untouched.

        Args:
            name: Algebra name (e.g., "StandardAlgebra")
            algebra: Algebra instance

        Raises:
            ValueError: if ``name`` is already registered, or if one of the
                algebra's operators is already provided by another algebra.
        """
        if name in self.algebras:
            raise ValueError(f"Algebra {name} already registered")

        # Validate first; mutating while checking would leave a
        # half-registered algebra behind when a later operator clashes.
        for op_name in algebra.operators:
            if op_name in self.operator_index:
                existing_alg, _ = self.operator_index[op_name]
                raise ValueError(
                    f"Operator {op_name} already defined in "
                    f"{self._algebra_name(existing_alg)}"
                )

        self.algebras[name] = algebra
        for op_name, operator in algebra.operators.items():
            self.operator_index[op_name] = (algebra, operator)

    def _algebra_name(self, algebra: Algebra) -> str:
        """Return the name ``algebra`` was registered under (else its class name)."""
        for registered_name, registered in self.algebras.items():
            if registered is algebra:
                return registered_name
        return type(algebra).__name__

    def get_operator(self, name: str) -> Optional[Operator]:
        """Get operator by name, or None if no algebra provides it"""
        entry = self.operator_index.get(name)
        return entry[1] if entry is not None else None

    def get_algebra_for_operator(self, name: str) -> Optional[Algebra]:
        """Get the algebra that provides an operator, or None"""
        entry = self.operator_index.get(name)
        return entry[0] if entry is not None else None

    def list_operators(self) -> List[str]:
        """List all available operator names"""
        return list(self.operator_index)

    def list_algebras(self) -> List[str]:
        """List the names of all registered algebras"""
        return list(self.algebras)

View File

@@ -0,0 +1,290 @@
"""
RelationAlgebra - Relational data operators
This algebra provides:
- Data manipulation: create, update
- Stream processing: feed, consume
- Filtering: filter
- Aggregation: count
Stream processing model:
relation --feed--> stream --filter--> stream --consume--> relation
In SECONDO, streams are represented as streams of tuples.
For simplicity in PySECONDO, we use nested lists directly.
"""
from typing import List
from pysecondo.core.types import BaseType, Type, TupleType, RelationType, Attribute
from pysecondo.core.nested_list import NestedList, atom, list_nl
from pysecondo.storage.memory import MemoryStorage
from pysecondo.algebras.base import Algebra, Operator
class Stream:
    """
    Plain container pairing a list of tuple values with their tuple type.

    In real SECONDO, streams are C++ iterators.
    Here, a Python list is used for simplicity.
    """

    def __init__(self, tuples: List[NestedList], tuple_type: TupleType):
        # tuples: the tuple values; tuple_type: the type they share
        self.tuples, self.tuple_type = tuples, tuple_type
class RelationAlgebra(Algebra):
    """
    Relation algebra for data manipulation

    Registers the operators create, update, feed, consume, filter and count.
    Streams are not a separate runtime representation here: a stream is
    typed as the relation's tuple type and its value is a nested list of
    tuples, exactly like a relation value.
    """

    def __init__(self, storage: MemoryStorage):
        """
        Initialize relation algebra

        Args:
            storage: Storage backend for managing relations
        """
        # Set before super().__init__(), which calls init().
        self.storage = storage
        super().__init__()

    def init(self) -> None:
        """Register all relation operators"""
        registrations = (
            # create/update have no value map: handled by the query processor
            ("create", self.type_map_create, None),
            ("update", self.type_map_update, None),
            ("feed", self.type_map_feed, self.value_map_feed),
            ("consume", self.type_map_consume, self.value_map_consume),
            ("filter", self.type_map_filter, self.value_map_filter),
            ("count", self.type_map_count, self.value_map_count),
        )
        for op_name, type_map, value_map in registrations:
            self.register_operator(Operator(op_name, type_map, value_map))

    # Type mapping functions

    def type_map_create(self, args: List[Type]) -> Type:
        """
        Type map for create operator

        Syntax: create identifier : type
        Always raises TypeError: create is handled by the query processor.
        """
        raise TypeError("create is handled by query processor")

    def type_map_update(self, args: List[Type]) -> Type:
        """
        Type map for update operator

        Syntax: update identifier := value
        Always raises TypeError: update is handled by the query processor.
        """
        raise TypeError("update is handled by query processor")

    def type_map_feed(self, args: List[Type]) -> Type:
        """
        Type map for feed operator

        Input: (rel (tuple (...)))
        Output: the relation's tuple type, which stands in for
                (stream (tuple (...))) in PySECONDO.

        Raises:
            TypeError: on wrong arity or a non-relation argument.
        """
        if len(args) != 1:
            raise TypeError(f"feed expects 1 argument, got {len(args)}")
        if not isinstance(args[0], RelationType):
            raise TypeError(f"feed requires relation type, got {args[0]}")
        # Return the tuple type (stream element type)
        return args[0].tuple_type

    def type_map_consume(self, args: List[Type]) -> Type:
        """
        Type map for consume operator

        Input: a tuple type (the stand-in for a stream of such tuples)
        Output: (rel (tuple (...)))

        Raises:
            TypeError: on wrong arity or a non-tuple argument.
        """
        if len(args) != 1:
            raise TypeError(f"consume expects 1 argument, got {len(args)}")
        if not isinstance(args[0], TupleType):
            raise TypeError(
                f"consume requires tuple/stream type, got {args[0]}")
        return RelationType(args[0])

    def type_map_filter(self, args: List[Type]) -> Type:
        """
        Type map for filter operator

        Input: (stream T, predicate)
        Output: (stream T) - the same type as the input stream

        Only the stream argument is validated; the predicate's type is not
        inspected.

        Raises:
            TypeError: on wrong arity or a non-tuple stream argument.
        """
        if len(args) != 2:
            raise TypeError(f"filter expects 2 arguments, got {len(args)}")
        stream_type = args[0]
        if not isinstance(stream_type, TupleType):
            raise TypeError(f"filter requires tuple stream, got {stream_type}")
        return stream_type

    def type_map_count(self, args: List[Type]) -> Type:
        """
        Type map for count operator

        Input: one argument (its type is not inspected)
        Output: int

        Raises:
            TypeError: on wrong arity.
        """
        if len(args) != 1:
            raise TypeError(f"count expects 1 argument, got {len(args)}")
        return BaseType.INT

    # Value mapping functions

    def value_map_feed(self, args: List[NestedList]) -> NestedList:
        """
        Convert relation to stream

        The relation's nested list is returned unchanged, since streams
        share the relation representation.

        Raises:
            TypeError: if the argument is not a list.
        """
        relation = args[0]
        if not relation.is_list():
            raise TypeError("feed requires a relation (list)")
        return relation

    def value_map_consume(self, args: List[NestedList]) -> NestedList:
        """
        Convert stream to relation

        The stream's nested list is returned unchanged.

        Raises:
            TypeError: if the argument is not a list.
        """
        stream = args[0]
        if not stream.is_list():
            raise TypeError("consume requires a stream (list)")
        return stream

    def value_map_filter(self, args: List[NestedList]) -> NestedList:
        """
        Filter stream based on a predicate

        Only constant boolean predicates are actually applied:
        - atom true  -> the stream is returned unchanged
        - atom false -> an empty list is returned

        Every other predicate passes all tuples through. The value map
        receives no schema, so attribute names cannot be resolved to tuple
        positions here:
        - ("." "AttrName") returns a new list holding all tuples
        - anything else (including comparisons) returns the stream itself

        Raises:
            TypeError: if the first argument is not a list.
        """
        stream, predicate = args[0], args[1]
        if not stream.is_list():
            raise TypeError("filter requires a stream")

        # Constant boolean predicate
        if predicate.is_atom() and isinstance(predicate.value, bool):
            return stream if predicate.value else list_nl()

        # Attribute access ("." "AttrName"): not evaluated, every tuple is
        # kept. A fresh list is returned rather than the input object.
        if (
            predicate.is_list()
            and len(predicate) == 2
            and predicate[0].is_atom()
            and predicate[0].value == "."
        ):
            return list_nl(*stream.value)

        # Comparisons and any other predicate shape are not evaluated.
        return stream

    def value_map_count(self, args: List[NestedList]) -> NestedList:
        """
        Count elements in stream

        Input: stream (list of tuples)
        Output: int atom holding the number of elements

        Raises:
            TypeError: if the argument is not a list.
        """
        stream = args[0]
        if not stream.is_list():
            raise TypeError("count requires a stream")
        return atom(len(stream.value))

View File

@@ -0,0 +1,200 @@
"""
StandardAlgebra - Basic arithmetic and logical operators
This algebra provides:
- Arithmetic: +, -, *, /
- Comparison: <, >, <=, >=, =, !=
- Logical: and, or, not
"""
from pysecondo.core.types import BaseType, Type
from pysecondo.core.nested_list import NestedList, atom
from pysecondo.algebras.base import Algebra, Operator
class StandardAlgebra(Algebra):
    """Standard algebra with arithmetic, comparison and logical operators"""

    def init(self) -> None:
        """Register all standard operators"""
        # Arithmetic operators
        self.register_operator(
            Operator("+", self.type_map_arith, self.value_map_add))
        self.register_operator(
            Operator("-", self.type_map_arith, self.value_map_sub))
        self.register_operator(
            Operator("*", self.type_map_arith, self.value_map_mul))
        # "/" has its own type map: value_map_div always uses true division
        self.register_operator(
            Operator("/", self.type_map_div, self.value_map_div))
        # Comparison operators
        self.register_operator(
            Operator("<", self.type_map_compare, self.value_map_lt))
        self.register_operator(
            Operator(">", self.type_map_compare, self.value_map_gt))
        self.register_operator(
            Operator("<=", self.type_map_compare, self.value_map_le))
        self.register_operator(
            Operator(">=", self.type_map_compare, self.value_map_ge))
        self.register_operator(
            Operator("=", self.type_map_compare, self.value_map_eq))
        self.register_operator(
            Operator("!=", self.type_map_compare, self.value_map_ne))
        # Logical operators
        self.register_operator(
            Operator("and", self.type_map_logical, self.value_map_and))
        self.register_operator(
            Operator("or", self.type_map_logical, self.value_map_or))
        self.register_operator(
            Operator("not", self.type_map_unary_logical, self.value_map_not))

    # Type mapping functions

    def type_map_arith(self, args: list[Type]) -> Type:
        """Type map for +, -, *: (T T) -> T where T is int or real

        Both operands must be numeric. The result is real if either operand
        is real, otherwise int.

        Raises:
            TypeError: on wrong arity or a non-numeric operand.
        """
        if len(args) != 2:
            raise TypeError(
                f"Arithmetic operator expects 2 arguments, got {len(args)}")
        t1, t2 = args
        numeric = (BaseType.INT, BaseType.REAL)
        # Check BOTH operands; a single real operand must not make
        # e.g. (real string) acceptable.
        if t1 not in numeric or t2 not in numeric:
            raise TypeError(
                f"Arithmetic operator requires int or real, got {t1} and {t2}"
            )
        if BaseType.REAL in (t1, t2):
            return BaseType.REAL
        return BaseType.INT

    def type_map_div(self, args: list[Type]) -> Type:
        """Type map for /: (T T) -> real where T is int or real

        Operands are validated like the other arithmetic operators, but the
        result is always real because value_map_div uses true division.

        Raises:
            TypeError: on wrong arity or a non-numeric operand.
        """
        self.type_map_arith(args)
        return BaseType.REAL

    def type_map_compare(self, args: list[Type]) -> Type:
        """Type map for comparison operators: (T T) -> bool

        Accepts any int/real pairing, or two strings.

        Raises:
            TypeError: on wrong arity or incomparable operand types.
        """
        if len(args) != 2:
            raise TypeError(
                f"Comparison operator expects 2 arguments, got {len(args)}")
        t1, t2 = args
        numeric = (BaseType.INT, BaseType.REAL)
        if t1 in numeric and t2 in numeric:
            return BaseType.BOOL
        if t1 == t2 and t1 == BaseType.STRING:
            return BaseType.BOOL
        raise TypeError(f"Cannot compare {t1} with {t2}")

    def type_map_logical(self, args: list[Type]) -> Type:
        """Type map for binary logical operators: (bool bool) -> bool

        Raises:
            TypeError: on wrong arity or a non-bool operand.
        """
        if len(args) != 2:
            raise TypeError(
                f"Logical operator expects 2 arguments, got {len(args)}")
        if args[0] != BaseType.BOOL or args[1] != BaseType.BOOL:
            raise TypeError(
                f"Logical operator requires bool arguments, got {args[0]} and {args[1]}"
            )
        return BaseType.BOOL

    def type_map_unary_logical(self, args: list[Type]) -> Type:
        """Type map for unary logical operators: (bool) -> bool

        Raises:
            TypeError: on wrong arity or a non-bool operand.
        """
        if len(args) != 1:
            raise TypeError(
                f"Unary logical operator expects 1 argument, got {len(args)}")
        if args[0] != BaseType.BOOL:
            raise TypeError(
                f"Logical operator requires bool argument, got {args[0]}"
            )
        return BaseType.BOOL

    # Value mapping functions

    @staticmethod
    def _operands(args: list[NestedList]) -> tuple:
        """Return the raw Python values of the first two arguments."""
        return args[0].value, args[1].value

    def value_map_add(self, args: list[NestedList]) -> NestedList:
        """Addition: a + b"""
        a, b = self._operands(args)
        return atom(a + b)

    def value_map_sub(self, args: list[NestedList]) -> NestedList:
        """Subtraction: a - b"""
        a, b = self._operands(args)
        return atom(a - b)

    def value_map_mul(self, args: list[NestedList]) -> NestedList:
        """Multiplication: a * b"""
        a, b = self._operands(args)
        return atom(a * b)

    def value_map_div(self, args: list[NestedList]) -> NestedList:
        """Division: a / b (true division, the result is a float)

        Raises:
            ZeroDivisionError: if b is zero.
        """
        a, b = self._operands(args)
        if b == 0:
            raise ZeroDivisionError("Division by zero")
        return atom(a / b)

    def value_map_lt(self, args: list[NestedList]) -> NestedList:
        """Less than: a < b"""
        a, b = self._operands(args)
        return atom(a < b)

    def value_map_gt(self, args: list[NestedList]) -> NestedList:
        """Greater than: a > b"""
        a, b = self._operands(args)
        return atom(a > b)

    def value_map_le(self, args: list[NestedList]) -> NestedList:
        """Less than or equal: a <= b"""
        a, b = self._operands(args)
        return atom(a <= b)

    def value_map_ge(self, args: list[NestedList]) -> NestedList:
        """Greater than or equal: a >= b"""
        a, b = self._operands(args)
        return atom(a >= b)

    def value_map_eq(self, args: list[NestedList]) -> NestedList:
        """Equal: a = b"""
        a, b = self._operands(args)
        return atom(a == b)

    def value_map_ne(self, args: list[NestedList]) -> NestedList:
        """Not equal: a != b"""
        a, b = self._operands(args)
        return atom(a != b)

    def value_map_and(self, args: list[NestedList]) -> NestedList:
        """Logical and: a and b"""
        a, b = self._operands(args)
        return atom(a and b)

    def value_map_or(self, args: list[NestedList]) -> NestedList:
        """Logical or: a or b"""
        a, b = self._operands(args)
        return atom(a or b)

    def value_map_not(self, args: list[NestedList]) -> NestedList:
        """Logical not: not a"""
        return atom(not args[0].value)

View File

@@ -0,0 +1,9 @@
"""
Core modules for PySECONDO
"""
from .nested_list import NestedList, NestedListType
from .types import Type
from .type_system import TypeChecker
__all__ = ["NestedList", "NestedListType", "Type", "TypeChecker"]

View File

@@ -0,0 +1,167 @@
"""
Nested List - The core data structure of SECONDO
In SECONDO, everything is represented as nested lists:
- Atomic values: int, real, string, bool
- Lists: (value1 value2 value3)
- Types: (rel (tuple ((Name string)(Population int))))
This implementation uses Python's built-in types:
- Atomic: int, float, str, bool
- List: list
- Type tags: wrapped in special objects or type annotations
"""
from enum import Enum
from typing import Any, Union
from dataclasses import dataclass
class NestedListType(Enum):
    """Tag that tells the two kinds of nested-list node apart."""

    # Leaf node holding a single value (int, string, bool, real)
    ATOM = "atom"
    # Interior node holding a sequence of nested lists
    LIST = "list"
@dataclass
class NestedList:
    """
    Nested list representation

    A node is either an atom (``value`` is a Python scalar) or a list
    (``value`` is a Python list of child nodes); ``type`` records which.

    Examples:
        # Atomic values
        nl_int = NestedList.atom(42)
        nl_str = NestedList.atom("Beijing")
        nl_bool = NestedList.atom(True)

        # Lists
        nl_list = NestedList.list([
            NestedList.atom(1),
            NestedList.atom(2),
            NestedList.atom(3)
        ])
        # Represents: (1 2 3)

        # Nested structures
        nl_tuple = NestedList.list([
            NestedList.atom("Beijing"),
            NestedList.atom(21540000)
        ])
        # Represents: ("Beijing" 21540000)

        nl_rel = NestedList.list([
            nl_tuple,
            NestedList.list([
                NestedList.atom("Shanghai"),
                NestedList.atom(24280000)
            ])
        ])
        # Represents: (("Beijing" 21540000)("Shanghai" 24280000))
    """

    # Scalar payload for atoms, Python list of children for lists
    value: Any
    # Node kind tag (atom or list)
    type: NestedListType

    @staticmethod
    def atom(value: Union[int, float, str, bool]) -> "NestedList":
        """Create an atomic nested list value"""
        return NestedList(value, NestedListType.ATOM)

    @staticmethod
    def list(items: list) -> "NestedList":
        """Create a list nested list value (``items`` is stored as given)"""
        return NestedList(items, NestedListType.LIST)

    def is_atom(self) -> bool:
        """Check if this is an atomic value"""
        return self.type == NestedListType.ATOM

    def is_list(self) -> bool:
        """Check if this is a list"""
        return self.type == NestedListType.LIST

    def to_python(self) -> Any:
        """
        Convert nested list to Python native type

        Children that are not NestedList instances are passed through as-is.

        Examples:
            atom(5) -> 5
            list([atom(1), atom(2)]) -> [1, 2]
            list([atom("a"), list([atom(1), atom(2)])]) -> ["a", [1, 2]]
        """
        if self.is_atom():
            return self.value
        return [
            item.to_python() if isinstance(item, NestedList) else item
            for item in self.value
        ]

    @classmethod
    def from_python(cls, value: Any) -> "NestedList":
        """
        Create nested list from Python native type

        Examples:
            5 -> atom(5)
            [1, 2, 3] -> list([atom(1), atom(2), atom(3)])
            ["a", [1, 2]] -> list([atom("a"), list([atom(1), atom(2)])])

        Raises:
            TypeError: if ``value`` is not a NestedList, int, float, str,
                bool or list.
        """
        # Already a NestedList, return as-is
        if isinstance(value, NestedList):
            return value
        if isinstance(value, (int, float, str, bool)):
            return cls.atom(value)
        if isinstance(value, list):
            return cls.list([cls.from_python(v) for v in value])
        raise TypeError(f"Cannot convert {type(value)} to NestedList")

    def __repr__(self) -> str:
        """SECONDO-style string representation

        Strings are wrapped in double quotes, other atoms use str(), and
        lists are rendered as space-separated items inside parentheses.
        """
        if self.is_atom():
            if isinstance(self.value, str):
                return f'"{self.value}"'
            return str(self.value)
        inner = " ".join(repr(item) for item in self.value)
        return f"({inner})"

    def __eq__(self, other) -> bool:
        """Equality comparison: same node kind and equal payload

        Returns NotImplemented for non-NestedList operands so Python can try
        the reflected comparison; ``nested == 5`` still evaluates to False.
        """
        if not isinstance(other, NestedList):
            return NotImplemented
        return self.type == other.type and self.value == other.value

    def __len__(self) -> int:
        """Length of list, raises TypeError for atoms"""
        if self.is_atom():
            raise TypeError("Atomic values have no length")
        return len(self.value)

    def __getitem__(self, index):
        """Index access for lists, raises TypeError for atoms"""
        if self.is_atom():
            raise TypeError("Cannot index atomic values")
        return self.value[index]
# Convenience functions for creating nested lists
def nl(value: Any) -> NestedList:
    """Shorthand for NestedList.from_python(value)."""
    converted = NestedList.from_python(value)
    return converted
def atom(value: Union[int, float, str, bool]) -> NestedList:
    """Shorthand for NestedList.atom(value): wrap a scalar as an atom node."""
    node = NestedList.atom(value)
    return node
def list_nl(*items: Any) -> NestedList:
    """Build a list node from the given items.

    NestedList arguments are used as they are; anything else is converted
    with NestedList.from_python (which itself returns NestedList inputs
    unchanged, so one call covers both cases).
    """
    return NestedList.list([NestedList.from_python(item) for item in items])

View File

@@ -0,0 +1,138 @@
"""
Type Checker for PySECONDO
Provides type checking and type inference for nested list values.
"""
from typing import Any, Optional
from .nested_list import NestedList, NestedListType
from .types import Type, BaseType, TupleType, RelationType, Attribute
class TypeError(Exception):
    """Type error in SECONDO

    NOTE: this class shadows the built-in TypeError inside this module. It
    derives from Exception, not from the built-in TypeError.
    """
class TypeChecker:
"""
Type checker for nested list values
Verifies that a nested list value matches a given type.
"""
@staticmethod
def check(value: NestedList, expected_type: Type) -> bool:
"""
Check if a nested list value matches the expected type
Examples:
checker = TypeChecker()
checker.check(atom(5), BaseType.INT) -> True
checker.check(atom("hello"), BaseType.INT) -> False
checker.check(
list_nl([atom("Beijing"), atom(21540000)]),
TupleType([Attribute("Name", BaseType.STRING), Attribute("Pop", BaseType.INT)])
) -> True
"""
try:
TypeChecker._check(value, expected_type)
return True
except TypeError:
return False
@staticmethod
def _check(value: NestedList, expected_type: Type) -> None:
"""Internal type checking method"""
if isinstance(expected_type, BaseType):
TypeChecker._check_base_type(value, expected_type)
elif isinstance(expected_type, TupleType):
TypeChecker._check_tuple(value, expected_type)
elif isinstance(expected_type, RelationType):
TypeChecker._check_relation(value, expected_type)
else:
raise TypeError(f"Unsupported type: {type(expected_type)}")
@staticmethod
def _check_base_type(value: NestedList, expected_type: BaseType) -> None:
"""Check base type"""
if not value.is_atom():
raise TypeError(f"Expected {expected_type.value}, got list")
if expected_type == BaseType.INT:
if not isinstance(value.value, int):
raise TypeError(f"Expected int, got {type(value.value)}")
elif expected_type == BaseType.REAL:
if not isinstance(value.value, (int, float)):
raise TypeError(f"Expected real, got {type(value.value)}")
elif expected_type == BaseType.STRING:
if not isinstance(value.value, str):
raise TypeError(f"Expected string, got {type(value.value)}")
elif expected_type == BaseType.BOOL:
if not isinstance(value.value, bool):
raise TypeError(f"Expected bool, got {type(value.value)}")
@staticmethod
def _check_tuple(value: NestedList, expected_type: TupleType) -> None:
"""Check tuple type"""
if not value.is_list():
raise TypeError(f"Expected tuple, got atom")
if len(value) != len(expected_type.attributes):
raise TypeError(
f"Tuple arity mismatch: expected {len(expected_type.attributes)}, "
f"got {len(value)}"
)
for attr_val, attr_def in zip(value.value, expected_type.attributes):
TypeChecker._check(attr_val, attr_def.type)
@staticmethod
def _check_relation(value: NestedList, expected_type: RelationType) -> None:
    """
    Validate a relation value, i.e. a list whose elements are all tuples.

    Raises:
        TypeError: If the value is an atom or any element is not a valid
            tuple of the relation's tuple type.
    """
    if not value.is_list():
        raise TypeError("Expected relation, got atom")

    row_type = expected_type.tuple_type
    # Every row must independently satisfy the relation's tuple type.
    for row in value.value:
        TypeChecker._check_tuple(row, row_type)
@staticmethod
def infer_type(value: NestedList) -> Type:
    """
    Infer type from a nested list value

    Atoms map to the base type of their Python payload. For a non-empty
    list the type is inferred from the first element.

    NOTE: a list currently yields the inferred type of its first element
    (no ListType is constructed), and it is wrapped in a RelationType only
    if that inferred type is a TupleType.

    Examples:
        infer_type(atom(5)) -> BaseType.INT
        infer_type(atom(True)) -> BaseType.BOOL
        infer_type(list_nl([atom(1), atom(2)])) -> BaseType.INT

    Raises:
        TypeError: If the atom's payload type is unsupported or the list
            is empty.
    """
    if value.is_atom():
        raw = value.value
        # bool must be tested before int: bool is a subclass of int, so
        # testing int first would classify True/False as BaseType.INT.
        candidates = (
            (bool, BaseType.BOOL),
            (int, BaseType.INT),
            (float, BaseType.REAL),
            (str, BaseType.STRING),
        )
        for python_type, base_type in candidates:
            if isinstance(raw, python_type):
                return base_type
        raise TypeError(f"Cannot infer type for {type(raw)}")

    # For lists, infer from the first element
    if len(value.value) == 0:
        raise TypeError("Cannot infer type for empty list")

    first_type = TypeChecker.infer_type(value.value[0])

    # A homogeneous list of tuples is treated as a relation
    if all(TypeChecker.check(item, first_type) for item in value.value):
        if isinstance(first_type, TupleType):
            return RelationType(first_type)
    return first_type

165
pysecondo/core/types.py Normal file
View File

@@ -0,0 +1,165 @@
"""
Type System for PySECONDO
SECONDO uses nested list notation for types:
- Basic types: int, real, string, bool
- Tuple types: (tuple ((name1 type1)(name2 type2)))
- Relation types: (rel tuple_type)
- List types: (element_type) # list whose elements all have type element_type
Examples:
int # integer type
string # string type
(tuple ((x int)(y real))) # 2D point
(rel (tuple ((Name string)(Population int)))) # relation
"""
from typing import List, Optional, Union
from dataclasses import dataclass
from enum import Enum
import re
class BaseType(Enum):
    """
    Atomic SECONDO data types.

    Each member's value is the keyword used for that type in SECONDO
    type expressions.
    """

    INT = "int"        # integer numbers
    REAL = "real"      # real numbers
    STRING = "string"  # text values
    BOOL = "bool"      # truth values
@dataclass
class Attribute:
    """
    Attribute definition for tuples: a name paired with its type.

    The textual form is ``(name type)`` in SECONDO notation.
    """

    # Attribute name as it appears in the tuple type
    name: str
    # Type of the attribute's values
    type: "Type"

    def __repr__(self) -> str:
        """Return the attribute in SECONDO notation, e.g. ``(Name string)``."""
        # Enum members stringify as "BaseType.INT"; use their value ("int")
        # so the output stays valid SECONDO type syntax.
        if isinstance(self.type, Enum):
            type_text = self.type.value
        else:
            type_text = str(self.type)
        return f"({self.name} {type_text})"
@dataclass
class TupleType:
    """
    Tuple type made of an ordered list of attributes.

    Rendered as ``(tuple (<attr> <attr> ...))`` where each attribute uses
    its own string form.
    """

    # Attribute definitions in declaration order
    attributes: List[Attribute]

    def __repr__(self) -> str:
        """Return the tuple type with its attributes separated by spaces."""
        rendered = [str(attribute) for attribute in self.attributes]
        return "(tuple (" + " ".join(rendered) + "))"
@dataclass
class RelationType:
    """
    Relation type wrapping the tuple type shared by all of its rows.

    Rendered as ``(rel <tuple_type>)``.
    """

    # Type of every tuple stored in the relation
    tuple_type: TupleType

    def __repr__(self) -> str:
        """Return the relation type in ``(rel ...)`` notation."""
        return "(rel " + str(self.tuple_type) + ")"
@dataclass
class ListType:
    """
    List type: (element_type)

    Describes a list whose elements share a single element type.
    """

    # Type of the list's elements
    element_type: "Type"

    def __repr__(self) -> str:
        """Return the list type in SECONDO notation, e.g. ``(int)``."""
        # Enum members stringify as "BaseType.INT"; use their value ("int")
        # so the output stays valid SECONDO type syntax.
        if isinstance(self.element_type, Enum):
            element_text = self.element_type.value
        else:
            element_text = str(self.element_type)
        return f"({element_text})"
# Type is the union of every type representation used by the type system:
# a BaseType member, or a TupleType, RelationType or ListType instance.
Type = Union[BaseType, TupleType, RelationType, ListType]
def parse_type(type_str: str) -> Type:
    """
    Parse SECONDO type string into Type object

    Supported forms are the basic type keywords, ``(rel <tuple type>)`` and
    ``(tuple ((name type)(name type)...))``. Attribute types are parsed
    recursively.

    Examples:
        parse_type("int") -> BaseType.INT
        parse_type("(tuple ((x int)(y real)))") -> TupleType(...)
        parse_type("(rel (tuple ((Name string))))") -> RelationType(...)

    Args:
        type_str: Type expression in SECONDO nested list notation

    Returns:
        The corresponding Type object

    Raises:
        ValueError: If the string is not a recognised type, a relation does
            not wrap a tuple type, an attribute is not of the form
            ``(name type)``, or parentheses are unbalanced.
    """
    type_str = type_str.strip()

    # Basic types: the keyword is the enum member's value
    for base_type in BaseType:
        if type_str == base_type.value:
            return base_type

    # Relation type
    if type_str.startswith("(rel ") and type_str.endswith(")"):
        inner = type_str[5:-1].strip()
        tuple_type = parse_type(inner)
        if not isinstance(tuple_type, TupleType):
            raise ValueError(
                f"Relation must contain a tuple type, got: {inner}")
        return RelationType(tuple_type)

    # Tuple type
    if type_str.startswith("(tuple (") and type_str.endswith("))"):
        # [8:] skips "(tuple (", [:-2] removes the closing "))"
        inner = type_str[8:-2].strip()
        attributes = []
        i = 0
        while i < len(inner):
            # Skip whitespace between attributes
            if inner[i].isspace():
                i += 1
                continue

            # Each attribute starts with '('
            if inner[i] != '(':
                raise ValueError(
                    f"Expected '(' at position {i}, got '{inner[i]}'")

            # Find the matching closing paren
            depth = 0
            start = i
            while i < len(inner):
                if inner[i] == '(':
                    depth += 1
                elif inner[i] == ')':
                    depth -= 1
                    if depth == 0:
                        break
                i += 1
            if depth != 0:
                # Ran off the end without closing the attribute
                raise ValueError(
                    f"Unbalanced parentheses in tuple type: {type_str}")

            # Attribute text without its outer parens: "name type"
            attr_str = inner[start + 1:i].strip()
            # First word is the name, the remainder is the type
            parts = attr_str.split(None, 1)
            if len(parts) != 2:
                # Do not silently drop attributes such as "()" or "(x)"
                raise ValueError(
                    f"Malformed attribute '({attr_str})': "
                    f"expected '(name type)'")
            name, attr_type_str = parts
            attributes.append(
                Attribute(name, parse_type(attr_type_str.strip())))
            i += 1  # step past the closing paren
        return TupleType(attributes)

    raise ValueError(f"Unknown type: {type_str}")
def type_to_string(type_obj: Type) -> str:
    """
    Render a Type object as a SECONDO type string.

    Base types use their enum value; tuple, relation and list types use
    their own string form.

    Raises:
        TypeError: If ``type_obj`` is none of the known type classes.
    """
    if isinstance(type_obj, BaseType):
        return type_obj.value
    # The structured types all delegate to their own string conversion.
    if isinstance(type_obj, (TupleType, RelationType, ListType)):
        return str(type_obj)
    raise TypeError(f"Unknown type object: {type_obj}")

View File

@@ -0,0 +1,15 @@
"""
Parser module for PySECONDO
"""
from .parser import Parser, parse_query, CreateCommand, UpdateCommand, QueryCommand
from .evaluator import Evaluator
__all__ = [
"Parser",
"parse_query",
"CreateCommand",
"UpdateCommand",
"QueryCommand",
"Evaluator"
]

View File

@@ -0,0 +1,319 @@
"""
Expression Evaluator for PySECONDO
Evaluates query expressions using the algebra system.
Handles identifier lookup and operator execution.
"""
import re
from typing import List, Any, Tuple
from pysecondo.core.types import Type, BaseType
from pysecondo.core.nested_list import NestedList, atom, list_nl
from pysecondo.algebras.base import AlgebraManager
from pysecondo.storage.memory import MemoryStorage
class Evaluator:
    """
    Expression evaluator

    Evaluates tokenized expressions left to right, looking identifiers up in
    storage and running operators through the algebra manager.

    Evaluates expressions like:
    - identifier
    - identifier feed consume
    - identifier feed count
    - 5 + 3
    """

    # Operators that may also be written in prefix position ("not false")
    _UNARY_OPS = frozenset({'feed', 'consume', 'count', 'filter', 'not'})
    # Operators that consume the running value and one right operand
    _BINARY_OPS = frozenset({'+', '-', '*', '/', '<', '>', '=', '!=', '<=',
                             '>=', 'and', 'or'})

    def __init__(
        self,
        algebra_manager: AlgebraManager,
        storage: MemoryStorage
    ):
        """
        Args:
            algebra_manager: Source of operator definitions
            storage: Object store used for identifier lookup
        """
        self.algebra_manager = algebra_manager
        self.storage = storage

    def evaluate(self, tokens: List[str]) -> Tuple[NestedList, Type]:
        """
        Evaluate a tokenized expression

        The first operand is either a single token or a prefix operator
        application; every following token is then applied to the running
        result as an operator (or, if it is not an operator, evaluated as a
        value that replaces the running result).

        Args:
            tokens: List of tokens from parser

        Returns:
            Tuple of (value, type)

        Raises:
            ValueError: If evaluation fails
        """
        if not tokens:
            raise ValueError("Empty expression")

        # Handle single token
        if len(tokens) == 1:
            return self.evaluate_single(tokens[0])

        first = tokens[0]
        if first in self._UNARY_OPS:
            # Prefix notation such as "not false" or "filter rel pred"
            if first == 'filter':
                if len(tokens) < 3:
                    raise ValueError("filter requires a predicate")
                operand_value, operand_type = self.evaluate_single(tokens[1])
                pred_value, pred_type = self.evaluate_single(tokens[2])
                current_value, current_type = self.execute_operator(
                    first,
                    [operand_value, pred_value],
                    [operand_type, pred_type]
                )
                i = 3
            else:
                operand_value, operand_type = self.evaluate_single(tokens[1])
                current_value, current_type = self.execute_operator(
                    first,
                    [operand_value],
                    [operand_type]
                )
                i = 2
            # Remaining tokens (if any) continue through the operator chain
            # below instead of being silently discarded.
        else:
            # Start with first token (value or identifier)
            current_value, current_type = self.evaluate_single(first)
            i = 1

        # Process operator chain
        while i < len(tokens):
            op_name = tokens[i]

            if self.algebra_manager.get_operator(op_name) is None:
                # Not an operator: try it as a value, which then replaces
                # the running result.
                try:
                    current_value, current_type = self.evaluate_single(
                        op_name)
                except Exception as exc:
                    raise ValueError(
                        f"Unknown operator: {op_name}") from exc
                i += 1
                continue

            if op_name == 'filter':
                # filter needs a predicate argument
                if i + 1 >= len(tokens):
                    raise ValueError("filter requires a predicate")
                pred_value, pred_type = self.evaluate_single(tokens[i + 1])
                current_value, current_type = self.execute_operator(
                    op_name,
                    [current_value, pred_value],
                    [current_type, pred_type]
                )
                i += 2
            elif op_name in self._BINARY_OPS:
                # Binary operator: need right operand
                if i + 1 >= len(tokens):
                    raise ValueError(
                        f"Binary operator {op_name} requires right operand")
                right_value, right_type = self.evaluate_single(tokens[i + 1])
                current_value, current_type = self.execute_operator(
                    op_name,
                    [current_value, right_value],
                    [current_type, right_type]
                )
                i += 2
            else:
                # Known unary operators and any other registered operator
                # are applied to the running value alone.
                current_value, current_type = self.execute_operator(
                    op_name,
                    [current_value],
                    [current_type]
                )
                i += 1

        return current_value, current_type

    def evaluate_single(self, token: str) -> Tuple[NestedList, Type]:
        """
        Evaluate a single token

        Tried in order: boolean literal, string literal, number, nested
        list, identifier.

        Returns:
            Tuple of (value, type)

        Raises:
            ValueError: If the token matches none of the forms, or an
                identifier is not present in storage.
        """
        # Boolean (check before identifier since 'true'/'false' are valid
        # identifiers)
        lowered = token.lower()
        if lowered == 'true':
            return atom(True), BaseType.BOOL
        if lowered == 'false':
            return atom(False), BaseType.BOOL

        # String
        if self.is_string(token):
            return atom(token[1:-1]), BaseType.STRING  # Remove quotes

        # Number
        if self.is_number(token):
            return self.parse_number(token)

        # Nested list value
        if token.startswith('(') and token.endswith(')'):
            return self.parse_nested_list(token)

        # Identifier (check last since it matches many patterns)
        if self.is_identifier(token):
            return self.lookup_identifier(token)

        raise ValueError(f"Cannot evaluate token: {token}")

    def lookup_identifier(self, name: str) -> Tuple[NestedList, Type]:
        """
        Look up an identifier in storage

        Raises:
            ValueError: If storage has no object with this name.
        """
        value = self.storage.get_object(name)
        if value is None:
            raise ValueError(f"Unknown identifier: {name}")
        return value, self.storage.get_type(name)

    def parse_number(self, token: str) -> Tuple[NestedList, Type]:
        """
        Parse a number token

        Tokens accepted by int() become INT atoms; anything else accepted
        by float() (decimal point, exponent notation) becomes a REAL atom.

        Raises:
            ValueError: If the token is not a number.
        """
        try:
            return atom(int(token)), BaseType.INT
        except ValueError:
            pass
        try:
            return atom(float(token)), BaseType.REAL
        except ValueError:
            raise ValueError(f"Invalid number: {token}") from None

    def parse_nested_list(self, token: str) -> Tuple[NestedList, Type]:
        """
        Parse a nested list token

        This is a simplified parser that handles basic nested lists. The
        elements are whitespace-separated and each one is evaluated as a
        single token (literal, nested list or identifier).

        NOTE: no type inference is performed; BaseType.INT is returned as a
        placeholder type for every list.

        Raises:
            ValueError: If an element cannot be evaluated.
        """
        # Remove outer parentheses
        inner = token[1:-1].strip()
        if not inner:
            # Empty list
            return list_nl(), BaseType.INT  # Default type

        values = []
        for part in self.split_list(inner):
            # evaluate_single handles every element form, so atoms such as
            # booleans are not mistaken for nested lists.
            element, _ = self.evaluate_single(part.strip())
            values.append(element)

        # Type inference would happen here
        return list_nl(*values), BaseType.INT

    def split_list(self, s: str) -> List[str]:
        """
        Split a list string into parts

        Parts are separated by whitespace at the top level; whitespace
        inside parentheses or inside double-quoted strings does not split.
        """
        parts = []
        current = []
        depth = 0
        in_string = False
        previous = ''
        for char in s:
            if char == '"' and previous != '\\':
                # Unescaped quote toggles string mode
                in_string = not in_string
                current.append(char)
            elif in_string:
                current.append(char)
            elif char == '(':
                depth += 1
                current.append(char)
            elif char == ')':
                depth -= 1
                current.append(char)
            elif char.isspace() and depth == 0:
                if current:
                    parts.append(''.join(current))
                    current = []
            else:
                current.append(char)
            previous = char
        if current:
            parts.append(''.join(current))
        return parts

    def execute_operator(
        self,
        op_name: str,
        args: List[NestedList],
        arg_types: List[Type]
    ) -> Tuple[NestedList, Type]:
        """
        Execute an operator

        The operator's type mapping is applied first, then its value
        mapping.

        Returns:
            Tuple of (result_value, result_type)

        Raises:
            ValueError: If the operator is unknown or has no value mapping.
        """
        operator = self.algebra_manager.get_operator(op_name)
        if operator is None:
            raise ValueError(f"Unknown operator: {op_name}")

        # Type check
        result_type = operator.type_map(arg_types)

        # Execute
        if operator.value_map is None:
            raise ValueError(f"Operator {op_name} has no value mapping")
        return operator.value_map(args), result_type

    def is_identifier(self, token: str) -> bool:
        """Check if token is an identifier"""
        return bool(re.match(r'^[a-zA-Z_]\w*$', token))

    def is_number(self, token: str) -> bool:
        """Check if token is a number (anything float() accepts)"""
        try:
            float(token)
        except ValueError:
            return False
        return True

    def is_string(self, token: str) -> bool:
        """Check if token is a string literal enclosed in double quotes"""
        # A lone '"' both starts and ends with a quote but is not a literal.
        return (len(token) >= 2
                and token.startswith('"') and token.endswith('"'))

163
pysecondo/parser/parser.py Normal file
View File

@@ -0,0 +1,163 @@
"""
Simple Query Parser for PySECONDO
Parses SECONDO-like query syntax into executable commands.
Supported syntax:
- create name : type
- update name := value
- query name
- query expr1 op expr2
- query name feed consume
- query name feed count
- query name feed filter[expr] consume
"""
import re
from typing import List, Optional, Union, Tuple
from dataclasses import dataclass
@dataclass
class CreateCommand:
    """Parsed form of a ``create name : type`` statement."""

    # Name of the object to create
    name: str
    # Type expression as written after the colon (not yet parsed)
    type_str: str
@dataclass
class UpdateCommand:
    """Parsed form of an ``update name := value`` statement."""

    # Name of the object to update
    name: str
    # New value as a nested list string (not yet parsed)
    value: str
@dataclass
class QueryCommand:
    """Parsed form of a ``query expression`` statement."""

    # Expression text following the ``query`` keyword
    expression: str
# Any of the command objects that Parser.parse can return.
Command = Union[CreateCommand, UpdateCommand, QueryCommand]
class Parser:
    """
    Simple parser for SECONDO queries

    This is a simplified parser that handles basic SECONDO syntax.
    A full implementation would use a proper lexer and parser.
    """

    def __init__(self):
        """Compile the patterns for the create, update and query commands."""
        # DOTALL lets a command span several lines.
        flags = re.IGNORECASE | re.DOTALL
        # "create name : type" -- the type may be a basic keyword such as
        # "int" or a parenthesised expression. (?!=) keeps ":=" (update
        # syntax) from being read as the colon of a create command.
        self.create_pattern = re.compile(
            r'^\s*create\s+(\w+)\s*:(?!=)\s*(.+?)\s*$', flags
        )
        self.update_pattern = re.compile(
            r'^\s*update\s+(\w+)\s*:=\s*(.+)\s*$', flags
        )
        self.query_pattern = re.compile(
            r'^\s*query\s+(.+)\s*$', flags
        )

    def parse(self, query: str) -> Optional[Command]:
        """
        Parse a query string into a command

        Args:
            query: Query string

        Returns:
            Command object or None if parsing fails
        """
        # Try create command
        match = self.create_pattern.match(query)
        if match:
            return CreateCommand(match.group(1), match.group(2).strip())

        # Try update command
        match = self.update_pattern.match(query)
        if match:
            return UpdateCommand(match.group(1), match.group(2).strip())

        # Try query command
        match = self.query_pattern.match(query)
        if match:
            return QueryCommand(match.group(1).strip())

        return None

    def parse_expression(self, expr: str) -> List[str]:
        """
        Parse an expression into tokens

        This is a very simple tokenizer that splits on whitespace while
        keeping parenthesised groups, bracketed groups and double-quoted
        strings together as single tokens.

        Examples:
            "cities" -> ["cities"]
            "cities feed consume" -> ["cities", "feed", "consume"]
            "5 + 3" -> ["5", "+", "3"]
            'name = "New York"' -> ['name', '=', '"New York"']
        """
        tokens = []
        current = []
        paren_depth = 0
        bracket_depth = 0
        in_string = False
        for char in expr:
            if char == '"':
                # Quotes toggle string mode; whitespace inside a string
                # literal must not end the token.
                in_string = not in_string
                current.append(char)
            elif in_string:
                current.append(char)
            elif (char in ' \t\n'
                    and paren_depth == 0 and bracket_depth == 0):
                if current:
                    tokens.append(''.join(current))
                    current = []
            else:
                if char == '(':
                    paren_depth += 1
                elif char == ')':
                    paren_depth -= 1
                elif char == '[':
                    bracket_depth += 1
                elif char == ']':
                    bracket_depth -= 1
                current.append(char)
        if current:
            tokens.append(''.join(current))
        return tokens

    def is_identifier(self, token: str) -> bool:
        """Check if token is an identifier"""
        return bool(re.match(r'^[a-zA-Z_]\w*$', token))

    def is_number(self, token: str) -> bool:
        """Check if token is a number (anything float() accepts)"""
        try:
            float(token)
        except ValueError:
            return False
        return True

    def is_string(self, token: str) -> bool:
        """Check if token is a string literal enclosed in double quotes"""
        # A lone '"' both starts and ends with a quote but is not a literal.
        return (len(token) >= 2
                and token.startswith('"') and token.endswith('"'))

    def is_operator(self, token: str) -> bool:
        """Check if token is an operator"""
        ops = {'+', '-', '*', '/', '<', '>', '=', '!', 'and', 'or', 'not',
               '<=', '>=', '!=', 'feed', 'consume', 'filter', 'count'}
        return token in ops
# Module-level shortcut around Parser.parse
def parse_query(query: str) -> Optional[Command]:
    """
    Parse ``query`` with a freshly created Parser.

    Returns:
        The parsed command, or None if the string matches no command.
    """
    return Parser().parse(query)

View File

@@ -0,0 +1,120 @@
"""
Simple Query Processor for PySECONDO
Handles query execution and operator evaluation.
For simplicity, this is a basic implementation without full parsing.
"""
from typing import List, Any, Dict
from pysecondo.core.types import Type, BaseType, RelationType, TupleType, parse_type
from pysecondo.core.nested_list import NestedList
from pysecondo.algebras.base import AlgebraManager
from pysecondo.storage.memory import MemoryStorage
class QueryProcessor:
    """
    Minimal query processor built on an algebra manager and a storage.

    Supports:
    - creating objects (create name : type)
    - updating objects (update name := value)
    - evaluating a single operator on given arguments
    - looking up identifiers and their types
    """

    def __init__(self, algebra_manager: AlgebraManager, storage: MemoryStorage):
        """Remember the operator source and the object store."""
        self.algebra_manager = algebra_manager
        self.storage = storage

    def execute_create(self, name: str, type_str: str) -> None:
        """
        Run ``create name : type``.

        The type string is parsed and a new object holding an empty or
        default value of that type is stored under ``name``.

        Raises:
            ValueError: If the parsed type has no default value here.
        """
        obj_type = parse_type(type_str)

        if isinstance(obj_type, (RelationType, TupleType)):
            # Relations start empty; a tuple also gets an empty list
            # (invalid as a tuple, kept for error handling).
            value = NestedList.list([])
        else:
            # Basic types start from a fixed default value.
            defaults = (
                (BaseType.INT, 0),
                (BaseType.REAL, 0.0),
                (BaseType.STRING, ""),
                (BaseType.BOOL, False),
            )
            for base_type, default in defaults:
                if obj_type == base_type:
                    value = NestedList.atom(default)
                    break
            else:
                raise ValueError(f"Unsupported type: {obj_type}")

        self.storage.create_object(name, value, obj_type)

    def execute_update(self, name: str, value: NestedList) -> None:
        """
        Run ``update name := value``.

        The object keeps the type it was stored with; only its value is
        replaced.

        Raises:
            ValueError: If no object named ``name`` exists.
        """
        if not self.storage.object_exists(name):
            raise ValueError(f"Object '{name}' does not exist")
        current_type = self.storage.get_type(name)
        self.storage.update_object(name, value, current_type)

    def evaluate_operator(
        self,
        op_name: str,
        args: List[NestedList],
        arg_types: List[Type]
    ) -> NestedList:
        """
        Apply the operator ``op_name`` to ``args``.

        Args:
            op_name: Operator name
            args: Argument values
            arg_types: Argument types, passed to the operator's type mapping

        Returns:
            Whatever the operator's value mapping returns

        Raises:
            ValueError: If the operator is unknown or has no value mapping.
        """
        operator = self.algebra_manager.get_operator(op_name)
        if operator is None:
            raise ValueError(f"Unknown operator: {op_name}")

        # The type mapping is invoked as the type check; its result is not
        # needed here.
        operator.type_map(arg_types)

        if operator.value_map is None:
            raise ValueError(f"Operator {op_name} cannot be executed directly")
        return operator.value_map(args)

    def lookup_identifier(self, name: str) -> NestedList:
        """
        Return the stored value of ``name``.

        Raises:
            ValueError: If storage returns None for the name.
        """
        found = self.storage.get_object(name)
        if found is None:
            raise ValueError(f"Unknown identifier: {name}")
        return found

    def get_identifier_type(self, name: str) -> Type:
        """
        Return the stored type of ``name``.

        Raises:
            ValueError: If storage returns None for the name.
        """
        found = self.storage.get_type(name)
        if found is None:
            raise ValueError(f"Unknown identifier: {name}")
        return found

369
pysecondo/repl.py Normal file
View File

@@ -0,0 +1,369 @@
"""
Interactive REPL for PySECONDO
Provides an interactive shell for executing SECONDO queries.
"""
import sys
from typing import List
from pysecondo.algebras.base import AlgebraManager
from pysecondo.algebras.standard import StandardAlgebra
from pysecondo.algebras.relation import RelationAlgebra
from pysecondo.storage.memory import MemoryStorage
from pysecondo.parser.parser import Parser, CreateCommand, UpdateCommand, QueryCommand
from pysecondo.parser.evaluator import Evaluator
from pysecondo.core.types import parse_type
from pysecondo.core.nested_list import NestedList
class REPL:
    """
    Read-Eval-Print Loop for PySECONDO

    Provides an interactive interface for:
    - Creating databases
    - Querying data
    - Exploring results
    """

    def __init__(self):
        """Initialize the REPL with all components"""
        self.storage = MemoryStorage()
        self.algebra_manager = AlgebraManager()

        # Register algebras
        self.algebra_manager.register_algebra(
            "StandardAlgebra", StandardAlgebra())
        self.algebra_manager.register_algebra(
            "RelationAlgebra",
            RelationAlgebra(self.storage)
        )

        self.parser = Parser()
        self.evaluator = Evaluator(self.algebra_manager, self.storage)
        # True while run() is looping
        self.running = False
        # When True, tracebacks are printed along with error messages
        self.verbose = True

    def print_banner(self):
        """Print welcome banner"""
        print("=" * 60)
        print(" PySECONDO - Interactive Shell")
        print(" A minimal implementation of SECONDO in Python")
        print("=" * 60)
        print()
        print("Commands:")
        print(" create name : type - Create a relation")
        print(" update name := value - Insert/update data")
        print(" query expression - Execute a query")
        print(" list - List all objects")
        print(" type name - Show object type")
        print(" help - Show this help")
        print(" quit - Exit the shell")
        print()
        print("Examples:")
        print(" create cities : (rel (tuple ((Name string)(Population int))))")
        print(' update cities := (("Beijing" 21540000)("Shanghai" 24280000))')
        print(" query cities feed count")
        print(" query 5 + 3")
        print()

    def print_result(self, value: NestedList, max_depth: int = 3):
        """
        Print a nested list result nicely

        Args:
            value: Nested list to print
            max_depth: Maximum depth to display
        """
        if value.is_atom():
            print(f" {value.value}")
        else:
            self._print_list(
                value, indent=2, max_depth=max_depth, current_depth=0)

    def _print_list(self, value: NestedList, indent: int, max_depth: int, current_depth: int):
        """
        Recursively print a nested list

        Lists deeper than ``max_depth`` are shown as "...". String atoms
        are printed in double quotes, other atoms via str().
        """
        if current_depth >= max_depth:
            print(" " * indent + "...")
            return

        print(" " * indent + "(")
        for item in value.value:
            if item.is_atom():
                if isinstance(item.value, str):
                    print(" " * (indent + 2) + f'"{item.value}"')
                else:
                    print(" " * (indent + 2) + str(item.value))
            else:
                self._print_list(item, indent + 2, max_depth,
                                 current_depth + 1)
        print(" " * indent + ")")

    def execute(self, query: str) -> bool:
        """
        Execute a query

        Args:
            query: Query string

        Returns:
            True if execution succeeded, False otherwise
        """
        try:
            # Parse the query
            command = self.parser.parse(query)
            if command is None:
                print(f"Error: Could not parse query: {query}")
                return False

            # Handle different command types
            if isinstance(command, CreateCommand):
                return self.execute_create(command)
            elif isinstance(command, UpdateCommand):
                return self.execute_update(command)
            elif isinstance(command, QueryCommand):
                return self.execute_query(command)
            else:
                print(f"Error: Unknown command type")
                return False
        except Exception as e:
            print(f"Error: {e}")
            import traceback
            if self.verbose:
                traceback.print_exc()
            return False

    def execute_create(self, command: CreateCommand) -> bool:
        """
        Execute CREATE command

        Stores a new object with an empty (relation) or default (basic
        type) value. Returns False if the name is taken or the type cannot
        be parsed.
        """
        try:
            # Check if object already exists
            if self.storage.object_exists(command.name):
                print(f"Error: Object '{command.name}' already exists")
                return False

            # Parse type
            obj_type = parse_type(command.type_str)

            # Create empty value
            from pysecondo.core.types import RelationType, BaseType
            if isinstance(obj_type, RelationType):
                value = NestedList.list([])
            elif obj_type == BaseType.INT:
                value = NestedList.atom(0)
            elif obj_type == BaseType.REAL:
                value = NestedList.atom(0.0)
            elif obj_type == BaseType.STRING:
                value = NestedList.atom("")
            elif obj_type == BaseType.BOOL:
                value = NestedList.atom(False)
            else:
                value = NestedList.list([])

            self.storage.create_object(command.name, value, obj_type)
            print(f"Created: {command.name}")
            return True
        except Exception as e:
            print(f"Error creating object: {e}")
            return False

    def execute_update(self, command: UpdateCommand) -> bool:
        """
        Execute UPDATE command

        The value text is parsed with the built-in nested list parser and
        stored under the existing object's type. No type check of the new
        value is performed here.
        """
        try:
            # Parse the value with the simple nested list parser below
            # (handles ("str" num) patterns; no eval is involved).
            value = self._parse_nested_list(command.value)

            if not self.storage.object_exists(command.name):
                print(f"Error: Object '{command.name}' does not exist")
                return False

            obj_type = self.storage.get_type(command.name)
            self.storage.update_object(command.name, value, obj_type)
            print(f"Updated: {command.name}")
            return True
        except Exception as e:
            print(f"Error updating object: {e}")
            import traceback
            # Honour the verbose switch like execute() and execute_query()
            if self.verbose:
                traceback.print_exc()
            return False

    @staticmethod
    def _split_items(inner: str) -> List[str]:
        """
        Split the inside of a list into its top-level item strings

        Items are separated by whitespace; whitespace inside parentheses
        or inside double-quoted strings does not split.
        """
        items = []
        current = []
        depth = 0
        in_string = False
        for i, char in enumerate(inner):
            if char == '"' and (i == 0 or inner[i - 1] != '\\'):
                # Unescaped quote toggles string mode
                in_string = not in_string
                current.append(char)
            elif in_string:
                current.append(char)
            elif char == '(':
                depth += 1
                current.append(char)
            elif char == ')':
                depth -= 1
                current.append(char)
            elif char in ' \t\n' and depth == 0:
                if current:
                    items.append(''.join(current))
                    current = []
            else:
                current.append(char)
        if current:
            items.append(''.join(current))
        return items

    def _parse_nested_list(self, s: str) -> NestedList:
        """
        Parse a nested list string

        Very simple parser for ("value1" value2 ("nested" ...)).

        Atoms: quoted text becomes a string, true/false (any case) a
        boolean, numeric text an int or float, and anything else is kept
        as a string.
        """
        s = s.strip()

        if not s.startswith('(') or not s.endswith(')'):
            # Atomic value
            if len(s) >= 2 and s.startswith('"') and s.endswith('"'):
                return NestedList.atom(s[1:-1])
            # Same boolean literals as the expression evaluator
            lowered = s.lower()
            if lowered in ('true', 'false'):
                return NestedList.atom(lowered == 'true')
            try:
                if '.' in s:
                    return NestedList.atom(float(s))
                else:
                    return NestedList.atom(int(s))
            except ValueError:
                return NestedList.atom(s)

        # Parse list
        inner = s[1:-1].strip()
        if not inner:
            return NestedList.list([])

        # Parse each item recursively
        parsed_items = [
            self._parse_nested_list(item.strip())
            for item in self._split_items(inner)
        ]
        return NestedList.list(parsed_items)

    def execute_query(self, command: QueryCommand) -> bool:
        """
        Execute QUERY command

        Tokenizes the expression, evaluates it and prints the result.
        """
        try:
            tokens = self.parser.parse_expression(command.expression)
            value, value_type = self.evaluator.evaluate(tokens)
            print("Result:")
            self.print_result(value)
            print()
            return True
        except Exception as e:
            print(f"Error executing query: {e}")
            import traceback
            if self.verbose:
                traceback.print_exc()
            return False

    def cmd_list(self, args: List[str]) -> bool:
        """List all objects with their types (``args`` is unused)"""
        objects = self.storage.list_objects()
        if not objects:
            print("No objects defined.")
            return True

        print("Defined objects:")
        for name in objects:
            obj_type = self.storage.get_type(name)
            print(f" {name}: {obj_type}")
        print()
        return True

    def cmd_type(self, args: List[str]) -> bool:
        """Show type of the object named by the first argument"""
        if len(args) < 1:
            print("Usage: type <name>")
            return False

        name = args[0]
        if not self.storage.object_exists(name):
            print(f"Error: Unknown object '{name}'")
            return False

        obj_type = self.storage.get_type(name)
        print(f"{name}: {obj_type}")
        print()
        return True

    def cmd_help(self, args: List[str]) -> bool:
        """Show help (``args`` is unused)"""
        self.print_banner()
        return True

    def run(self):
        """
        Run the REPL loop

        Reads lines until quit/exit/q or end of input. The built-in
        commands list, type and help are handled directly; every other
        line is passed to execute().
        """
        self.running = True
        self.print_banner()

        while self.running:
            try:
                # Read input
                line = input("secondo> ").strip()
                if not line:
                    continue

                # Handle built-in commands
                parts = line.split(None, 1)
                cmd = parts[0].lower()
                # Split the remainder into words so args[0] is a single
                # name even when extra text follows it.
                args = parts[1].split() if len(parts) > 1 else []

                if cmd in ('quit', 'exit', 'q'):
                    print("Goodbye!")
                    break
                elif cmd == 'list':
                    self.cmd_list(args)
                elif cmd == 'type':
                    self.cmd_type(args)
                elif cmd == 'help':
                    self.cmd_help(args)
                else:
                    # Execute query
                    self.execute(line)
            except KeyboardInterrupt:
                print("\nUse 'quit' to exit.")
            except EOFError:
                print("\nGoodbye!")
                break

        # The loop has ended, so the shell is no longer running
        self.running = False
def main():
    """Create a REPL and run its interactive loop."""
    REPL().run()


# Start the shell only when this module is executed as a script.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,7 @@
"""
Storage module for PySECONDO
"""
from .memory import MemoryStorage
__all__ = ["MemoryStorage"]

View File

@@ -0,0 +1,82 @@
"""
In-Memory Storage for PySECONDO
Simplified storage implementation (no Berkeley DB)
Stores objects in memory dictionaries
"""
from typing import Dict, Optional
from pysecondo.core.nested_list import NestedList
from pysecondo.core.types import Type
class MemoryStorage:
    """
    Dictionary-backed store for database objects.

    Stands in for SECONDO's storage manager: values and types are kept in
    two in-memory dictionaries keyed by object name, with no persistence.
    """

    def __init__(self):
        """Start with no objects."""
        self.objects: Dict[str, NestedList] = {}  # name -> value
        self.types: Dict[str, Type] = {}  # name -> type

    def _put(self, name: str, value: NestedList, obj_type: Type) -> None:
        """Write value and type for ``name`` into both dictionaries."""
        self.objects[name] = value
        self.types[name] = obj_type

    def create_object(self, name: str, value: NestedList, obj_type: Type) -> None:
        """
        Store a new object.

        Args:
            name: Object name
            value: Nested list value
            obj_type: Type of the object

        Raises:
            ValueError: If an object with this name is already stored.
        """
        if name in self.objects:
            raise ValueError(f"Object '{name}' already exists")
        self._put(name, value, obj_type)

    def update_object(self, name: str, value: NestedList, obj_type: Type) -> None:
        """
        Replace the value and type of a stored object.

        Args:
            name: Object name
            value: New nested list value
            obj_type: Type of the object

        Raises:
            ValueError: If no object with this name is stored.
        """
        if name not in self.objects:
            raise ValueError(f"Object '{name}' does not exist")
        self._put(name, value, obj_type)

    def get_object(self, name: str) -> Optional[NestedList]:
        """Return the value stored under ``name``, or None if absent."""
        return self.objects.get(name)

    def get_type(self, name: str) -> Optional[Type]:
        """Return the type stored under ``name``, or None if absent."""
        return self.types.get(name)

    def delete_object(self, name: str) -> None:
        """Remove ``name`` from storage; unknown names are ignored."""
        for table in (self.objects, self.types):
            table.pop(name, None)

    def list_objects(self) -> list[str]:
        """Return the names of all stored objects."""
        return list(self.objects)

    def object_exists(self, name: str) -> bool:
        """Tell whether an object named ``name`` is stored."""
        return name in self.objects

    def clear(self) -> None:
        """Remove every stored object and type."""
        for table in (self.objects, self.types):
            table.clear()

226
tests/test_algebra.py Normal file
View File

@@ -0,0 +1,226 @@
"""
Tests for Phase 2: Algebra System
"""
import sys

# Put the current directory on the import path BEFORE importing the package;
# inserting it after the imports has no effect on them.
sys.path.insert(0, '.')

from pysecondo.algebras.base import AlgebraManager  # noqa: E402
from pysecondo.algebras.standard import StandardAlgebra  # noqa: E402
from pysecondo.core.nested_list import atom  # noqa: E402
from pysecondo.core.types import BaseType  # noqa: E402
def test_algebra_registration():
    """Registering StandardAlgebra lists the algebra and its operators."""
    print("Testing Algebra Registration...")
    manager = AlgebraManager()
    manager.register_algebra("StandardAlgebra", StandardAlgebra())

    # The algebra itself must be listed
    assert "StandardAlgebra" in manager.list_algebras()

    # Each of these operators must be listed after registration
    registered = manager.list_operators()
    for symbol in ("+", "-", "*", "/", "and", "or", "not"):
        assert symbol in registered

    print(" ✓ Algebra registration tests passed")
def test_arithmetic_operators():
    """Type and value mappings of +, -, * and /."""
    print("Testing Arithmetic Operators...")
    manager = AlgebraManager()
    manager.register_algebra("StandardAlgebra", StandardAlgebra())

    # Type mapping of "+": int + int = int, int + real = real
    add_op = manager.get_operator("+")
    type_cases = (
        ([BaseType.INT, BaseType.INT], BaseType.INT),
        ([BaseType.INT, BaseType.REAL], BaseType.REAL),
    )
    for arg_types, expected_type in type_cases:
        assert add_op.type_map(arg_types) == expected_type

    # Value mapping: (operator, left, right, expected result)
    value_cases = (
        ("+", 5, 3, 8),
        ("-", 10, 3, 7),
        ("*", 6, 7, 42),
        ("/", 10, 2, 5.0),
    )
    for symbol, left, right, expected in value_cases:
        operator = manager.get_operator(symbol)
        outcome = operator.value_map([atom(left), atom(right)])
        assert outcome.value == expected

    print(" ✓ Arithmetic operators tests passed")
def test_comparison_operators():
    """Type mapping of < and value mappings of <, >, = and !=."""
    print("Testing Comparison Operators...")
    manager = AlgebraManager()
    manager.register_algebra("StandardAlgebra", StandardAlgebra())

    # Comparing two ints yields a bool
    less_than = manager.get_operator("<")
    assert less_than.type_map([BaseType.INT, BaseType.INT]) == BaseType.BOOL

    # (operator, left, right, expected truth value)
    cases = (
        ("<", 3, 5, True),
        ("<", 5, 3, False),
        (">", 10, 5, True),
        ("=", 5, 5, True),
        ("=", 5, 3, False),
        ("!=", 5, 3, True),
    )
    for symbol, left, right, expected in cases:
        operator = manager.get_operator(symbol)
        outcome = operator.value_map([atom(left), atom(right)])
        assert outcome.value is expected

    print(" ✓ Comparison operators tests passed")
def test_logical_operators():
    """Check the value maps of ``and``, ``or`` and ``not`` on boolean atoms."""
    print("Testing Logical Operators...")
    registry = AlgebraManager()
    registry.register_algebra("StandardAlgebra", StandardAlgebra())

    # (operator name, ((operands, expected), ...))
    truth_tables = (
        ("and", (((True, True), True), ((True, False), False))),
        ("or", (((True, False), True), ((False, False), False))),
        ("not", (((True,), False), ((False,), True))),
    )
    for name, rows in truth_tables:
        operator = registry.get_operator(name)
        for operands, expected in rows:
            outcome = operator.value_map([atom(flag) for flag in operands])
            assert outcome.value is expected

    print(" ✓ Logical operators tests passed")
def test_type_checking():
    """Check that the ``+`` type map accepts valid and rejects invalid input."""
    print("Testing Type Checking...")
    registry = AlgebraManager()
    registry.register_algebra("StandardAlgebra", StandardAlgebra())
    plus = registry.get_operator("+")

    # Two ints are a valid signature and must not raise.
    try:
        plus.type_map([BaseType.INT, BaseType.INT])
    except TypeError:
        assert False, "Should not raise TypeError for valid types"

    # A string operand must be rejected with a message mentioning "requires".
    try:
        plus.type_map([BaseType.STRING, BaseType.INT])
    except TypeError as err:
        assert "requires" in str(err).lower()
    else:
        assert False, "Should raise TypeError for invalid types"

    # A single operand must be rejected with a message mentioning "expects 2".
    try:
        plus.type_map([BaseType.INT])
    except TypeError as err:
        assert "expects 2" in str(err)
    else:
        assert False, "Should raise TypeError for wrong argument count"

    print(" ✓ Type checking tests passed")
def test_complex_expressions():
    """Chain operators by feeding one operator's result into the next."""
    print("Testing Complex Expressions...")
    registry = AlgebraManager()
    registry.register_algebra("StandardAlgebra", StandardAlgebra())

    # (5 + 3) * 2 = 16
    plus = registry.get_operator("+")
    times = registry.get_operator("*")
    partial_sum = plus.value_map([atom(5), atom(3)])
    assert times.value_map([partial_sum, atom(2)]).value == 16

    # (10 > 5) and (3 < 7) = True
    greater = registry.get_operator(">")
    less = registry.get_operator("<")
    conjunction = registry.get_operator("and")
    lhs = greater.value_map([atom(10), atom(5)])
    rhs = less.value_map([atom(3), atom(7)])
    assert conjunction.value_map([lhs, rhs]).value is True

    print(" ✓ Complex expressions tests passed")
if __name__ == "__main__":
    # Script entry point: run every Phase 2 test in order between banners.
    rule = "=" * 50
    print(rule)
    print("Phase 2: Algebra System Tests")
    print(rule)
    for run_test in (
        test_algebra_registration,
        test_arithmetic_operators,
        test_comparison_operators,
        test_logical_operators,
        test_type_checking,
        test_complex_expressions,
    ):
        run_test()
    print("\n" + rule)
    print("All Phase 2 tests passed! ✓")
    print(rule)

179
tests/test_core.py Normal file
View File

@@ -0,0 +1,179 @@
"""
Tests for Phase 1: Core functionality
"""
from pysecondo.storage.memory import MemoryStorage
from pysecondo.core.type_system import TypeChecker
from pysecondo.core.types import (
BaseType, TupleType, RelationType, Attribute,
parse_type, type_to_string
)
from pysecondo.core.nested_list import NestedList, atom, list_nl
import sys
sys.path.insert(0, '.')
def test_nested_list():
    """Check atom/list construction, string rendering and ``to_python()``."""
    print("Testing NestedList...")

    # Atoms
    number = atom(42)
    text = atom("Beijing")
    atom(True)  # a boolean atom is only constructed; nothing is asserted on it
    assert number.is_atom()
    assert number.value == 42
    assert str(number) == "42"
    assert text.is_atom()
    assert text.value == "Beijing"
    assert str(text) == '"Beijing"'

    # Flat list
    triple = list_nl(1, 2, 3)
    assert triple.is_list()
    assert len(triple) == 3
    assert str(triple) == "(1 2 3)"

    # Tuple-shaped list
    city = list_nl("Beijing", 21540000)
    assert str(city) == '("Beijing" 21540000)'

    # Relation-shaped list (a list of tuple-shaped lists)
    relation = list_nl(
        list_nl("Beijing", 21540000),
        list_nl("Shanghai", 24280000),
    )
    assert str(relation) == '(("Beijing" 21540000) ("Shanghai" 24280000))'

    # Conversion back to plain Python values
    assert number.to_python() == 42
    assert triple.to_python() == [1, 2, 3]
    assert city.to_python() == ["Beijing", 21540000]

    print(" ✓ NestedList tests passed")
def test_type_system():
    """Check ``parse_type`` and ``type_to_string`` on base, tuple and rel types."""
    print("Testing Type System...")

    # Base types
    assert parse_type("int") == BaseType.INT
    assert parse_type("string") == BaseType.STRING

    # Tuple type
    parsed_tuple = parse_type("(tuple ((Name string)(Population int)))")
    assert isinstance(parsed_tuple, TupleType)
    assert len(parsed_tuple.attributes) == 2
    first_attr = parsed_tuple.attributes[0]
    assert first_attr.name == "Name"
    assert first_attr.type == BaseType.STRING

    # Relation type
    parsed_rel = parse_type("(rel (tuple ((Name string)(Population int))))")
    assert isinstance(parsed_rel, RelationType)
    assert isinstance(parsed_rel.tuple_type, TupleType)

    # Rendering back to text
    assert type_to_string(BaseType.INT) == "int"
    assert "Name" in type_to_string(parsed_tuple)
    assert "(rel" in type_to_string(parsed_rel)

    print(" ✓ Type system tests passed")
def test_type_checker():
    """Check ``TypeChecker.check`` against base, tuple and relation types."""
    print("Testing Type Checker...")
    checker = TypeChecker()

    # Base types
    assert checker.check(atom(42), BaseType.INT)
    assert checker.check(atom("hello"), BaseType.STRING)
    assert not checker.check(atom("hello"), BaseType.INT)

    # Tuple type: (Name string, Population int)
    city_schema = TupleType([
        Attribute("Name", BaseType.STRING),
        Attribute("Population", BaseType.INT),
    ])
    assert checker.check(list_nl("Beijing", 21540000), city_schema)
    # An int in the Name position must be rejected.
    assert not checker.check(list_nl(123, 21540000), city_schema)

    # Relation type over the same tuple schema
    rows = list_nl(
        list_nl("Beijing", 21540000),
        list_nl("Shanghai", 24280000),
    )
    assert checker.check(rows, RelationType(city_schema))

    print(" ✓ Type checker tests passed")
def test_storage():
    """Exercise create/get/update/list/delete on ``MemoryStorage``."""
    print("Testing Memory Storage...")
    store = MemoryStorage()
    city_schema = TupleType([
        Attribute("Name", BaseType.STRING),
        Attribute("Population", BaseType.INT),
    ])

    # Create, then read back both value and type.
    original = list_nl("Beijing", 21540000)
    store.create_object("beijing", original, city_schema)
    assert store.get_object("beijing") == original
    assert store.get_type("beijing") == city_schema

    # Update replaces the stored value under the same name.
    replacement = list_nl("Shanghai", 24280000)
    store.update_object("beijing", replacement, city_schema)
    assert store.get_object("beijing") == replacement

    # Listing reports every stored name.
    store.create_object("city2", original, city_schema)
    names = store.list_objects()
    assert "beijing" in names
    assert "city2" in names
    assert len(names) == 2

    # Deleting removes exactly one entry.
    store.delete_object("city2")
    assert len(store.list_objects()) == 1

    print(" ✓ Storage tests passed")
if __name__ == "__main__":
    # Script entry point: run every Phase 1 test in order between banners.
    rule = "=" * 50
    print(rule)
    print("Phase 1: Core Functionality Tests")
    print(rule)
    for run_test in (
        test_nested_list,
        test_type_system,
        test_type_checker,
        test_storage,
    ):
        run_test()
    print("\n" + rule)
    print("All Phase 1 tests passed! ✓")
    print(rule)

311
tests/test_relation.py Normal file
View File

@@ -0,0 +1,311 @@
"""
Tests for Phase 3: Relation Algebra
"""
from pysecondo.core.nested_list import atom, list_nl
from pysecondo.core.types import BaseType, TupleType, RelationType, Attribute
from pysecondo.query_processor import QueryProcessor
from pysecondo.storage.memory import MemoryStorage
from pysecondo.algebras.relation import RelationAlgebra
from pysecondo.algebras.standard import StandardAlgebra
from pysecondo.algebras.base import AlgebraManager
import sys
sys.path.insert(0, '.')
def test_create_relation():
    """Create a relation through the query processor and check it is stored."""
    print("Testing Create Relation...")
    store = MemoryStorage()
    algebras = AlgebraManager()
    algebras.register_algebra("StandardAlgebra", StandardAlgebra())
    algebras.register_algebra("RelationAlgebra", RelationAlgebra(store))
    processor = QueryProcessor(algebras, store)

    schema = "(rel (tuple ((Name string)(Population int))))"
    processor.execute_create("cities", schema)

    # The object must exist in storage and carry a relation type.
    assert store.object_exists("cities")
    assert isinstance(store.get_type("cities"), RelationType)

    print(" ✓ Create relation tests passed")
def test_update_relation():
    """Populate a relation via ``execute_update`` and check its length."""
    print("Testing Update Relation...")
    store = MemoryStorage()
    algebras = AlgebraManager()
    algebras.register_algebra("StandardAlgebra", StandardAlgebra())
    algebras.register_algebra("RelationAlgebra", RelationAlgebra(store))
    processor = QueryProcessor(algebras, store)

    schema = "(rel (tuple ((Name string)(Population int))))"
    processor.execute_create("cities", schema)

    # Insert three rows.
    rows = (
        ("Beijing", 21540000),
        ("Shanghai", 24280000),
        ("Guangzhou", 14040000),
    )
    payload = list_nl(*(list_nl(name, population) for name, population in rows))
    processor.execute_update("cities", payload)

    # Read the relation back by name.
    assert len(processor.lookup_identifier("cities")) == 3

    print(" ✓ Update relation tests passed")
def test_feed_consume():
    """Round-trip a relation through ``feed`` and ``consume``."""
    print("Testing Feed and Consume...")
    store = MemoryStorage()
    algebras = AlgebraManager()
    algebras.register_algebra("StandardAlgebra", StandardAlgebra())
    algebras.register_algebra("RelationAlgebra", RelationAlgebra(store))
    processor = QueryProcessor(algebras, store)

    schema = "(rel (tuple ((Name string)(Population int))))"
    processor.execute_create("cities", schema)
    rows = (
        ("Beijing", 21540000),
        ("Shanghai", 24280000),
        ("Guangzhou", 14040000),
    )
    payload = list_nl(*(list_nl(name, population) for name, population in rows))
    processor.execute_update("cities", payload)

    # feed: relation -> stream
    relation = processor.lookup_identifier("cities")
    # The type lookup is exercised but its result is not inspected.
    processor.get_identifier_type("cities")
    feed = algebras.get_operator("feed")
    tuples = feed.value_map([relation])
    assert len(tuples) == 3

    # consume: stream -> relation
    consume = algebras.get_operator("consume")
    rebuilt = consume.value_map([tuples])
    assert len(rebuilt) == 3

    print(" ✓ Feed and consume tests passed")
def test_count():
    """Count the tuples of a fed relation with the ``count`` operator."""
    print("Testing Count Operator...")
    store = MemoryStorage()
    algebras = AlgebraManager()
    algebras.register_algebra("StandardAlgebra", StandardAlgebra())
    algebras.register_algebra("RelationAlgebra", RelationAlgebra(store))
    processor = QueryProcessor(algebras, store)

    schema = "(rel (tuple ((Name string)(Population int))))"
    processor.execute_create("cities", schema)
    rows = (
        ("Beijing", 21540000),
        ("Shanghai", 24280000),
        ("Guangzhou", 14040000),
        ("Shenzhen", 17560000),
    )
    payload = list_nl(*(list_nl(name, population) for name, population in rows))
    processor.execute_update("cities", payload)

    # cities feed count
    relation = processor.lookup_identifier("cities")
    feed = algebras.get_operator("feed")
    count = algebras.get_operator("count")
    tuples = feed.value_map([relation])
    assert count.value_map([tuples]).value == 4

    print(" ✓ Count operator tests passed")
def test_filter():
    """Check ``filter`` with constant predicates on a four-row relation.

    A constant ``true`` predicate must keep every tuple and a constant
    ``false`` predicate must keep none. Each case filters a freshly fed
    stream so that the second assertion cannot pass merely because the
    first ``filter`` call consumed or altered its input.
    """
    print("Testing Filter Operator...")
    storage = MemoryStorage()
    algebra_manager = AlgebraManager()
    algebra_manager.register_algebra("StandardAlgebra", StandardAlgebra())
    algebra_manager.register_algebra(
        "RelationAlgebra",
        RelationAlgebra(storage)
    )
    qp = QueryProcessor(algebra_manager, storage)

    # Create and populate a relation
    qp.execute_create(
        "cities",
        "(rel (tuple ((Name string)(Population int))))"
    )
    cities_data = list_nl(
        list_nl("Beijing", 21540000),
        list_nl("Shanghai", 24280000),
        list_nl("Guangzhou", 14040000),
        list_nl("Shenzhen", 17560000),
    )
    qp.execute_update("cities", cities_data)

    cities = qp.lookup_identifier("cities")
    feed_op = algebra_manager.get_operator("feed")
    filter_op = algebra_manager.get_operator("filter")

    # Test filter with true (pass all)
    stream = feed_op.value_map([cities])
    filtered = filter_op.value_map([stream, atom(True)])
    assert len(filtered) == 4

    # Test filter with false (pass none); re-feed so the input is untouched
    stream = feed_op.value_map([cities])
    filtered = filter_op.value_map([stream, atom(False)])
    assert len(filtered) == 0

    print(" ✓ Filter operator tests passed")
def test_query_pipeline():
    """Run two operator pipelines by hand over a five-row relation."""
    print("Testing Query Pipeline...")
    store = MemoryStorage()
    algebras = AlgebraManager()
    algebras.register_algebra("StandardAlgebra", StandardAlgebra())
    algebras.register_algebra("RelationAlgebra", RelationAlgebra(store))
    processor = QueryProcessor(algebras, store)

    # Setup: create and fill the cities relation.
    schema = "(rel (tuple ((Name string)(Population int))))"
    processor.execute_create("cities", schema)
    rows = (
        ("Beijing", 21540000),
        ("Shanghai", 24280000),
        ("Guangzhou", 14040000),
        ("Shenzhen", 17560000),
        ("Hangzhou", 12200000),
    )
    payload = list_nl(*(list_nl(name, population) for name, population in rows))
    processor.execute_update("cities", payload)

    # Pipeline 1: cities feed count
    # (SQL analogue: SELECT COUNT(*) FROM cities)
    relation = processor.lookup_identifier("cities")
    feed = algebras.get_operator("feed")
    count = algebras.get_operator("count")
    tuples = feed.value_map([relation])
    assert count.value_map([tuples]).value == 5

    # Pipeline 2: cities feed filter[true] consume
    # (SQL analogue: SELECT * FROM cities)
    keep = algebras.get_operator("filter")
    consume = algebras.get_operator("consume")
    tuples = feed.value_map([relation])
    kept = keep.value_map([tuples, atom(True)])
    assert len(consume.value_map([kept])) == 5

    print(" ✓ Query pipeline tests passed")
def test_type_checking():
    """Check the type maps of ``feed``, ``consume`` and ``count``."""
    print("Testing Type Checking for Relations...")
    algebras = AlgebraManager()
    algebras.register_algebra("StandardAlgebra", StandardAlgebra())
    store = MemoryStorage()
    algebras.register_algebra("RelationAlgebra", RelationAlgebra(store))

    city_schema = TupleType([
        Attribute("Name", BaseType.STRING),
        Attribute("Population", BaseType.INT),
    ])

    # feed: a relation type maps to its tuple type.
    feed = algebras.get_operator("feed")
    assert feed.type_map([RelationType(city_schema)]) == city_schema

    # consume: a tuple type maps to a relation over that tuple type.
    consume = algebras.get_operator("consume")
    consumed_type = consume.type_map([city_schema])
    assert isinstance(consumed_type, RelationType)
    assert consumed_type.tuple_type == city_schema

    # count: a tuple type maps to int.
    count = algebras.get_operator("count")
    assert count.type_map([city_schema]) == BaseType.INT

    print(" ✓ Type checking tests passed")
if __name__ == "__main__":
    # Script entry point: run every Phase 3 test in order between banners.
    rule = "=" * 50
    print(rule)
    print("Phase 3: Relation Algebra Tests")
    print(rule)
    for run_test in (
        test_create_relation,
        test_update_relation,
        test_feed_consume,
        test_count,
        test_filter,
        test_query_pipeline,
        test_type_checking,
    ):
        run_test()
    print("\n" + rule)
    print("All Phase 3 tests passed! ✓")
    print(rule)

220
tests/test_repl.py Normal file
View File

@@ -0,0 +1,220 @@
"""
Tests for Phase 4: Query Processing & REPL
"""
from pysecondo.core.nested_list import atom, list_nl
from pysecondo.core.types import BaseType, TupleType, RelationType
from pysecondo.storage.memory import MemoryStorage
from pysecondo.algebras.relation import RelationAlgebra
from pysecondo.algebras.standard import StandardAlgebra
from pysecondo.algebras.base import AlgebraManager
from pysecondo.parser.evaluator import Evaluator
from pysecondo.parser.parser import Parser, parse_query, CreateCommand, UpdateCommand, QueryCommand
import sys
sys.path.insert(0, '.')
def test_parser_create():
    """Parse a ``create`` statement and check the resulting command fields."""
    print("Testing Parser (CREATE)...")
    statement = 'create cities : (rel (tuple ((Name string)(Population int))))'
    command = Parser().parse(statement)

    assert isinstance(command, CreateCommand)
    assert command.name == "cities"
    assert "(rel" in command.type_str

    print(" ✓ CREATE parsing tests passed")
def test_parser_update():
    """Parse an ``update`` statement and check the resulting command fields."""
    print("Testing Parser (UPDATE)...")
    statement = 'update cities := (("Beijing" 21540000)("Shanghai" 24280000))'
    command = Parser().parse(statement)

    assert isinstance(command, UpdateCommand)
    assert command.name == "cities"
    assert "Beijing" in command.value

    print(" ✓ UPDATE parsing tests passed")
def test_parser_query():
    """Parse ``query`` statements and check the extracted expression text."""
    print("Testing Parser (QUERY)...")
    query_parser = Parser()

    # Operator-chain query
    chain = query_parser.parse('query cities feed count')
    assert isinstance(chain, QueryCommand)
    assert chain.expression == "cities feed count"

    # Arithmetic query
    arithmetic = query_parser.parse('query 5 + 3')
    assert isinstance(arithmetic, QueryCommand)
    assert arithmetic.expression == "5 + 3"

    print(" ✓ QUERY parsing tests passed")
def test_parser_expressions():
    """Check that ``parse_expression`` splits expressions into token lists."""
    print("Testing Expression Tokenization...")
    tokenizer = Parser()

    # (expression text, expected tokens)
    samples = (
        ("cities", ["cities"]),
        ("cities feed consume", ["cities", "feed", "consume"]),
        ("5 + 3", ["5", "+", "3"]),
        (
            "cities feed filter true consume",
            ["cities", "feed", "filter", "true", "consume"],
        ),
    )
    for text, expected in samples:
        assert tokenizer.parse_expression(text) == expected

    print(" ✓ Expression tokenization tests passed")
def test_evaluator_arithmetic():
    """Evaluate three-token arithmetic expressions and check the values."""
    print("Testing Evaluator (Arithmetic)...")
    store = MemoryStorage()
    algebras = AlgebraManager()
    algebras.register_algebra("StandardAlgebra", StandardAlgebra())
    engine = Evaluator(algebras, store)

    # (token list, expected numeric result)
    samples = (
        (["5", "+", "3"], 8),
        (["10", "-", "4"], 6),
        (["6", "*", "7"], 42),
    )
    for tokens, expected in samples:
        # evaluate() yields a (value, type) pair; only the value is checked.
        outcome, _outcome_type = engine.evaluate(tokens)
        assert outcome.value == expected

    print(" ✓ Arithmetic evaluation tests passed")
def test_evaluator_identifiers():
    """Evaluate a bare identifier that names a stored integer object."""
    print("Testing Evaluator (Identifiers)...")
    store = MemoryStorage()
    algebras = AlgebraManager()
    algebras.register_algebra("StandardAlgebra", StandardAlgebra())
    engine = Evaluator(algebras, store)

    # Store x = 42, then evaluate the single-token expression "x".
    store.create_object("x", atom(42), BaseType.INT)
    outcome, _outcome_type = engine.evaluate(["x"])
    assert outcome.value == 42

    print(" ✓ Identifier evaluation tests passed")
def test_evaluator_relations():
    """Evaluate ``cities feed`` on an empty relation.

    No assertion is made on the result; the test passes as long as the
    evaluation returns a two-element result without raising.
    """
    print("Testing Evaluator (Relations)...")
    store = MemoryStorage()
    algebras = AlgebraManager()
    algebras.register_algebra("StandardAlgebra", StandardAlgebra())
    algebras.register_algebra("RelationAlgebra", RelationAlgebra(store))
    engine = Evaluator(algebras, store)

    # An empty relation over an attribute-less tuple type.
    empty_rel_type = RelationType(TupleType([]))
    store.create_object("cities", list_nl(), empty_rel_type)

    # NOTE(review): the unpacked result is never checked — consider asserting on it.
    _outcome, _outcome_type = engine.evaluate(["cities", "feed"])

    print(" ✓ Relation evaluation tests passed")
def test_end_to_end():
    """Parse, tokenize and evaluate ``cities feed count`` on stored data."""
    print("Testing End-to-End Queries...")
    store = MemoryStorage()
    algebras = AlgebraManager()
    algebras.register_algebra("StandardAlgebra", StandardAlgebra())
    algebras.register_algebra("RelationAlgebra", RelationAlgebra(store))
    query_parser = Parser()
    engine = Evaluator(algebras, store)

    # The create statement is only parsed here, not executed.
    schema_text = '(rel (tuple ((Name string)(Population int))))'
    created = query_parser.parse(
        'create cities : (rel (tuple ((Name string)(Population int))))')
    assert isinstance(created, CreateCommand)

    # The relation itself is written straight into storage for simplicity.
    from pysecondo.core.types import parse_type
    schema = parse_type(schema_text)
    rows = (
        ("Beijing", 21540000),
        ("Shanghai", 24280000),
        ("Guangzhou", 14040000),
    )
    payload = list_nl(*(list_nl(name, population) for name, population in rows))
    store.create_object("cities", payload, schema)

    # Query: cities feed count
    query = query_parser.parse('query cities feed count')
    assert isinstance(query, QueryCommand)
    tokens = query_parser.parse_expression(query.expression)
    outcome, _outcome_type = engine.evaluate(tokens)
    assert outcome.value == 3

    print(" ✓ End-to-end query tests passed")
if __name__ == "__main__":
    # Script entry point: run every Phase 4 test in order between banners.
    rule = "=" * 50
    print(rule)
    print("Phase 4: Query Processing & REPL Tests")
    print(rule)
    for run_test in (
        test_parser_create,
        test_parser_update,
        test_parser_query,
        test_parser_expressions,
        test_evaluator_arithmetic,
        test_evaluator_identifiers,
        test_evaluator_relations,
        test_end_to_end,
    ):
        run_test()
    print("\n" + rule)
    print("All Phase 4 tests passed! ✓")
    print(rule)