From d99832f66bc605db63946fc1119a027da50c499f Mon Sep 17 00:00:00 2001 From: along <1015042407@qq.com> Date: Sat, 24 Jan 2026 11:30:02 +0800 Subject: [PATCH] first commit --- .gitignore | 10 + .python-version | 1 + README.md | 40 ++++ demo.py | 202 ++++++++++++++++++ pyproject.toml | 7 + pysecondo/__init__.py | 7 + pysecondo/__main__.py | 46 ++++ pysecondo/algebras/__init__.py | 13 ++ pysecondo/algebras/base.py | 150 ++++++++++++++ pysecondo/algebras/relation.py | 290 ++++++++++++++++++++++++++ pysecondo/algebras/standard.py | 200 ++++++++++++++++++ pysecondo/core/__init__.py | 9 + pysecondo/core/nested_list.py | 167 +++++++++++++++ pysecondo/core/type_system.py | 138 ++++++++++++ pysecondo/core/types.py | 165 +++++++++++++++ pysecondo/parser/__init__.py | 15 ++ pysecondo/parser/evaluator.py | 319 ++++++++++++++++++++++++++++ pysecondo/parser/parser.py | 163 +++++++++++++++ pysecondo/query_processor.py | 120 +++++++++++ pysecondo/repl.py | 369 +++++++++++++++++++++++++++++++++ pysecondo/storage/__init__.py | 7 + pysecondo/storage/memory.py | 82 ++++++++ tests/test_algebra.py | 226 ++++++++++++++++++++ tests/test_core.py | 179 ++++++++++++++++ tests/test_relation.py | 311 +++++++++++++++++++++++++++ tests/test_repl.py | 220 ++++++++++++++++++++ 26 files changed, 3456 insertions(+) create mode 100644 .gitignore create mode 100644 .python-version create mode 100644 README.md create mode 100644 demo.py create mode 100644 pyproject.toml create mode 100644 pysecondo/__init__.py create mode 100644 pysecondo/__main__.py create mode 100644 pysecondo/algebras/__init__.py create mode 100644 pysecondo/algebras/base.py create mode 100644 pysecondo/algebras/relation.py create mode 100644 pysecondo/algebras/standard.py create mode 100644 pysecondo/core/__init__.py create mode 100644 pysecondo/core/nested_list.py create mode 100644 pysecondo/core/type_system.py create mode 100644 pysecondo/core/types.py create mode 100644 pysecondo/parser/__init__.py create mode 100644 
pysecondo/parser/evaluator.py create mode 100644 pysecondo/parser/parser.py create mode 100644 pysecondo/query_processor.py create mode 100644 pysecondo/repl.py create mode 100644 pysecondo/storage/__init__.py create mode 100644 pysecondo/storage/memory.py create mode 100644 tests/test_algebra.py create mode 100644 tests/test_core.py create mode 100644 tests/test_relation.py create mode 100644 tests/test_repl.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..505a3b1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +# Python-generated files +__pycache__/ +*.py[oc] +build/ +dist/ +wheels/ +*.egg-info + +# Virtual environments +.venv diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..e4fba21 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/README.md b/README.md new file mode 100644 index 0000000..cf40a63 --- /dev/null +++ b/README.md @@ -0,0 +1,40 @@ +# PySECONDO - A Minimal SECONDO Implementation in Python + +一个简化的 SECONDO 数据库系统实现,用于学习和理解 SECONDO 的核心架构。 + +## 项目概述 + +PySECONDO 通过实现一个最小化的 SECONDO 版本,展示了 SECONDO 数据库系统的核心设计理念: +- **嵌套列表**(Nested List)作为核心数据结构 +- **代数系统**(Algebra System)作为可扩展机制 +- **类型系统**(Type System)用于数据验证 +- **流式处理**(Stream Processing)用于数据操作 + +## 快速开始 + +### 运行演示 +```bash +cd /home/db/secondo-py +python3 demo.py +``` + +### 交互式使用 +```bash +python3 -m pysecondo.repl +``` + +## 功能特性 + +- ✅ 嵌套列表数据结构 +- ✅ 完整类型系统 +- ✅ 代数可扩展框架 +- ✅ 关系代数操作 +- ✅ 查询解析和执行 + +## 核心概念 + +详见项目文档和源码注释。 + +--- + +**版本**: 0.1.0 diff --git a/demo.py b/demo.py new file mode 100644 index 0000000..8d15b80 --- /dev/null +++ b/demo.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 +""" +PySECONDO Demo - Interactive demonstration + +This script demonstrates the complete PySECONDO system. 
+""" + +from pysecondo.core.nested_list import atom, list_nl +from pysecondo.core.types import parse_type +from pysecondo.parser.evaluator import Evaluator +from pysecondo.parser.parser import Parser, QueryCommand +from pysecondo.algebras.relation import RelationAlgebra +from pysecondo.algebras.standard import StandardAlgebra +from pysecondo.algebras.base import AlgebraManager +from pysecondo.storage.memory import MemoryStorage +import sys +sys.path.insert(0, '.') + + +def print_separator(): + print("=" * 60) + + +def demo_basic_arithmetic(): + """Demonstrate basic arithmetic operations""" + print("\n" + "» " * 30) + print("DEMO: Basic Arithmetic Operations") + print("» " * 30) + + storage = MemoryStorage() + algebra_manager = AlgebraManager() + algebra_manager.register_algebra("StandardAlgebra", StandardAlgebra()) + + parser = Parser() + evaluator = Evaluator(algebra_manager, storage) + + queries = [ + "query 5 + 3", + "query 10 - 4", + "query 6 * 7", + "query 20 / 4", + "query 10 > 5", + "query 5 = 5", + "query true and false", + "query not false", + ] + + for query in queries: + print(f"\n> {query}") + cmd = parser.parse(query) + if isinstance(cmd, QueryCommand): + tokens = parser.parse_expression(cmd.expression) + value, _ = evaluator.evaluate(tokens) + print(f"Result: {value.to_python()}") + + +def demo_relation_operations(): + """Demonstrate relation operations""" + print("\n" + "» " * 30) + print("DEMO: Relation Operations") + print("» " * 30) + + storage = MemoryStorage() + algebra_manager = AlgebraManager() + algebra_manager.register_algebra("StandardAlgebra", StandardAlgebra()) + algebra_manager.register_algebra( + "RelationAlgebra", RelationAlgebra(storage)) + + parser = Parser() + evaluator = Evaluator(algebra_manager, storage) + + # Create cities relation + print("\n> create cities : (rel (tuple ((Name string)(Population int))))") + cities_type = parse_type('(rel (tuple ((Name string)(Population int))))') + cities_data = list_nl( + list_nl("Beijing", 
21540000), + list_nl("Shanghai", 24280000), + list_nl("Guangzhou", 14040000), + list_nl("Shenzhen", 17560000), + list_nl("Hangzhou", 12200000), + ) + storage.create_object("cities", cities_data, cities_type) + print("Created: cities") + + # Query all cities + print("\n> query cities feed consume") + tokens = parser.parse_expression("cities feed consume") + value, _ = evaluator.evaluate(tokens) + print(f"Result: {value}") + + # Count cities + print("\n> query cities feed count") + tokens = parser.parse_expression("cities feed count") + value, _ = evaluator.evaluate(tokens) + print(f"Result: {value.to_python()}") + + # Filter with true (pass all) + print("\n> query cities feed filter true consume") + tokens = parser.parse_expression("cities feed filter true consume") + value, _ = evaluator.evaluate(tokens) + print(f"Result: {value}") + + # Filter with false (pass none) + print("\n> query cities feed filter false count") + tokens = parser.parse_expression("cities feed filter false count") + value, _ = evaluator.evaluate(tokens) + print(f"Result: {value.to_python()}") + + +def demo_complex_queries(): + """Demonstrate more complex queries""" + print("\n" + "» " * 30) + print("DEMO: Complex Queries") + print("» " * 30) + + storage = MemoryStorage() + algebra_manager = AlgebraManager() + algebra_manager.register_algebra("StandardAlgebra", StandardAlgebra()) + algebra_manager.register_algebra( + "RelationAlgebra", RelationAlgebra(storage)) + + parser = Parser() + evaluator = Evaluator(algebra_manager, storage) + + # Create products relation + print("\n> create products : (rel (tuple ((Name string)(Price int)(Stock int))))") + products_type = parse_type( + '(rel (tuple ((Name string)(Price int)(Stock int))))') + products_data = list_nl( + list_nl("Laptop", 1000, 50), + list_nl("Mouse", 25, 200), + list_nl("Keyboard", 75, 150), + list_nl("Monitor", 300, 75), + ) + storage.create_object("products", products_data, products_type) + print("Created: products") + + # Count 
products + print("\n> query products feed count") + tokens = parser.parse_expression("products feed count") + value, _ = evaluator.evaluate(tokens) + print(f"Result: {value.to_python()}") + + # Arithmetic on stored values (not directly from relation) + print("\n> query 1000 + 25") + tokens = parser.parse_expression("1000 + 25") + value, _ = evaluator.evaluate(tokens) + print(f"Result: {value.to_python()}") + + +def demo_system_capabilities(): + """Show system capabilities""" + print("\n" + "» " * 30) + print("PySECONDO System Capabilities") + print("» " * 30) + + storage = MemoryStorage() + algebra_manager = AlgebraManager() + algebra_manager.register_algebra("StandardAlgebra", StandardAlgebra()) + algebra_manager.register_algebra( + "RelationAlgebra", RelationAlgebra(storage)) + + print("\nRegistered Algebras:") + for alg_name in algebra_manager.list_algebras(): + print(f" - {alg_name}") + + print("\nAvailable Operators:") + for op_name in sorted(algebra_manager.list_operators()): + alg = algebra_manager.get_algebra_for_operator(op_name) + print(f" - {op_name:15s} (from {alg.__class__.__name__})") + + print(f"\nTotal: {len(algebra_manager.list_operators())} operators") + + +def main(): + """Run all demos""" + print_separator() + print(" PySECONDO - Complete Demonstration") + print(" A minimal SECONDO implementation in Python") + print_separator() + + try: + demo_basic_arithmetic() + demo_relation_operations() + demo_complex_queries() + demo_system_capabilities() + + print("\n" + "» " * 30) + print("All demos completed successfully!") + print("» " * 30) + + print("\nTo try the interactive REPL, run:") + print(" python3 -m pysecondo.repl") + + except Exception as e: + print(f"\nError during demo: {e}") + import traceback + traceback.print_exc() + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..c0094f3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "secondo-py" +version = 
"0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.12" +dependencies = [] diff --git a/pysecondo/__init__.py b/pysecondo/__init__.py new file mode 100644 index 0000000..93f3444 --- /dev/null +++ b/pysecondo/__init__.py @@ -0,0 +1,7 @@ +""" +PySECONDO - A minimal implementation of SECONDO database system in Python + +This is a learning project to understand the core architecture of SECONDO. +""" + +__version__ = "0.1.0" diff --git a/pysecondo/__main__.py b/pysecondo/__main__.py new file mode 100644 index 0000000..a683d94 --- /dev/null +++ b/pysecondo/__main__.py @@ -0,0 +1,46 @@ +""" +Main entry point for PySECONDO package +""" + +from .repl import REPL + +if __name__ == "__main__": + import sys + repl = REPL() + if len(sys.argv) > 1 and sys.argv[1] == "--test": + # Run test mode + from pysecondo.storage.memory import MemoryStorage + from pysecondo.algebras.base import AlgebraManager + from pysecondo.algebras.standard import StandardAlgebra + from pysecondo.algebras.relation import RelationAlgebra + from pysecondo.parser.parser import Parser + from pysecondo.parser.evaluator import Evaluator + from pysecondo.core.types import parse_type + from pysecondo.core.nested_list import list_nl + + storage = MemoryStorage() + algebra_manager = AlgebraManager() + algebra_manager.register_algebra("StandardAlgebra", StandardAlgebra()) + algebra_manager.register_algebra( + "RelationAlgebra", RelationAlgebra(storage)) + + parser = Parser() + evaluator = Evaluator(algebra_manager, storage) + + # Create test data + cities_type = parse_type( + '(rel (tuple ((Name string)(Population int))))') + cities_data = list_nl( + list_nl("Beijing", 21540000), + list_nl("Shanghai", 24280000), + list_nl("Guangzhou", 14040000), + ) + storage.create_object("cities", cities_data, cities_type) + + # Run test query + print("Test: query cities feed count") + tokens = parser.parse_expression("cities feed count") + value, _ = evaluator.evaluate(tokens) + 
print(f"Result: {value.to_python()}") + else: + repl.run() diff --git a/pysecondo/algebras/__init__.py b/pysecondo/algebras/__init__.py new file mode 100644 index 0000000..66477fb --- /dev/null +++ b/pysecondo/algebras/__init__.py @@ -0,0 +1,13 @@ +""" +Algebra system for PySECONDO + +The algebra system is SECONDO's core extensibility mechanism. +Each algebra defines operators that can be dynamically loaded. +""" + +from .base import Algebra, Operator, AlgebraManager +from .standard import StandardAlgebra +from .relation import RelationAlgebra + +__all__ = ["Algebra", "Operator", "AlgebraManager", + "StandardAlgebra", "RelationAlgebra"] diff --git a/pysecondo/algebras/base.py b/pysecondo/algebras/base.py new file mode 100644 index 0000000..658510f --- /dev/null +++ b/pysecondo/algebras/base.py @@ -0,0 +1,150 @@ +""" +Base classes for Algebra system + +SECONDO's extensibility is based on algebras - modules that define +operators for specific data types and operations. +""" + +from typing import List, Callable, Optional, Any +from abc import ABC, abstractmethod +from pysecondo.core.types import Type +from pysecondo.core.nested_list import NestedList + + +class Operator: + """ + Base class for all operators in SECONDO + + Each operator has: + - name: Operator name (e.g., "+", "filter", "consume") + - type_map: Function that checks if input types are valid + - value_map: Function that executes the operator + + The type_map function: + - Takes list of input types + - Returns output type if types are valid + - Raises TypeError if types are invalid + + The value_map function: + - Takes list of input values (NestedList) + - Returns result value (NestedList) + """ + + def __init__( + self, + name: str, + type_map: Callable[[List[Type]], Type], + value_map: Callable[[List[NestedList]], NestedList], + description: str = "" + ): + self.name = name + self.type_map = type_map + self.value_map = value_map + self.description = description + + def __repr__(self) -> str: + return 
class AlgebraManager:
    """
    Manager for all loaded algebras.

    Keeps two lookup tables:

    - ``algebras``: registration name -> algebra instance
    - ``operator_index``: operator name -> ``(algebra, operator)``

    Operator names are unique across all registered algebras; registering an
    algebra that redefines an existing operator is rejected.
    """

    def __init__(self):
        self.algebras: "dict[str, Algebra]" = {}
        # Maps operator name -> (algebra, operator)
        self.operator_index: "dict[str, tuple[Algebra, Operator]]" = {}

    def _algebra_label(self, algebra: "Algebra") -> str:
        """Return the name *algebra* was registered under, else its class name."""
        for registered_name, registered in self.algebras.items():
            if registered is algebra:
                return registered_name
        return type(algebra).__name__

    def register_algebra(self, name: str, algebra: "Algebra") -> None:
        """
        Register a new algebra and index all of its operators.

        Registration is all-or-nothing: every operator name is checked for
        conflicts before anything is stored, so a rejected algebra leaves
        ``algebras`` and ``operator_index`` untouched.

        Args:
            name: Algebra name (e.g., "StandardAlgebra")
            algebra: Algebra instance; its ``operators`` mapping is indexed

        Raises:
            ValueError: If ``name`` is already registered, or if one of the
                algebra's operators is already provided by another algebra.
        """
        if name in self.algebras:
            raise ValueError(f"Algebra {name} already registered")

        # Validate first so a conflict cannot leave a half-registered algebra.
        for op_name in algebra.operators:
            if op_name in self.operator_index:
                existing_alg, _ = self.operator_index[op_name]
                raise ValueError(
                    f"Operator {op_name} already defined in "
                    f"{self._algebra_label(existing_alg)}"
                )

        self.algebras[name] = algebra
        for op_name, operator in algebra.operators.items():
            self.operator_index[op_name] = (algebra, operator)

    def get_operator(self, name: str) -> "Optional[Operator]":
        """Return the operator registered as *name*, or None if unknown."""
        entry = self.operator_index.get(name)
        return entry[1] if entry is not None else None

    def get_algebra_for_operator(self, name: str) -> "Optional[Algebra]":
        """Return the algebra that provides operator *name*, or None if unknown."""
        entry = self.operator_index.get(name)
        return entry[0] if entry is not None else None

    def list_operators(self) -> List[str]:
        """List all available operator names, in registration order."""
        return list(self.operator_index)

    def list_algebras(self) -> List[str]:
        """List the names of all registered algebras, in registration order."""
        return list(self.algebras)
+For simplicity in PySECONDO, we use nested lists directly. +""" + +from typing import List +from pysecondo.core.types import BaseType, Type, TupleType, RelationType, Attribute +from pysecondo.core.nested_list import NestedList, atom, list_nl +from pysecondo.storage.memory import MemoryStorage +from pysecondo.algebras.base import Algebra, Operator + + +class Stream: + """ + Stream representation for PySECONDO + + In real SECONDO, streams are C++ iterators. + Here, we use Python lists for simplicity. + """ + + def __init__(self, tuples: List[NestedList], tuple_type: TupleType): + self.tuples = tuples # List of tuple values + self.tuple_type = tuple_type + + +class RelationAlgebra(Algebra): + """ + Relation algebra for data manipulation + + This algebra requires access to storage to manage relations. + """ + + def __init__(self, storage: MemoryStorage): + """ + Initialize relation algebra + + Args: + storage: Storage backend for managing relations + """ + self.storage = storage + super().__init__() + + def init(self) -> None: + """Register all relation operators""" + self.register_operator(Operator( + "create", + self.type_map_create, + None # Handled specially by query processor + )) + + self.register_operator(Operator( + "update", + self.type_map_update, + None # Handled specially by query processor + )) + + self.register_operator(Operator( + "feed", + self.type_map_feed, + self.value_map_feed + )) + + self.register_operator(Operator( + "consume", + self.type_map_consume, + self.value_map_consume + )) + + self.register_operator(Operator( + "filter", + self.type_map_filter, + self.value_map_filter + )) + + self.register_operator(Operator( + "count", + self.type_map_count, + self.value_map_count + )) + + # Type mapping functions + + def type_map_create(self, args: List[Type]) -> Type: + """ + Type map for create operator + + Syntax: create identifier : type + This is handled specially by the query processor. 
+ """ + raise TypeError("create is handled by query processor") + + def type_map_update(self, args: List[Type]) -> Type: + """ + Type map for update operator + + Syntax: update identifier := value + This is handled specially by the query processor. + """ + raise TypeError("update is handled by query processor") + + def type_map_feed(self, args: List[Type]) -> Type: + """ + Type map for feed operator + + Input: (rel (tuple (...))) + Output: (stream (tuple (...))) + + In PySECONDO, we use the same type for streams. + """ + if len(args) != 1: + raise TypeError(f"feed expects 1 argument, got {len(args)}") + + if not isinstance(args[0], RelationType): + raise TypeError(f"feed requires relation type, got {args[0]}") + + # Return the tuple type (stream element type) + return args[0].tuple_type + + def type_map_consume(self, args: List[Type]) -> Type: + """ + Type map for consume operator + + Input: (stream (tuple (...))) + Output: (rel (tuple (...))) + """ + if len(args) != 1: + raise TypeError(f"consume expects 1 argument, got {len(args)}") + + # consume takes a stream and returns a relation + # For simplicity, we treat streams as their tuple type + if not isinstance(args[0], TupleType): + raise TypeError( + f"consume requires tuple/stream type, got {args[0]}") + + return RelationType(args[0]) + + def type_map_filter(self, args: List[Type]) -> Type: + """ + Type map for filter operator + + Syntax: stream filter[fun] + Input: (stream T, (tuple -> bool) function) + Output: (stream T) + """ + if len(args) != 2: + raise TypeError(f"filter expects 2 arguments, got {len(args)}") + + stream_type, func_type = args + + # Stream should be a tuple type + if not isinstance(stream_type, TupleType): + raise TypeError(f"filter requires tuple stream, got {stream_type}") + + # Function should return bool + # For simplicity, we just check that func_type exists + # In real SECONDO, this would be more complex + + return stream_type # Output same type as input stream + + def 
def value_map_filter(self, args: "List[NestedList]") -> "NestedList":
    """
    Filter a stream by a predicate.

    ``args[0]`` is the stream and ``args[1]`` the predicate. Only constant
    predicates are actually evaluated:

    - boolean atom ``true``: the stream is returned unchanged
    - boolean atom ``false``: an empty list is returned
    - ``("." "AttrName")`` attribute access: not evaluated yet; every tuple
      is copied into a new list
    - anything else, including comparisons such as
      ``(">" ("." "AttrName") value)``: not evaluated yet; the stream is
      returned unchanged

    Raises:
        TypeError: If ``args[0]`` is not a list.
    """
    stream, predicate = args[0], args[1]

    if not stream.is_list():
        raise TypeError("filter requires a stream")

    # Constant boolean predicate: pass everything or nothing.
    if predicate.is_atom() and isinstance(predicate.value, bool):
        return stream if predicate.value else list_nl()

    # Attribute access ("." "AttrName"): resolving the attribute needs the
    # tuple schema, which is not available here, so all tuples pass.
    if predicate.is_list() and len(predicate) == 2:
        marker = predicate[0]
        if marker.is_atom() and marker.value == ".":
            return list_nl(*stream.value)

    # Comparison predicates and any other shape are not evaluated; the
    # stream passes through unchanged.
    return stream
def type_map_arith(self, args: "list[Type]") -> "Type":
    """
    Type map for arithmetic operators: (T T) -> T where T is int or real.

    Both operands must be numeric. The result is real if either operand is
    real, otherwise int.

    Raises:
        TypeError: If the argument count is not 2, or if either operand is
            not int or real.
    """
    if len(args) != 2:
        raise TypeError(
            f"Arithmetic operator expects 2 arguments, got {len(args)}")

    t1, t2 = args
    numeric = (BaseType.INT, BaseType.REAL)

    # Check both operands before promoting; testing only "either is real"
    # would let (real, string) or (bool, real) through as real.
    if t1 not in numeric or t2 not in numeric:
        raise TypeError(
            f"Arithmetic operator requires int or real, got {t1} and {t2}"
        )

    # NOTE(review): value_map_div uses true division, so "/" on two ints
    # yields a float value while this map reports int - confirm whether
    # "/" should get its own type map.
    if t1 == BaseType.REAL or t2 == BaseType.REAL:
        return BaseType.REAL
    return BaseType.INT
(BaseType.INT, BaseType.REAL) and t2 in (BaseType.INT, BaseType.REAL): + return BaseType.BOOL + + if t1 == t2 and t1 in (BaseType.INT, BaseType.REAL, BaseType.STRING): + return BaseType.BOOL + + raise TypeError(f"Cannot compare {t1} with {t2}") + + def type_map_logical(self, args: list[Type]) -> Type: + """Type map for binary logical operators: (bool bool) -> bool""" + if len(args) != 2: + raise TypeError( + f"Logical operator expects 2 arguments, got {len(args)}") + + if args[0] != BaseType.BOOL or args[1] != BaseType.BOOL: + raise TypeError( + f"Logical operator requires bool arguments, got {args[0]} and {args[1]}" + ) + + return BaseType.BOOL + + def type_map_unary_logical(self, args: list[Type]) -> Type: + """Type map for unary logical operators: (bool) -> bool""" + if len(args) != 1: + raise TypeError( + f"Unary logical operator expects 1 argument, got {len(args)}") + + if args[0] != BaseType.BOOL: + raise TypeError( + f"Logical operator requires bool argument, got {args[0]}" + ) + + return BaseType.BOOL + + # Value mapping functions + + def value_map_add(self, args: list[NestedList]) -> NestedList: + """Addition: a + b""" + a, b = args[0].value, args[1].value + result = a + b + return atom(result) + + def value_map_sub(self, args: list[NestedList]) -> NestedList: + """Subtraction: a - b""" + a, b = args[0].value, args[1].value + result = a - b + return atom(result) + + def value_map_mul(self, args: list[NestedList]) -> NestedList: + """Multiplication: a * b""" + a, b = args[0].value, args[1].value + result = a * b + return atom(result) + + def value_map_div(self, args: list[NestedList]) -> NestedList: + """Division: a / b""" + a, b = args[0].value, args[1].value + + if b == 0: + raise ZeroDivisionError("Division by zero") + + # Return float for division + result = a / b + return atom(result) + + def value_map_lt(self, args: list[NestedList]) -> NestedList: + """Less than: a < b""" + a, b = args[0].value, args[1].value + result = a < b + return atom(result) + 
+ def value_map_gt(self, args: list[NestedList]) -> NestedList: + """Greater than: a > b""" + a, b = args[0].value, args[1].value + result = a > b + return atom(result) + + def value_map_le(self, args: list[NestedList]) -> NestedList: + """Less than or equal: a <= b""" + a, b = args[0].value, args[1].value + result = a <= b + return atom(result) + + def value_map_ge(self, args: list[NestedList]) -> NestedList: + """Greater than or equal: a >= b""" + a, b = args[0].value, args[1].value + result = a >= b + return atom(result) + + def value_map_eq(self, args: list[NestedList]) -> NestedList: + """Equal: a = b""" + a, b = args[0].value, args[1].value + result = a == b + return atom(result) + + def value_map_ne(self, args: list[NestedList]) -> NestedList: + """Not equal: a != b""" + a, b = args[0].value, args[1].value + result = a != b + return atom(result) + + def value_map_and(self, args: list[NestedList]) -> NestedList: + """Logical and: a and b""" + a, b = args[0].value, args[1].value + result = a and b + return atom(result) + + def value_map_or(self, args: list[NestedList]) -> NestedList: + """Logical or: a or b""" + a, b = args[0].value, args[1].value + result = a or b + return atom(result) + + def value_map_not(self, args: list[NestedList]) -> NestedList: + """Logical not: not a""" + a = args[0].value + result = not a + return atom(result) diff --git a/pysecondo/core/__init__.py b/pysecondo/core/__init__.py new file mode 100644 index 0000000..d078cc7 --- /dev/null +++ b/pysecondo/core/__init__.py @@ -0,0 +1,9 @@ +""" +Core modules for PySECONDO +""" + +from .nested_list import NestedList, NestedListType +from .types import Type +from .type_system import TypeChecker + +__all__ = ["NestedList", "NestedListType", "Type", "TypeChecker"] diff --git a/pysecondo/core/nested_list.py b/pysecondo/core/nested_list.py new file mode 100644 index 0000000..9b107da --- /dev/null +++ b/pysecondo/core/nested_list.py @@ -0,0 +1,167 @@ +""" +Nested List - The core data structure of 
class NestedListType(Enum):
    """Kinds of node a nested list can be."""
    ATOM = "atom"  # atomic value (int, real, string, bool)
    LIST = "list"  # list of further nodes


@dataclass
class NestedList:
    """
    Nested list node: either an atom or a list of child nodes.

    ``value`` holds the payload (a Python scalar for atoms, a Python list of
    children for lists) and ``type`` records which of the two it is.

    Examples:
        NestedList.atom(42)                       # 42
        NestedList.atom("Beijing")                # "Beijing"
        NestedList.list([NestedList.atom(1),
                         NestedList.atom(2)])     # (1 2)
        NestedList.from_python([["Beijing", 21540000],
                                ["Shanghai", 24280000]])
        # (("Beijing" 21540000) ("Shanghai" 24280000))
    """
    value: Any
    type: NestedListType

    @staticmethod
    def atom(value: Union[int, float, str, bool]) -> "NestedList":
        """Create an atomic node wrapping ``value``."""
        return NestedList(value, NestedListType.ATOM)

    @staticmethod
    def list(items: list) -> "NestedList":
        """Create a list node whose children are ``items`` (stored as given)."""
        return NestedList(items, NestedListType.LIST)

    def is_atom(self) -> bool:
        """Return True when this node is an atom."""
        return self.type == NestedListType.ATOM

    def is_list(self) -> bool:
        """Return True when this node is a list."""
        return self.type == NestedListType.LIST

    def to_python(self) -> Any:
        """
        Convert this node to plain Python values.

        Atoms yield their payload; lists yield a Python list in which every
        ``NestedList`` child is converted recursively and any other child is
        passed through unchanged.

        Examples:
            atom(5) -> 5
            list([atom("a"), list([atom(1), atom(2)])]) -> ["a", [1, 2]]
        """
        if self.is_atom():
            return self.value
        return [
            child.to_python() if isinstance(child, NestedList) else child
            for child in self.value
        ]

    @classmethod
    def from_python(cls, value: Any) -> "NestedList":
        """
        Build a nested list from plain Python values.

        ``NestedList`` instances are returned as-is, scalars become atoms and
        lists or tuples become list nodes (converted recursively).

        Raises:
            TypeError: if ``value`` is none of the supported kinds.

        Examples:
            5 -> atom(5)
            ["a", [1, 2]] -> list([atom("a"), list([atom(1), atom(2)])])
            (1, 2) -> list([atom(1), atom(2)])
        """
        if isinstance(value, NestedList):
            return value
        if isinstance(value, (int, float, str, bool)):
            return cls.atom(value)
        # Tuples are accepted alongside lists; both map to a list node.
        if isinstance(value, (list, tuple)):
            return cls.list([cls.from_python(item) for item in value])
        raise TypeError(f"Cannot convert {type(value)} to NestedList")

    def __repr__(self) -> str:
        """SECONDO-style text: strings double-quoted, lists parenthesised."""
        if self.is_atom():
            if isinstance(self.value, str):
                return f'"{self.value}"'
            return str(self.value)
        inner = " ".join(repr(child) for child in self.value)
        return f"({inner})"

    def __eq__(self, other) -> bool:
        """Two nodes are equal when both their kind and payload are equal."""
        if not isinstance(other, NestedList):
            # Let Python try the reflected comparison before answering False.
            return NotImplemented
        return self.type == other.type and self.value == other.value

    def __len__(self) -> int:
        """Number of children of a list node; atoms raise TypeError."""
        if self.is_atom():
            raise TypeError("Atomic values have no length")
        return len(self.value)

    def __getitem__(self, index):
        """Index into the children of a list node; atoms raise TypeError."""
        if self.is_atom():
            raise TypeError("Cannot index atomic values")
        return self.value[index]
class TypeChecker:
    """
    Type checker for nested list values.

    Verifies that a nested list value matches a given type. All methods are
    static. The ``_check*`` helpers signal a mismatch by raising the
    ``TypeError`` name that is in scope in this module; ``check`` converts
    that into a boolean.
    """

    @staticmethod
    def check(value: NestedList, expected_type: Type) -> bool:
        """
        Return True when ``value`` matches ``expected_type``, else False.

        Examples:
            TypeChecker.check(atom(5), BaseType.INT) -> True
            TypeChecker.check(atom("hello"), BaseType.INT) -> False
            TypeChecker.check(atom(True), BaseType.INT) -> False
        """
        try:
            TypeChecker._check(value, expected_type)
        except TypeError:
            return False
        return True

    @staticmethod
    def _check(value: NestedList, expected_type: Type) -> None:
        """Dispatch to the checker for the kind of ``expected_type``.

        Raises:
            TypeError: on mismatch, or when ``expected_type`` is not a
                base, tuple or relation type.
        """
        if isinstance(expected_type, BaseType):
            TypeChecker._check_base_type(value, expected_type)
        elif isinstance(expected_type, TupleType):
            TypeChecker._check_tuple(value, expected_type)
        elif isinstance(expected_type, RelationType):
            TypeChecker._check_relation(value, expected_type)
        else:
            raise TypeError(f"Unsupported type: {type(expected_type)}")

    @staticmethod
    def _check_base_type(value: NestedList, expected_type: BaseType) -> None:
        """Check an atom against a base type.

        ``int`` accepts Python ints, ``real`` accepts ints and floats,
        ``string`` accepts str and ``bool`` accepts bool. A Python ``bool``
        is never accepted as ``int`` or ``real``.
        """
        if not value.is_atom():
            raise TypeError(f"Expected {expected_type.value}, got list")

        payload = value.value
        # bool is a subclass of int in Python, so isinstance(True, int) holds;
        # exclude it explicitly from the numeric types.
        is_bool = isinstance(payload, bool)

        if expected_type == BaseType.INT:
            if is_bool or not isinstance(payload, int):
                raise TypeError(f"Expected int, got {type(payload)}")
        elif expected_type == BaseType.REAL:
            if is_bool or not isinstance(payload, (int, float)):
                raise TypeError(f"Expected real, got {type(payload)}")
        elif expected_type == BaseType.STRING:
            if not isinstance(payload, str):
                raise TypeError(f"Expected string, got {type(payload)}")
        elif expected_type == BaseType.BOOL:
            if not is_bool:
                raise TypeError(f"Expected bool, got {type(payload)}")

    @staticmethod
    def _check_tuple(value: NestedList, expected_type: TupleType) -> None:
        """Check a list node against a tuple type, attribute by attribute."""
        if not value.is_list():
            raise TypeError("Expected tuple, got atom")

        expected_arity = len(expected_type.attributes)
        if len(value) != expected_arity:
            raise TypeError(
                f"Tuple arity mismatch: expected {expected_arity}, "
                f"got {len(value)}"
            )

        for attr_val, attr_def in zip(value.value, expected_type.attributes):
            TypeChecker._check(attr_val, attr_def.type)

    @staticmethod
    def _check_relation(value: NestedList, expected_type: RelationType) -> None:
        """Check a list node against a relation type (a list of tuples)."""
        if not value.is_list():
            raise TypeError("Expected relation, got atom")

        for tuple_val in value.value:
            TypeChecker._check_tuple(tuple_val, expected_type.tuple_type)

    @staticmethod
    def infer_type(value: NestedList) -> Type:
        """
        Infer a type from a nested list value.

        Atoms map to the matching base type. For a list, the type inferred
        from its first element is returned.

        Raises:
            TypeError: for an atom of unsupported Python type or an empty list.

        Examples:
            infer_type(atom(5)) -> BaseType.INT
            infer_type(atom(True)) -> BaseType.BOOL
            infer_type(list_nl(atom(1), atom(2))) -> BaseType.INT
        """
        if value.is_atom():
            payload = value.value
            # bool must be tested before int: True/False are ints as well.
            if isinstance(payload, bool):
                return BaseType.BOOL
            if isinstance(payload, int):
                return BaseType.INT
            if isinstance(payload, float):
                return BaseType.REAL
            if isinstance(payload, str):
                return BaseType.STRING
            raise TypeError(f"Cannot infer type for {type(payload)}")

        if len(value.value) == 0:
            raise TypeError("Cannot infer type for empty list")

        first_type = TypeChecker.infer_type(value.value[0])

        if all(TypeChecker.check(item, first_type) for item in value.value):
            # NOTE(review): infer_type itself never produces a TupleType, so
            # this branch looks unreachable as written; kept for parity.
            if isinstance(first_type, TupleType):
                return RelationType(first_type)

        return first_type
def _split_tuple_attributes(inner: str) -> List[str]:
    """Split ``(a t1)(b t2)...`` into the text inside each outer pair of parens.

    Raises:
        ValueError: if a chunk does not start with '(' or its parentheses
            never balance.
    """
    chunks = []
    pos = 0
    length = len(inner)

    while pos < length:
        if inner[pos].isspace():
            pos += 1
            continue

        if inner[pos] != '(':
            raise ValueError(
                f"Expected '(' at position {pos}, got '{inner[pos]}'")

        # Walk to the parenthesis that closes the one at `start`.
        depth = 0
        start = pos
        while pos < length:
            if inner[pos] == '(':
                depth += 1
            elif inner[pos] == ')':
                depth -= 1
                if depth == 0:
                    break
            pos += 1

        if depth != 0:
            raise ValueError(
                f"Unbalanced parentheses in attribute list: {inner[start:]}")

        chunks.append(inner[start + 1:pos].strip())
        pos += 1  # step past the closing paren

    return chunks


def parse_type(type_str: str) -> "Type":
    """
    Parse a SECONDO type string into a Type object.

    Supports the four base types, ``(rel <tuple type>)`` and
    ``(tuple ((name type)...))``.

    Raises:
        ValueError: for an unknown type, a relation whose inner type is not a
            tuple, an attribute that is not ``(name type)``, or unbalanced
            parentheses in the attribute list.

    Examples:
        parse_type("int") -> BaseType.INT
        parse_type("(tuple ((x int)(y real)))") -> TupleType(...)
        parse_type("(rel (tuple ((Name string))))") -> RelationType(...)
    """
    type_str = type_str.strip()

    # Basic types
    if type_str == "int":
        return BaseType.INT
    if type_str == "real":
        return BaseType.REAL
    if type_str == "string":
        return BaseType.STRING
    if type_str == "bool":
        return BaseType.BOOL

    # Relation type: strip "(rel " and the final ")" and parse what is left.
    if type_str.startswith("(rel ") and type_str.endswith(")"):
        inner = type_str[5:-1].strip()
        tuple_type = parse_type(inner)
        if not isinstance(tuple_type, TupleType):
            raise ValueError(
                f"Relation must contain a tuple type, got: {inner}")
        return RelationType(tuple_type)

    # Tuple type: [8:] skips "(tuple (", [:-2] removes the trailing "))".
    if type_str.startswith("(tuple (") and type_str.endswith("))"):
        inner = type_str[8:-2].strip()
        attributes = []
        for attr_str in _split_tuple_attributes(inner):
            # First word is the attribute name, the rest is its type.
            parts = attr_str.split(None, 1)
            if len(parts) != 2:
                raise ValueError(
                    f"Malformed attribute definition: ({attr_str})")
            name, attr_type_str = parts
            attributes.append(Attribute(name, parse_type(attr_type_str.strip())))
        return TupleType(attributes)

    raise ValueError(f"Unknown type: {type_str}")
def evaluate(self, tokens: List[str]) -> Tuple[NestedList, Type]:
    """
    Evaluate a tokenized expression.

    A single token is evaluated directly. If the first token is one of the
    unary operator names it is applied in prefix form to the next token (and,
    for ``filter``, the token after that); any further tokens are not
    consulted. Otherwise the first token is the starting value and the
    remaining tokens are applied left to right as postfix/infix operators.

    Args:
        tokens: List of tokens from parser

    Returns:
        Tuple of (value, type)

    Raises:
        ValueError: If evaluation fails
    """
    if not tokens:
        raise ValueError("Empty expression")

    if len(tokens) == 1:
        return self.evaluate_single(tokens[0])

    # Built once per call; the chain loop below only reads them.
    unary_ops = {'feed', 'consume', 'count', 'filter', 'not'}
    binary_ops = {'+', '-', '*', '/', '<', '>', '=', '!=', '<=', '>=',
                  'and', 'or'}

    # Prefix notation, e.g. "not false". len(tokens) >= 2 is guaranteed here.
    if tokens[0] in unary_ops:
        op_name = tokens[0]
        operand_value, operand_type = self.evaluate_single(tokens[1])

        if op_name != 'filter':
            return self.execute_operator(
                op_name, [operand_value], [operand_type])

        # filter additionally needs a predicate token
        if len(tokens) < 3:
            raise ValueError("filter requires a predicate")
        pred_value, pred_type = self.evaluate_single(tokens[2])
        return self.execute_operator(
            op_name,
            [operand_value, pred_value],
            [operand_type, pred_type]
        )

    # Start with first token (value or identifier)
    current_value, current_type = self.evaluate_single(tokens[0])
    i = 1

    # Process operator chain
    while i < len(tokens):
        op_name = tokens[i]

        if self.algebra_manager.get_operator(op_name) is None:
            # Not a registered operator: if the token evaluates as a value
            # or identifier, it replaces the running value.
            try:
                current_value, current_type = self.evaluate_single(op_name)
            except Exception as exc:
                # Narrower than a bare except: KeyboardInterrupt/SystemExit
                # pass through, and the original cause stays attached.
                raise ValueError(f"Unknown operator: {op_name}") from exc
            i += 1
            continue

        if op_name == 'filter':
            # filter takes the running value plus a predicate token
            if i + 1 >= len(tokens):
                raise ValueError("filter requires a predicate")
            pred_value, pred_type = self.evaluate_single(tokens[i + 1])
            current_value, current_type = self.execute_operator(
                op_name,
                [current_value, pred_value],
                [current_type, pred_type]
            )
            i += 2
        elif op_name in binary_ops:
            # Binary operator: need right operand
            if i + 1 >= len(tokens):
                raise ValueError(
                    f"Binary operator {op_name} requires right operand")
            right_value, right_type = self.evaluate_single(tokens[i + 1])
            current_value, current_type = self.execute_operator(
                op_name,
                [current_value, right_value],
                [current_type, right_type]
            )
            i += 2
        else:
            # Listed unary operators, and any other registered operator,
            # are applied to the running value alone.
            current_value, current_type = self.execute_operator(
                op_name,
                [current_value],
                [current_type]
            )
            i += 1

    return current_value, current_type
def parse_number(self, token: str) -> Tuple[NestedList, Type]:
    """
    Parse a number token into an (atom, type) pair.

    Tokens accepted by ``int()`` become ``BaseType.INT``; any other token
    accepted by ``float()`` that contains at least one digit becomes
    ``BaseType.REAL``. This covers exponent notation such as ``1e5``.

    Raises:
        ValueError: if the token is not a number, including digit-free
            spellings such as ``nan`` or ``inf``.
    """
    try:
        return atom(int(token)), BaseType.INT
    except ValueError:
        pass  # not an integer literal; try a real below

    # float() also accepts "nan"/"inf"/"infinity"; those contain no digit
    # and are not treated as numeric literals here.
    if any(ch.isdigit() for ch in token):
        try:
            return atom(float(token)), BaseType.REAL
        except ValueError:
            pass

    raise ValueError(f"Invalid number: {token}")
def split_list(self, s: str) -> List[str]:
    """
    Split a list string into its top-level parts.

    Parts are separated by spaces, tabs or newlines that occur outside
    parentheses and outside double-quoted strings. Parentheses inside a
    quoted string do not affect nesting, and a quote preceded by a backslash
    does not open or close a string.

    Examples:
        'a (b c) d'         -> ['a', '(b c)', 'd']
        '"New York" 8336817' -> ['"New York"', '8336817']
    """
    parts = []
    current = []
    depth = 0
    in_string = False
    previous = ''

    for char in s:
        if char == '"' and previous != '\\':
            in_string = not in_string
            current.append(char)
        elif in_string:
            # Everything inside quotes is literal text.
            current.append(char)
        elif char == '(':
            depth += 1
            current.append(char)
        elif char == ')':
            depth -= 1
            current.append(char)
        elif char in ' \t\n' and depth == 0:
            if current:
                parts.append(''.join(current))
                current = []
        else:
            current.append(char)
        previous = char

    if current:
        parts.append(''.join(current))

    return parts
class Parser:
    """
    Simple parser for SECONDO queries.

    Recognises three command forms with regular expressions
    (``create name : type``, ``update name := value``, ``query expression``)
    and offers a small tokenizer plus token classification helpers.
    """

    def __init__(self):
        # The type after ':' is captured as written, so both base types
        # ("int") and parenthesised types ("(rel ...)") are accepted.
        self.create_pattern = re.compile(
            r'^\s*create\s+(\w+)\s*:\s*(.+?)\s*$', re.IGNORECASE
        )
        self.update_pattern = re.compile(
            r'^\s*update\s+(\w+)\s*:=\s*(.+)\s*$', re.IGNORECASE
        )
        self.query_pattern = re.compile(
            r'^\s*query\s+(.+)\s*$', re.IGNORECASE
        )

    def parse(self, query: str) -> Optional["Command"]:
        """
        Parse a query string into a command.

        Args:
            query: Query string

        Returns:
            CreateCommand, UpdateCommand or QueryCommand, or None if the
            string matches none of the three command forms.
        """
        match = self.create_pattern.match(query)
        if match:
            return CreateCommand(match.group(1), match.group(2))

        match = self.update_pattern.match(query)
        if match:
            return UpdateCommand(match.group(1), match.group(2).strip())

        match = self.query_pattern.match(query)
        if match:
            return QueryCommand(match.group(1).strip())

        return None

    def parse_expression(self, expr: str) -> List[str]:
        """
        Split an expression into tokens.

        Tokens are separated by whitespace that occurs outside parentheses,
        square brackets and double-quoted strings. A quote preceded by a
        backslash does not open or close a string.

        Examples:
            "cities" -> ["cities"]
            "cities feed consume" -> ["cities", "feed", "consume"]
            "5 + 3" -> ["5", "+", "3"]
            '"New York" = name' -> ['"New York"', "=", "name"]
        """
        tokens = []
        current = []
        paren_depth = 0
        bracket_depth = 0
        in_string = False
        previous = ''

        for char in expr:
            if char == '"' and previous != '\\':
                in_string = not in_string
                current.append(char)
            elif in_string:
                # Spaces and brackets inside quotes are literal text.
                current.append(char)
            elif char in ' \t\n' and paren_depth == 0 and bracket_depth == 0:
                if current:
                    tokens.append(''.join(current))
                    current = []
            else:
                if char == '(':
                    paren_depth += 1
                elif char == ')':
                    paren_depth -= 1
                elif char == '[':
                    bracket_depth += 1
                elif char == ']':
                    bracket_depth -= 1
                current.append(char)
            previous = char

        if current:
            tokens.append(''.join(current))

        return tokens

    def is_identifier(self, token: str) -> bool:
        """Check if token is an identifier (letter or '_' then word chars)."""
        return bool(re.match(r'^[a-zA-Z_]\w*$', token))

    def is_number(self, token: str) -> bool:
        """Check if token is a number, i.e. accepted by ``float()``."""
        try:
            float(token)
        except ValueError:
            return False
        return True

    def is_string(self, token: str) -> bool:
        """Check if token is a string literal: opening and closing quote."""
        # The length check keeps a lone '"' from counting as both quotes.
        return len(token) >= 2 and token.startswith('"') and token.endswith('"')

    def is_operator(self, token: str) -> bool:
        """Check if token is one of the known operator spellings."""
        return token in {
            '+', '-', '*', '/', '<', '>', '=', '!', 'and', 'or', 'not',
            '<=', '>=', '!=', 'feed', 'consume', 'filter', 'count',
        }
class QueryProcessor:
    """
    Simple query processor.

    Bundles an algebra manager and a storage object and offers:
    - object creation (``create name : type``)
    - object updates (``update name := value``)
    - direct operator evaluation
    - identifier and identifier-type lookup
    """

    def __init__(self, algebra_manager: AlgebraManager, storage: MemoryStorage):
        self.algebra_manager = algebra_manager
        self.storage = storage

    def execute_create(self, name: str, type_str: str) -> None:
        """
        Execute: create name : type

        Parses ``type_str`` and stores a new object under ``name`` holding an
        initial value: an empty list for relation and tuple types, otherwise
        0, 0.0, "" or False for int, real, string and bool.

        Raises:
            ValueError: if the parsed type is none of the above.
        """
        obj_type = parse_type(type_str)

        if isinstance(obj_type, (RelationType, TupleType)):
            # Relations start empty; tuples get the same empty list.
            initial = NestedList.list([])
        else:
            base_defaults = (
                (BaseType.INT, 0),
                (BaseType.REAL, 0.0),
                (BaseType.STRING, ""),
                (BaseType.BOOL, False),
            )
            for base, default in base_defaults:
                if obj_type == base:
                    initial = NestedList.atom(default)
                    break
            else:
                raise ValueError(f"Unsupported type: {obj_type}")

        self.storage.create_object(name, initial, obj_type)

    def execute_update(self, name: str, value: NestedList) -> None:
        """
        Execute: update name := value

        Replaces the value of an existing object, keeping its stored type.

        Raises:
            ValueError: if no object named ``name`` exists.
        """
        if not self.storage.object_exists(name):
            raise ValueError(f"Object '{name}' does not exist")

        self.storage.update_object(name, value, self.storage.get_type(name))

    def evaluate_operator(
        self,
        op_name: str,
        args: List[NestedList],
        arg_types: List[Type]
    ) -> NestedList:
        """
        Evaluate an operator with given arguments.

        Args:
            op_name: Operator name
            args: Argument values
            arg_types: Argument types

        Returns:
            Result of the operator's value mapping

        Raises:
            ValueError: if the operator is unknown or has no value mapping.
        """
        operator = self.algebra_manager.get_operator(op_name)
        if operator is None:
            raise ValueError(f"Unknown operator: {op_name}")

        # Run the type mapping first; its result is not used here.
        operator.type_map(arg_types)

        if operator.value_map is None:
            raise ValueError(f"Operator {op_name} cannot be executed directly")

        return operator.value_map(args)

    def lookup_identifier(self, name: str) -> NestedList:
        """Return the stored value for ``name``; ValueError if storage has none."""
        found = self.storage.get_object(name)
        if found is None:
            raise ValueError(f"Unknown identifier: {name}")
        return found

    def get_identifier_type(self, name: str) -> Type:
        """Return the stored type for ``name``; ValueError if storage has none."""
        found_type = self.storage.get_type(name)
        if found_type is None:
            raise ValueError(f"Unknown identifier: {name}")
        return found_type
+""" + +import sys +from typing import List +from pysecondo.algebras.base import AlgebraManager +from pysecondo.algebras.standard import StandardAlgebra +from pysecondo.algebras.relation import RelationAlgebra +from pysecondo.storage.memory import MemoryStorage +from pysecondo.parser.parser import Parser, CreateCommand, UpdateCommand, QueryCommand +from pysecondo.parser.evaluator import Evaluator +from pysecondo.core.types import parse_type +from pysecondo.core.nested_list import NestedList + + +class REPL: + """ + Read-Eval-Print Loop for PySECONDO + + Provides an interactive interface for: + - Creating databases + - Querying data + - Exploring results + """ + + def __init__(self): + """Initialize the REPL with all components""" + self.storage = MemoryStorage() + self.algebra_manager = AlgebraManager() + + # Register algebras + self.algebra_manager.register_algebra( + "StandardAlgebra", StandardAlgebra()) + self.algebra_manager.register_algebra( + "RelationAlgebra", + RelationAlgebra(self.storage) + ) + + self.parser = Parser() + self.evaluator = Evaluator(self.algebra_manager, self.storage) + + self.running = False + self.verbose = True + + def print_banner(self): + """Print welcome banner""" + print("=" * 60) + print(" PySECONDO - Interactive Shell") + print(" A minimal implementation of SECONDO in Python") + print("=" * 60) + print() + print("Commands:") + print(" create name : type - Create a relation") + print(" update name := value - Insert/update data") + print(" query expression - Execute a query") + print(" list - List all objects") + print(" type name - Show object type") + print(" help - Show this help") + print(" quit - Exit the shell") + print() + print("Examples:") + print(" create cities : (rel (tuple ((Name string)(Population int))))") + print(' update cities := (("Beijing" 21540000)("Shanghai" 24280000))') + print(" query cities feed count") + print(" query 5 + 3") + print() + + def print_result(self, value: NestedList, max_depth: int = 3): + """ 
def execute(self, query: str) -> bool:
    """
    Parse one input line and run the matching command handler.

    Args:
        query: Query string

    Returns:
        The handler's result, or False when the line cannot be parsed, the
        command type is not recognised, or any exception is raised (the
        exception is printed, with a traceback when ``self.verbose`` is set).
    """
    try:
        command = self.parser.parse(query)
        if command is None:
            print(f"Error: Could not parse query: {query}")
            return False

        # Command class -> handler, tried in this order.
        dispatch = (
            (CreateCommand, self.execute_create),
            (UpdateCommand, self.execute_update),
            (QueryCommand, self.execute_query),
        )
        for command_cls, handler in dispatch:
            if isinstance(command, command_cls):
                return handler(command)

        print("Error: Unknown command type")
        return False

    except Exception as e:
        print(f"Error: {e}")
        import traceback
        if self.verbose:
            traceback.print_exc()
        return False
def _parse_nested_list(self, s: str) -> NestedList:
    """
    Parse a nested list string such as ("value1" 2 ("nested" ...)).

    Text that is not wrapped in parentheses is an atom:
    - double-quoted text becomes a string atom without the quotes
    - ``true`` / ``false`` (any case) become boolean atoms
    - text accepted by ``int()`` becomes an int atom
    - other text containing a digit and accepted by ``float()`` becomes a
      float atom (this includes exponent notation such as ``1e5``)
    - anything else is kept as a string atom

    Parenthesised text is split on whitespace outside nested parentheses and
    outside quoted strings, and each part is parsed recursively.
    """
    s = s.strip()

    if not s.startswith('(') or not s.endswith(')'):
        # Atomic value
        if s.startswith('"') and s.endswith('"'):
            return NestedList.atom(s[1:-1])

        lowered = s.lower()
        if lowered == 'true':
            return NestedList.atom(True)
        if lowered == 'false':
            return NestedList.atom(False)

        try:
            return NestedList.atom(int(s))
        except ValueError:
            pass  # not an integer literal

        # float() also accepts "nan"/"inf"; those contain no digit and
        # stay symbols.
        if any(ch.isdigit() for ch in s):
            try:
                return NestedList.atom(float(s))
            except ValueError:
                pass

        return NestedList.atom(s)

    # Parse list
    inner = s[1:-1].strip()
    if not inner:
        return NestedList.list([])

    items = []
    current = []
    depth = 0
    in_string = False

    for i, char in enumerate(inner):
        # A quote toggles string mode unless it is backslash-escaped.
        if char == '"' and (i == 0 or inner[i - 1] != '\\'):
            in_string = not in_string
            current.append(char)
        elif in_string:
            current.append(char)
        elif char == '(':
            depth += 1
            current.append(char)
        elif char == ')':
            depth -= 1
            current.append(char)
        elif char in ' \t\n' and depth == 0:
            if current:
                items.append(''.join(current))
                current = []
        else:
            current.append(char)

    if current:
        items.append(''.join(current))

    # Parse each item recursively
    return NestedList.list(
        [self._parse_nested_list(item.strip()) for item in items]
    )
class MemoryStorage:
    """
    Simple in-memory storage for database objects

    Keeps two dictionaries keyed by object name: one for values and one
    for types. This is a simplified version of SECONDO's storage manager.
    In the real SECONDO, Berkeley DB is used for persistence.
    """

    def __init__(self):
        """Initialize empty storage"""
        self.objects: Dict[str, NestedList] = {}  # name -> value
        self.types: Dict[str, Type] = {}  # name -> type

    def create_object(self, name: str, value: NestedList, obj_type: Type) -> None:
        """
        Create a new object in storage

        Args:
            name: Object name
            value: Nested list value
            obj_type: Type of the object

        Raises:
            ValueError: If an object with this name already exists
        """
        if name in self.objects:
            raise ValueError(f"Object '{name}' already exists")

        self.objects[name] = value
        self.types[name] = obj_type

    def update_object(self, name: str, value: NestedList,
                      obj_type: Optional[Type] = None) -> None:
        """
        Update an existing object

        Args:
            name: Object name
            value: New nested list value
            obj_type: New type of the object; when omitted (None) the
                type recorded at creation is kept

        Raises:
            ValueError: If no object with this name exists
        """
        if name not in self.objects:
            raise ValueError(f"Object '{name}' does not exist")

        self.objects[name] = value
        if obj_type is not None:
            self.types[name] = obj_type

    def get_object(self, name: str) -> Optional[NestedList]:
        """Get object value by name, or None if it does not exist"""
        return self.objects.get(name)

    def get_type(self, name: str) -> Optional[Type]:
        """Get object type by name, or None if it does not exist"""
        return self.types.get(name)

    def delete_object(self, name: str) -> None:
        """Delete object from storage; unknown names are ignored"""
        self.objects.pop(name, None)
        self.types.pop(name, None)

    def list_objects(self) -> list[str]:
        """List all object names"""
        return list(self.objects)

    def object_exists(self, name: str) -> bool:
        """Check if object exists"""
        return name in self.objects

    def clear(self) -> None:
        """Clear all objects"""
        self.objects.clear()
        self.types.clear()
def test_arithmetic_operators():
    """Check type and value mapping of +, and value mapping of -, * and /"""
    print("Testing Arithmetic Operators...")

    manager = AlgebraManager()
    manager.register_algebra("StandardAlgebra", StandardAlgebra())

    plus = manager.get_operator("+")

    # Result type of "+": int with int stays int, int with real widens
    for operand_types, expected_type in (
        ((BaseType.INT, BaseType.INT), BaseType.INT),
        ((BaseType.INT, BaseType.REAL), BaseType.REAL),
    ):
        assert plus.type_map(list(operand_types)) == expected_type

    # 5 + 3 = 8
    assert plus.value_map([atom(5), atom(3)]).value == 8

    # Remaining operators: symbol, left operand, right operand, result
    for symbol, lhs, rhs, expected in (
        ("-", 10, 3, 7),
        ("*", 6, 7, 42),
        ("/", 10, 2, 5.0),
    ):
        outcome = manager.get_operator(symbol).value_map([atom(lhs), atom(rhs)])
        assert outcome.value == expected

    print(" ✓ Arithmetic operators tests passed")
def test_logical_operators():
    """Check the value mapping of and, or and not against a truth table"""
    print("Testing Logical Operators...")

    manager = AlgebraManager()
    manager.register_algebra("StandardAlgebra", StandardAlgebra())

    # operator name followed by (operands, expected result) rows
    truth_table = (
        ("and", ((True, True), True), ((True, False), False)),
        ("or", ((True, False), True), ((False, False), False)),
        ("not", ((True,), False), ((False,), True)),
    )

    for name, *rows in truth_table:
        operator = manager.get_operator(name)
        for operands, expected in rows:
            outcome = operator.value_map([atom(flag) for flag in operands])
            assert outcome.value is expected

    print(" ✓ Logical operators tests passed")
def test_complex_expressions():
    """Chain several operators by feeding one result into the next"""
    print("Testing Complex Expressions...")

    manager = AlgebraManager()
    manager.register_algebra("StandardAlgebra", StandardAlgebra())

    # Expression: (5 + 3) * 2 = 16
    add_op, mul_op = (manager.get_operator(sym) for sym in ("+", "*"))
    product = mul_op.value_map([
        add_op.value_map([atom(5), atom(3)]),
        atom(2),
    ])
    assert product.value == 16

    # Expression: (10 > 5) and (3 < 7) = True
    gt_op, lt_op, and_op = (
        manager.get_operator(sym) for sym in (">", "<", "and")
    )
    verdict = and_op.value_map([
        gt_op.value_map([atom(10), atom(5)]),
        lt_op.value_map([atom(3), atom(7)]),
    ])
    assert verdict.value is True

    print(" ✓ Complex expressions tests passed")
def test_nested_list():
    """Test nested list creation, string rendering and to_python conversion"""
    print("Testing NestedList...")

    # Atomic values
    nl_int = atom(42)
    nl_str = atom("Beijing")
    nl_bool = atom(True)

    assert nl_int.is_atom()
    assert nl_int.value == 42
    assert str(nl_int) == "42"

    assert nl_str.is_atom()
    assert nl_str.value == "Beijing"
    assert str(nl_str) == '"Beijing"'

    # The boolean atom used to be created but never checked
    assert nl_bool.is_atom()
    assert nl_bool.value is True

    # Lists
    nl_list = list_nl(1, 2, 3)
    assert nl_list.is_list()
    assert len(nl_list) == 3
    assert str(nl_list) == "(1 2 3)"

    # Nested structures (tuple)
    nl_tuple = list_nl("Beijing", 21540000)
    assert str(nl_tuple) == '("Beijing" 21540000)'

    # Nested structures (relation)
    nl_rel = list_nl(
        list_nl("Beijing", 21540000),
        list_nl("Shanghai", 24280000)
    )
    assert str(nl_rel) == '(("Beijing" 21540000) ("Shanghai" 24280000))'

    # to_python conversion
    assert nl_int.to_python() == 42
    assert nl_list.to_python() == [1, 2, 3]
    assert nl_tuple.to_python() == ["Beijing", 21540000]

    print(" ✓ NestedList tests passed")
def test_storage():
    """Test in-memory storage, including its error paths"""
    print("Testing Memory Storage...")

    storage = MemoryStorage()

    # Create object
    city_tuple_type = TupleType([
        Attribute("Name", BaseType.STRING),
        Attribute("Population", BaseType.INT)
    ])

    beijing = list_nl("Beijing", 21540000)
    storage.create_object("beijing", beijing, city_tuple_type)
    assert storage.object_exists("beijing")

    # Creating the same name twice must be rejected
    try:
        storage.create_object("beijing", beijing, city_tuple_type)
        assert False, "Should raise ValueError for duplicate object"
    except ValueError:
        pass

    # Get object
    retrieved = storage.get_object("beijing")
    assert retrieved == beijing

    retrieved_type = storage.get_type("beijing")
    assert retrieved_type == city_tuple_type

    # Update object
    shanghai = list_nl("Shanghai", 24280000)
    storage.update_object("beijing", shanghai, city_tuple_type)
    assert storage.get_object("beijing") == shanghai

    # Updating an unknown name must be rejected
    try:
        storage.update_object("nowhere", shanghai, city_tuple_type)
        assert False, "Should raise ValueError for unknown object"
    except ValueError:
        pass

    # List objects
    storage.create_object("city2", beijing, city_tuple_type)
    objects = storage.list_objects()
    assert "beijing" in objects
    assert "city2" in objects
    assert len(objects) == 2

    # Delete object
    storage.delete_object("city2")
    assert len(storage.list_objects()) == 1
    assert not storage.object_exists("city2")
    assert storage.get_object("city2") is None
    assert storage.get_type("city2") is None

    # Clear everything
    storage.clear()
    assert storage.list_objects() == []

    print(" ✓ Storage tests passed")
def test_update_relation():
    """Populate a freshly created relation and read it back"""
    print("Testing Update Relation...")

    storage = MemoryStorage()
    algebra_manager = AlgebraManager()
    for algebra_name, algebra in (
        ("StandardAlgebra", StandardAlgebra()),
        ("RelationAlgebra", RelationAlgebra(storage)),
    ):
        algebra_manager.register_algebra(algebra_name, algebra)

    qp = QueryProcessor(algebra_manager, storage)

    # Create the empty relation
    qp.execute_create(
        "cities",
        "(rel (tuple ((Name string)(Population int))))"
    )

    # Insert data: one (name, population) tuple per city
    rows = (
        ("Beijing", 21540000),
        ("Shanghai", 24280000),
        ("Guangzhou", 14040000),
    )
    qp.execute_update(
        "cities",
        list_nl(*[list_nl(city, population) for city, population in rows])
    )

    # Verify data
    stored = qp.lookup_identifier("cities")
    assert len(stored) == 3

    print(" ✓ Update relation tests passed")
def test_count():
    """Count the tuples of a stream produced by feeding a relation"""
    print("Testing Count Operator...")

    storage = MemoryStorage()
    algebra_manager = AlgebraManager()
    for algebra_name, algebra in (
        ("StandardAlgebra", StandardAlgebra()),
        ("RelationAlgebra", RelationAlgebra(storage)),
    ):
        algebra_manager.register_algebra(algebra_name, algebra)

    qp = QueryProcessor(algebra_manager, storage)

    # Create and populate a relation
    qp.execute_create(
        "cities",
        "(rel (tuple ((Name string)(Population int))))"
    )
    rows = (
        ("Beijing", 21540000),
        ("Shanghai", 24280000),
        ("Guangzhou", 14040000),
        ("Shenzhen", 17560000),
    )
    qp.execute_update(
        "cities",
        list_nl(*[list_nl(city, population) for city, population in rows])
    )

    # Test count: relation -> stream -> number of tuples
    relation = qp.lookup_identifier("cities")
    feed_op, count_op = (
        algebra_manager.get_operator(op_name) for op_name in ("feed", "count")
    )
    tally = count_op.value_map([feed_op.value_map([relation])])
    assert tally.value == 4

    print(" ✓ Count operator tests passed")
def test_type_checking():
    """Check the result types that feed, consume and count derive from their inputs"""
    print("Testing Type Checking for Relations...")

    algebra_manager = AlgebraManager()
    algebra_manager.register_algebra("StandardAlgebra", StandardAlgebra())
    storage = MemoryStorage()
    algebra_manager.register_algebra("RelationAlgebra", RelationAlgebra(storage))

    # feed: relation type in, its tuple type out
    feed_op = algebra_manager.get_operator("feed")
    city_tuple = TupleType([
        Attribute("Name", BaseType.STRING),
        Attribute("Population", BaseType.INT)
    ])
    city_relation = RelationType(city_tuple)
    assert feed_op.type_map([city_relation]) == city_tuple

    # consume: tuple type in, relation over that tuple type out
    consumed_type = algebra_manager.get_operator("consume").type_map([city_tuple])
    assert isinstance(consumed_type, RelationType)
    assert consumed_type.tuple_type == city_tuple

    # count: always yields int
    counted_type = algebra_manager.get_operator("count").type_map([city_tuple])
    assert counted_type == BaseType.INT

    print(" ✓ Type checking tests passed")
def test_parser_update():
    """Check that an UPDATE statement parses into an UpdateCommand"""
    print("Testing Parser (UPDATE)...")

    statement = 'update cities := (("Beijing" 21540000)("Shanghai" 24280000))'
    parsed = Parser().parse(statement)

    # Right command class, target name, and the value text carried along
    assert isinstance(parsed, UpdateCommand)
    assert parsed.name == "cities"
    assert "Beijing" in parsed.value

    print(" ✓ UPDATE parsing tests passed")
def test_evaluator_arithmetic():
    """Evaluate simple infix arithmetic token lists"""
    print("Testing Evaluator (Arithmetic)...")

    storage = MemoryStorage()
    algebra_manager = AlgebraManager()
    algebra_manager.register_algebra("StandardAlgebra", StandardAlgebra())

    evaluator = Evaluator(algebra_manager, storage)

    # token list -> expected numeric result
    cases = (
        (["5", "+", "3"], 8),
        (["10", "-", "4"], 6),
        (["6", "*", "7"], 42),
    )
    for tokens, expected in cases:
        value, _value_type = evaluator.evaluate(tokens)
        assert value.value == expected

    print(" ✓ Arithmetic evaluation tests passed")
if __name__ == "__main__":
    # Script entry point: run every Phase 4 test in order between banners
    rule = "=" * 50

    print(rule)
    print("Phase 4: Query Processing & REPL Tests")
    print(rule)

    for run_test in (
        test_parser_create,
        test_parser_update,
        test_parser_query,
        test_parser_expressions,
        test_evaluator_arithmetic,
        test_evaluator_identifiers,
        test_evaluator_relations,
        test_end_to_end,
    ):
        run_test()

    print("\n" + rule)
    print("All Phase 4 tests passed! ✓")
    print(rule)