first commit

This commit is contained in:
2026-01-24 11:30:02 +08:00
commit d99832f66b
26 changed files with 3456 additions and 0 deletions

View File

@@ -0,0 +1,319 @@
"""
Expression Evaluator for PySECONDO
Evaluates query expressions using the algebra system.
Handles identifier lookup and operator execution.
"""
import re
from typing import List, Any, Tuple
from pysecondo.core.types import Type, BaseType
from pysecondo.core.nested_list import NestedList, atom, list_nl
from pysecondo.algebras.base import AlgebraManager
from pysecondo.storage.memory import MemoryStorage
class Evaluator:
    """
    Expression evaluator

    Evaluates tokenized query expressions strictly left to right: there is
    no operator precedence and every operand is exactly one token.
    Examples:
    - identifier
    - identifier feed consume
    - identifier feed count
    - 5 + 3
    - not false
    """

    # Operators recognised in prefix position (e.g. "not false"). ``filter``
    # is listed here but additionally consumes one predicate token.
    _UNARY_OPS = frozenset({'feed', 'consume', 'count', 'filter', 'not'})
    # Infix operators; each consumes the following token as right operand.
    _BINARY_OPS = frozenset({
        '+', '-', '*', '/', '<', '>', '=', '!=', '<=', '>=', 'and', 'or',
    })
    # Compiled once; used with fullmatch so a trailing newline is rejected.
    _IDENTIFIER_RE = re.compile(r'[a-zA-Z_]\w*')

    def __init__(
        self,
        algebra_manager: "AlgebraManager",
        storage: "MemoryStorage",
    ):
        """
        Args:
            algebra_manager: Resolves operator names via ``get_operator``.
            storage: Resolves identifiers via ``get_object``/``get_type``.
        """
        self.algebra_manager = algebra_manager
        self.storage = storage

    def evaluate(self, tokens: List[str]) -> "Tuple[NestedList, Type]":
        """
        Evaluate a tokenized expression

        The expression is folded left to right. A leading unary operator
        is applied in prefix form to the token(s) that follow it, and any
        remaining tokens are then processed as a normal operator chain
        instead of being discarded.

        Args:
            tokens: List of tokens from parser
        Returns:
            Tuple of (value, type)
        Raises:
            ValueError: If evaluation fails
        """
        if not tokens:
            raise ValueError("Empty expression")
        # Handle single token
        if len(tokens) == 1:
            return self.evaluate_single(tokens[0])

        first = tokens[0]
        if first == 'filter':
            # Prefix filter: "filter <operand> <predicate>"
            if len(tokens) < 3:
                raise ValueError("filter requires a predicate")
            operand_value, operand_type = self.evaluate_single(tokens[1])
            pred_value, pred_type = self.evaluate_single(tokens[2])
            current_value, current_type = self.execute_operator(
                first,
                [operand_value, pred_value],
                [operand_type, pred_type]
            )
            i = 3
        elif first in self._UNARY_OPS:
            # Prefix unary operator such as "not false"
            operand_value, operand_type = self.evaluate_single(tokens[1])
            current_value, current_type = self.execute_operator(
                first,
                [operand_value],
                [operand_type]
            )
            i = 2
        else:
            # Start with first token (value or identifier)
            current_value, current_type = self.evaluate_single(first)
            i = 1

        # Process the (remaining) operator chain
        while i < len(tokens):
            op_name = tokens[i]
            if self.algebra_manager.get_operator(op_name) is None:
                # Not an operator: if the token is itself a value or an
                # identifier it replaces the running value.
                try:
                    current_value, current_type = self.evaluate_single(
                        op_name)
                except Exception as exc:
                    # Exception (not a bare except) so KeyboardInterrupt
                    # and SystemExit are never turned into a ValueError.
                    raise ValueError(
                        f"Unknown operator: {op_name}") from exc
                i += 1
                continue

            if op_name == 'filter' or op_name in self._BINARY_OPS:
                # Operator takes the running value plus the next token
                if i + 1 >= len(tokens):
                    if op_name == 'filter':
                        raise ValueError("filter requires a predicate")
                    raise ValueError(
                        f"Binary operator {op_name} requires right operand")
                arg_value, arg_type = self.evaluate_single(tokens[i + 1])
                current_value, current_type = self.execute_operator(
                    op_name,
                    [current_value, arg_value],
                    [current_type, arg_type]
                )
                i += 2
            else:
                # Known unary operators and any other registered operator
                # are applied to the running value alone.
                current_value, current_type = self.execute_operator(
                    op_name,
                    [current_value],
                    [current_type]
                )
                i += 1
        return current_value, current_type

    def evaluate_single(self, token: str) -> "Tuple[NestedList, Type]":
        """
        Evaluate a single token

        Tried in order: boolean literal, string literal, number, nested
        list literal, identifier.

        Returns:
            Tuple of (value, type)
        Raises:
            ValueError: If the token matches none of the forms above, or
                the matching form cannot be evaluated.
        """
        # Boolean (checked before identifier since 'true'/'false' would
        # otherwise match the identifier pattern)
        lowered = token.lower()
        if lowered == 'true':
            return atom(True), BaseType.BOOL
        if lowered == 'false':
            return atom(False), BaseType.BOOL
        # String
        if self.is_string(token):
            return atom(token[1:-1]), BaseType.STRING  # Remove quotes
        # Number
        if self.is_number(token):
            return self.parse_number(token)
        # Nested list value; the length guard rejects a lone "(" which
        # both starts and ends with a parenthesis character.
        if len(token) >= 2 and token.startswith('(') and token.endswith(')'):
            return self.parse_nested_list(token)
        # Identifier (checked last since it matches many patterns)
        if self.is_identifier(token):
            return self.lookup_identifier(token)
        raise ValueError(f"Cannot evaluate token: {token}")

    def lookup_identifier(self, name: str) -> "Tuple[NestedList, Type]":
        """
        Look up an identifier in storage

        Returns:
            Tuple of (stored value, stored type)
        Raises:
            ValueError: If storage returns None for the name.
        """
        value = self.storage.get_object(name)
        if value is None:
            raise ValueError(f"Unknown identifier: {name}")
        return value, self.storage.get_type(name)

    def parse_number(self, token: str) -> "Tuple[NestedList, Type]":
        """
        Parse a number token

        Tokens that parse as an integer become INT; every other numeric
        literal (decimal point or exponent, e.g. "1.5", "1e5") becomes
        REAL.

        Raises:
            ValueError: If the token is not a numeric literal.
        """
        if not self.is_number(token):
            raise ValueError(f"Invalid number: {token}")
        try:
            number = int(token)
        except ValueError:
            # Not an integer literal; is_number guarantees float() works.
            return atom(float(token)), BaseType.REAL
        return atom(number), BaseType.INT

    def parse_nested_list(self, token: str) -> "Tuple[NestedList, Type]":
        """
        Parse a nested list token

        This is a simplified parser: the outer parentheses are removed and
        the whitespace-separated elements are each evaluated with
        evaluate_single, so elements may be numbers, strings, booleans,
        identifiers or further nested lists.

        Returns:
            Tuple of (list value, BaseType.INT). The type is a placeholder;
            no type inference is performed.
        Raises:
            ValueError: If an element cannot be evaluated.
        """
        # Remove outer parentheses
        inner = token[1:-1].strip()
        if not inner:
            # Empty list
            return list_nl(), BaseType.INT  # Default type
        values = [
            self.evaluate_single(part)[0]
            for part in self.split_list(inner)
        ]
        # Type inference would happen here
        return list_nl(*values), BaseType.INT

    def split_list(self, s: str) -> List[str]:
        """
        Split a list string into parts

        Splits on whitespace that is outside parentheses and outside
        double-quoted strings, so "(3 4)" and '"a b"' each stay one part.

        Returns:
            The parts in order; an empty list for empty/blank input.
        """
        parts = []
        current = []
        depth = 0
        in_string = False
        for char in s:
            if in_string:
                # Inside quotes nothing is structural except the closer.
                current.append(char)
                if char == '"':
                    in_string = False
            elif char == '"':
                in_string = True
                current.append(char)
            elif char == '(':
                depth += 1
                current.append(char)
            elif char == ')':
                depth -= 1
                current.append(char)
            elif char.isspace() and depth == 0:
                if current:
                    parts.append(''.join(current))
                    current = []
            else:
                current.append(char)
        if current:
            parts.append(''.join(current))
        return parts

    def execute_operator(
        self,
        op_name: str,
        args: "List[NestedList]",
        arg_types: "List[Type]"
    ) -> "Tuple[NestedList, Type]":
        """
        Execute an operator

        The operator's type mapping runs first, then its value mapping.

        Returns:
            Tuple of (result_value, result_type)
        Raises:
            ValueError: If the operator is unknown or has no value mapping.
        """
        operator = self.algebra_manager.get_operator(op_name)
        if operator is None:
            raise ValueError(f"Unknown operator: {op_name}")
        # Type check
        result_type = operator.type_map(arg_types)
        # Execute
        if operator.value_map is None:
            raise ValueError(f"Operator {op_name} has no value mapping")
        return operator.value_map(args), result_type

    def is_identifier(self, token: str) -> bool:
        """Check if token is an identifier (letter/underscore, then \\w*)"""
        return self._IDENTIFIER_RE.fullmatch(token) is not None

    def is_number(self, token: str) -> bool:
        """
        Check if token is a number

        float() also accepts the words "nan", "inf" and "infinity"; those
        contain no digit and are rejected here so that they remain usable
        as identifiers.
        """
        try:
            float(token)
        except ValueError:
            return False
        return any(ch.isdigit() for ch in token)

    def is_string(self, token: str) -> bool:
        """Check if token is a string literal (needs both quote marks)"""
        return len(token) >= 2 and token[0] == '"' and token[-1] == '"'