# Source file: secondo-py/pysecondo/parser/parser.py
# (164 lines, 4.3 KiB, Python; last change recorded 2026-01-24 11:30:02 +08:00)
"""
Simple Query Parser for PySECONDO
Parses SECONDO-like query syntax into executable commands.
Supported syntax:
- create name : type
- update name := value
- query name
- query expr1 op expr2
- query name feed consume
- query name feed count
- query name feed filter[expr] consume
"""
import re
from typing import List, Optional, Union, Tuple
from dataclasses import dataclass
@dataclass
class CreateCommand:
    """Parsed form of a ``create <name> : <type>`` statement."""

    # Identifier of the object to create.
    name: str
    # Type expression, kept as raw text.
    type_str: str
@dataclass
class UpdateCommand:
    """Parsed form of an ``update <name> := <value>`` statement."""

    # Identifier of the object being assigned to.
    name: str
    # New value, kept as the string representation of a nested list.
    value: str
@dataclass
class QueryCommand:
    """Parsed form of a ``query <expression>`` statement."""

    # Everything after the ``query`` keyword, kept as raw text.
    expression: str
# Any of the command objects that parsing can produce.
Command = Union[
    CreateCommand,
    UpdateCommand,
    QueryCommand,
]
class Parser:
    """
    Simple parser for SECONDO queries.

    This is a simplified, regex-based parser that handles basic SECONDO
    syntax. A full implementation would use a proper lexer and parser.

    Three command forms are recognised (keywords are case-insensitive and
    commands may span several lines):

    - ``create <name> : <type>``
    - ``update <name> := <value>``
    - ``query <expression>``

    The compiled patterns are exposed as ``create_pattern``,
    ``update_pattern`` and ``query_pattern``.
    """

    # Numeric literal: optional sign, digits with an optional fraction (or a
    # bare fraction such as ".5"), and an optional exponent. Deliberately
    # stricter than float(), which also accepts "nan", "inf", "1_000" and
    # surrounding whitespace.
    _NUMBER_RE = re.compile(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?')

    # Identifier: a letter or underscore followed by word characters.
    _IDENTIFIER_RE = re.compile(r'[a-zA-Z_]\w*')

    # Symbols and keywords reported by is_operator().
    _OPERATORS = frozenset({
        '+', '-', '*', '/', '<', '>', '=', '!', 'and', 'or', 'not',
        '<=', '>=', '!=', 'feed', 'consume', 'filter', 'count',
    })

    def __init__(self):
        # DOTALL lets the payload of a command span several lines.
        flags = re.IGNORECASE | re.DOTALL
        # Each payload group starts with \S so that a command followed only
        # by whitespace is rejected instead of yielding an empty string.
        # ":(?!=)" keeps "create x := ..." from being read as a create
        # command whose type starts with "=".
        self.create_pattern = re.compile(
            r'^\s*create\s+(\w+)\s*:(?!=)\s*(\S.*)\s*$', flags
        )
        self.update_pattern = re.compile(
            r'^\s*update\s+(\w+)\s*:=\s*(\S.*)\s*$', flags
        )
        self.query_pattern = re.compile(
            r'^\s*query\s+(\S.*)\s*$', flags
        )

    def parse(self, query: str) -> "Optional[Command]":
        """
        Parse a query string into a command.

        The create, update and query patterns are tried in that order and
        the first one that matches decides the result.

        Args:
            query: Query string

        Returns:
            CreateCommand, UpdateCommand or QueryCommand with surrounding
            whitespace stripped from the payload, or None if parsing fails
        """
        match = self.create_pattern.match(query)
        if match:
            return CreateCommand(match.group(1), match.group(2).strip())

        match = self.update_pattern.match(query)
        if match:
            return UpdateCommand(match.group(1), match.group(2).strip())

        match = self.query_pattern.match(query)
        if match:
            return QueryCommand(match.group(1).strip())

        return None

    def parse_expression(self, expr: str) -> List[str]:
        """
        Split an expression into tokens.

        Tokens are separated by whitespace. Whitespace inside parentheses,
        square brackets or a double-quoted string literal does not split,
        so such groups stay in one token. String literals have no escape
        sequences: the next double quote always closes the literal.

        Examples:
            "cities" -> ["cities"]
            "cities feed consume" -> ["cities", "feed", "consume"]
            "5 + 3" -> ["5", "+", "3"]
            'name = "New York"' -> ["name", "=", '"New York"']

        Args:
            expr: Expression text

        Returns:
            List of token strings in input order
        """
        tokens = []
        current = []
        paren_depth = 0
        bracket_depth = 0
        in_string = False

        for char in expr:
            if in_string:
                # Inside a literal everything is copied verbatim; brackets
                # and spaces have no structural meaning here.
                current.append(char)
                if char == '"':
                    in_string = False
                continue

            if char.isspace() and paren_depth == 0 and bracket_depth == 0:
                if current:
                    tokens.append(''.join(current))
                    current = []
                continue

            if char == '"':
                in_string = True
            elif char == '(':
                paren_depth += 1
            elif char == ')':
                # Clamp at zero: a stray closer must not disable splitting
                # for the remainder of the input.
                paren_depth = max(paren_depth - 1, 0)
            elif char == '[':
                bracket_depth += 1
            elif char == ']':
                bracket_depth = max(bracket_depth - 1, 0)
            current.append(char)

        if current:
            tokens.append(''.join(current))
        return tokens

    def is_identifier(self, token: str) -> bool:
        """Check if token is an identifier"""
        # fullmatch (unlike "$") does not tolerate a trailing newline.
        return self._IDENTIFIER_RE.fullmatch(token) is not None

    def is_number(self, token: str) -> bool:
        """
        Check if token is a number.

        Accepts plain decimal literals such as "5", "-2", "3.14", ".5" and
        "1e5". Words like "nan" and "inf" are not numbers here, so they
        remain usable as identifiers.
        """
        return self._NUMBER_RE.fullmatch(token) is not None

    def is_string(self, token: str) -> bool:
        """Check if token is a string literal"""
        # The length check rejects a lone '"', which would otherwise count
        # as both the opening and the closing quote.
        return len(token) >= 2 and token.startswith('"') and token.endswith('"')

    def is_operator(self, token: str) -> bool:
        """Check if token is an operator"""
        return token in self._OPERATORS
# Convenience function
def parse_query(query: str) -> Optional[Command]:
    """
    Parse a query string using a freshly created Parser.

    Args:
        query: Query string

    Returns:
        Whatever Parser.parse returns for the given string
    """
    return Parser().parse(query)