164 lines
4.3 KiB
Python
164 lines
4.3 KiB
Python
"""
Simple Query Parser for PySECONDO

Parses SECONDO-like query syntax into executable commands.

Supported syntax:
    - create name : type
    - update name := value
    - query name
    - query expr1 op expr2
    - query name feed consume
    - query name feed count
    - query name feed filter[expr] consume
"""
|
|
|
|
import re
|
|
from typing import List, Optional, Union, Tuple
|
|
from dataclasses import dataclass
|
|
|
|
|
|
@dataclass
class CreateCommand:
    """Parsed form of a ``create <name> : <type>`` statement."""

    # Identifier of the object being created.
    name: str
    # Type expression exactly as written after the colon (kept as text).
    type_str: str
|
|
|
|
|
|
@dataclass
class UpdateCommand:
    """Parsed form of an ``update <name> := <value>`` statement."""

    # Identifier of the object being assigned to.
    name: str
    # Right-hand side kept as text: a nested list string representation.
    value: str
|
|
|
|
|
|
@dataclass
class QueryCommand:
    """Parsed form of a ``query <expression>`` statement."""

    # Everything that followed the ``query`` keyword, kept as raw text.
    expression: str
|
|
|
|
|
|
# Any of the three command objects produced by the parser.
Command = Union[CreateCommand, UpdateCommand, QueryCommand]
|
|
|
|
|
|
class Parser:
    """
    Simple parser for SECONDO queries.

    Recognises three statement forms (keywords are case-insensitive and a
    statement may span several lines):

    - ``create <name> : (<type>)``  -> CreateCommand
    - ``update <name> := <value>``  -> UpdateCommand
    - ``query <expression>``        -> QueryCommand

    This is a simplified parser that handles basic SECONDO syntax.
    A full implementation would use a proper lexer and parser.
    """

    # Plain decimal literal: optional sign, digits with an optional fraction
    # (or a bare fraction), optional exponent. Deliberately stricter than
    # float(), which also accepts "nan", "inf", "infinity" and "1_000" --
    # the first three are valid identifiers and must not count as numbers.
    _NUMBER_RE = re.compile(
        r'[+-]?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:[eE][+-]?[0-9]+)?'
    )
    _IDENTIFIER_RE = re.compile(r'[a-zA-Z_]\w*')
    _OPERATORS = frozenset({
        '+', '-', '*', '/', '<', '>', '=', '!', '<=', '>=', '!=',
        'and', 'or', 'not',
        'feed', 'consume', 'filter', 'count',
    })

    def __init__(self):
        # DOTALL lets '.' cross line breaks so multi-line statements match;
        # the tokenizer already treats newlines as ordinary whitespace.
        flags = re.IGNORECASE | re.DOTALL

        # Patterns for different commands.
        # Group 1 is always the object name (create/update) or the
        # expression (query); group 2 is the type (create) or value (update).
        self.create_pattern = re.compile(
            r'^\s*create\s+(\w+)\s*:\s*(\(.+\))\s*$', flags
        )
        self.update_pattern = re.compile(
            r'^\s*update\s+(\w+)\s*:=\s*(.+)\s*$', flags
        )
        self.query_pattern = re.compile(
            r'^\s*query\s+(.+)\s*$', flags
        )

    def parse(self, query: str) -> Optional["Command"]:
        """
        Parse a query string into a command.

        The create, update and query patterns are tried in that order and
        the first one that matches wins.

        Note that the create pattern only accepts a parenthesised type
        expression; ``create x : int`` is not recognised.

        Args:
            query: Query string

        Returns:
            Command object or None if parsing fails
        """
        # Try create command
        match = self.create_pattern.match(query)
        if match:
            # The captured type starts at '(' and ends at ')', so it needs
            # no extra stripping.
            return CreateCommand(match.group(1), match.group(2))

        # Try update command
        match = self.update_pattern.match(query)
        if match:
            # The greedy '.+' swallows trailing whitespace; strip it off.
            return UpdateCommand(match.group(1), match.group(2).strip())

        # Try query command
        match = self.query_pattern.match(query)
        if match:
            return QueryCommand(match.group(1).strip())

        return None

    def parse_expression(self, expr: str) -> List[str]:
        """
        Parse an expression into tokens.

        This is a very simple tokenizer that splits on whitespace, except
        inside parentheses, square brackets and double-quoted string
        literals, each of which stays part of a single token.

        Unbalanced closing brackets are tolerated: nesting depth never
        drops below zero, so a stray ')' or ']' cannot disable splitting
        for the rest of the expression. An unterminated string literal
        runs to the end of the input.

        Examples:
            "cities" -> ["cities"]
            "cities feed consume" -> ["cities", "feed", "consume"]
            "5 + 3" -> ["5", "+", "3"]
            'name = "New York"' -> ["name", "=", '"New York"']

        Args:
            expr: Expression text

        Returns:
            List of token strings (empty for empty or blank input)
        """
        tokens = []
        current = []
        paren_depth = 0
        bracket_depth = 0
        in_string = False

        for char in expr:
            if in_string:
                # Inside a string literal nothing is special except the
                # closing quote: whitespace and brackets are plain text.
                current.append(char)
                if char == '"':
                    in_string = False
                continue

            if char.isspace() and paren_depth == 0 and bracket_depth == 0:
                # Top-level whitespace ends the current token (if any).
                if current:
                    tokens.append(''.join(current))
                    current = []
                continue

            if char == '"':
                in_string = True
            elif char == '(':
                paren_depth += 1
            elif char == ')':
                paren_depth = max(paren_depth - 1, 0)
            elif char == '[':
                bracket_depth += 1
            elif char == ']':
                bracket_depth = max(bracket_depth - 1, 0)
            current.append(char)

        if current:
            tokens.append(''.join(current))

        return tokens

    def is_identifier(self, token: str) -> bool:
        """Check if token is an identifier (letter or '_' then word chars)."""
        # fullmatch rather than '$', which would also accept a trailing '\n'.
        return self._IDENTIFIER_RE.fullmatch(token) is not None

    def is_number(self, token: str) -> bool:
        """Check if token is a decimal number such as 5, -3.5, .5 or 1e5."""
        return self._NUMBER_RE.fullmatch(token) is not None

    def is_string(self, token: str) -> bool:
        """Check if token is a double-quoted string literal."""
        # Require two characters so a lone '"' is not taken for a literal.
        return len(token) >= 2 and token.startswith('"') and token.endswith('"')

    def is_operator(self, token: str) -> bool:
        """Check if token is a symbolic, logical or stream operator."""
        return token in self._OPERATORS
|
|
|
|
|
|
# Convenience function
|
|
def parse_query(query: str) -> Optional[Command]:
    """Parse *query* with a freshly constructed Parser and return the result."""
    return Parser().parse(query)
|