secondo-py/pysecondo/parser/parser.py

"""
Simple Query Parser for PySECONDO

Parses SECONDO-like query syntax into executable commands.

Supported syntax:
- create name : type
- update name := value
- query name
- query expr1 op expr2
- query name feed consume
- query name feed count
- query name feed filter[expr] consume
"""

import re
from typing import List, Optional, Union, Tuple
from dataclasses import dataclass


@dataclass
class CreateCommand:
    """Parsed form of a ``create name : type`` command."""

    # Identifier of the object being created.
    name: str
    # Text of the type expression that follows the colon.
    type_str: str


@dataclass
class UpdateCommand:
    """Parsed form of an ``update name := value`` command."""

    # Identifier of the object being updated.
    name: str
    # Value text following ``:=`` (nested list string representation).
    value: str


@dataclass
class QueryCommand:
    """Parsed form of a ``query expression`` command."""

    # Expression text that follows the ``query`` keyword.
    expression: str


# Type alias covering every command dataclass defined in this module.
Command = Union[CreateCommand, UpdateCommand, QueryCommand]


class Parser:
    """
    Simple parser for SECONDO queries

    This is a simplified parser that handles basic SECONDO syntax.
    A full implementation would use a proper lexer and parser.

    Commands are recognised with three regular expressions (``create``,
    ``update`` and ``query``). Keywords are matched case-insensitively and
    a command may span several lines.
    """

    # Identifier: a letter or underscore followed by word characters.
    _IDENTIFIER_RE = re.compile(r'[a-zA-Z_]\w*')

    # Decimal literal with optional sign, fraction and exponent.  float()
    # is deliberately not used for classification because it also accepts
    # "nan", "inf", "infinity" and "1_000", which would misclassify
    # identifiers with those names as numbers.
    _NUMBER_RE = re.compile(r'[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?')

    # Every token treated as an operator (symbols and keyword operators).
    _OPERATORS = frozenset({
        '+', '-', '*', '/', '<', '>', '=', '!', 'and', 'or', 'not',
        '<=', '>=', '!=', 'feed', 'consume', 'filter', 'count',
    })

    def __init__(self):
        # re.DOTALL lets '.' cross newlines so multi-line commands match.
        # Each payload group must start with a non-space character, so a
        # keyword followed only by whitespace is rejected instead of
        # producing a command with an empty payload.
        flags = re.IGNORECASE | re.DOTALL
        # ':(?!=)' keeps "create x := ..." from being read as a create
        # command whose type starts with '='.  The type may be an atomic
        # name ("int") or a parenthesised nested list.
        self.create_pattern = re.compile(
            r'^\s*create\s+(\w+)\s*:(?!=)\s*(\S.*)$', flags
        )
        self.update_pattern = re.compile(
            r'^\s*update\s+(\w+)\s*:=\s*(\S.*)$', flags
        )
        self.query_pattern = re.compile(
            r'^\s*query\s+(\S.*)$', flags
        )

    def parse(self, query: str) -> Optional[Command]:
        """
        Parse a query string into a command

        Args:
            query: Query string

        Returns:
            CreateCommand, UpdateCommand or QueryCommand with surrounding
            whitespace stripped from the payload, or None if the string
            matches none of the supported commands
        """
        # Try create command
        match = self.create_pattern.match(query)
        if match:
            return CreateCommand(match.group(1), match.group(2).strip())

        # Try update command
        match = self.update_pattern.match(query)
        if match:
            return UpdateCommand(match.group(1), match.group(2).strip())

        # Try query command
        match = self.query_pattern.match(query)
        if match:
            return QueryCommand(match.group(1).strip())

        return None

    def parse_expression(self, expr: str) -> List[str]:
        """
        Parse an expression into tokens

        This is a very simple tokenizer that splits on whitespace found
        outside of parentheses, square brackets and double-quoted strings.
        Bracketed groups stay attached to the text they follow, and a
        quoted string is kept as one token even if it contains spaces or
        brackets.  An unterminated string runs to the end of the input.

        Examples:
            "cities" -> ["cities"]
            "cities feed consume" -> ["cities", "feed", "consume"]
            "5 + 3" -> ["5", "+", "3"]
            'x = "New York"' -> ["x", "=", '"New York"']
            "c feed filter[.pop > 5] consume"
                -> ["c", "feed", "filter[.pop > 5]", "consume"]

        Args:
            expr: Expression text

        Returns:
            List of token strings in source order (empty for blank input)
        """
        tokens: List[str] = []
        current: List[str] = []
        paren_depth = 0
        bracket_depth = 0
        in_string = False

        for char in expr:
            if in_string:
                # Inside a string literal nothing is structural except
                # the closing quote.
                current.append(char)
                if char == '"':
                    in_string = False
                continue

            if char.isspace() and paren_depth == 0 and bracket_depth == 0:
                if current:
                    tokens.append(''.join(current))
                    current = []
                continue

            if char == '"':
                in_string = True
            elif char == '(':
                paren_depth += 1
            elif char == ')':
                # Clamp at zero: a stray closer must not leave the depth
                # negative, which would disable splitting from here on.
                paren_depth = max(paren_depth - 1, 0)
            elif char == '[':
                bracket_depth += 1
            elif char == ']':
                bracket_depth = max(bracket_depth - 1, 0)
            current.append(char)

        if current:
            tokens.append(''.join(current))

        return tokens

    def is_identifier(self, token: str) -> bool:
        """Check if token is an identifier"""
        # fullmatch rather than '$', which would accept a trailing newline.
        return self._IDENTIFIER_RE.fullmatch(token) is not None

    def is_number(self, token: str) -> bool:
        """Check if token is a decimal number (e.g. 5, -2, 3.14, 1e5)"""
        return self._NUMBER_RE.fullmatch(token) is not None

    def is_string(self, token: str) -> bool:
        """Check if token is a double-quoted string literal"""
        # The length check rejects a lone '"', which both starts and ends
        # with a quote but is not a complete literal.
        return len(token) >= 2 and token[0] == '"' and token[-1] == '"'

    def is_operator(self, token: str) -> bool:
        """Check if token is an operator"""
        return token in self._OPERATORS


# Convenience function
def parse_query(query: str) -> Optional[Command]:
    """Parse *query* using a freshly constructed Parser.

    Returns whatever ``Parser.parse`` returns for the given string.
    """
    return Parser().parse(query)
first commit 2026-01-24 11:30:02 +08:00			`"""`
			`Simple Query Parser for PySECONDO`

			`Parses SECONDO-like query syntax into executable commands.`

			`Supported syntax:`
			`- create name : type`
			`- update name := value`
			`- query name`
			`- query expr1 op expr2`
			`- query name feed consume`
			`- query name feed count`
			`- query name feed filter[expr] consume`
			`"""`

			`import re`
			`from typing import List, Optional, Union, Tuple`
			`from dataclasses import dataclass`


			`@dataclass`
			`class CreateCommand:`
			`"""CREATE name : type"""`
			`name: str`
			`type_str: str`


			`@dataclass`
			`class UpdateCommand:`
			`"""UPDATE name := value"""`
			`name: str`
			`value: str # Nested list string representation`


			`@dataclass`
			`class QueryCommand:`
			`"""QUERY expression"""`
			`expression: str`


			`Command = Union[CreateCommand, UpdateCommand, QueryCommand]`


			`class Parser:`
			`"""`
			`Simple parser for SECONDO queries`

			`This is a simplified parser that handles basic SECONDO syntax.`
			`A full implementation would use a proper lexer and parser.`
			`"""`

			`def __init__(self):`
			`# Patterns for different commands`
			`self.create_pattern = re.compile(`
			`r'^\s*create\s+(\w+)\s*:\s*\(.+\)\s*$', re.IGNORECASE`
			`)`
			`self.update_pattern = re.compile(`
			`r'^\s*update\s+(\w+)\s*:=\s*(.+)\s*$', re.IGNORECASE`
			`)`
			`self.query_pattern = re.compile(`
			`r'^\s*query\s+(.+)\s*$', re.IGNORECASE`
			`)`

			`def parse(self, query: str) -> Optional[Command]:`
			`"""`
			`Parse a query string into a command`

			`Args:`
			`query: Query string`

			`Returns:`
			`Command object or None if parsing fails`
			`"""`
			`# Try create command`
			`match = self.create_pattern.match(query)`
			`if match:`
			`name = match.group(1)`
			`# Extract type string`
			`type_start = query.find(':') + 1`
			`type_str = query[type_start:].strip()`
			`return CreateCommand(name, type_str)`

			`# Try update command`
			`match = self.update_pattern.match(query)`
			`if match:`
			`name = match.group(1)`
			`value = match.group(2).strip()`
			`return UpdateCommand(name, value)`

			`# Try query command`
			`match = self.query_pattern.match(query)`
			`if match:`
			`expression = match.group(1).strip()`
			`return QueryCommand(expression)`

			`return None`

			`def parse_expression(self, expr: str) -> List:`
			`"""`
			`Parse an expression into tokens`

			`This is a very simple tokenizer that splits on whitespace`
			`while keeping track of brackets.`

			`Examples:`
			`"cities" -> ["cities"]`
			`"cities feed consume" -> ["cities", "feed", "consume"]`
			`"5 + 3" -> ["5", "+", "3"]`
			`"""`
			`tokens = []`
			`current = []`
			`paren_depth = 0`
			`bracket_depth = 0`

			`for char in expr:`
			`if char in ' \t\n' and paren_depth == 0 and bracket_depth == 0:`
			`if current:`
			`tokens.append(''.join(current))`
			`current = []`
			`else:`
			`if char == '(':`
			`paren_depth += 1`
			`elif char == ')':`
			`paren_depth -= 1`
			`elif char == '[':`
			`bracket_depth += 1`
			`elif char == ']':`
			`bracket_depth -= 1`
			`current.append(char)`

			`if current:`
			`tokens.append(''.join(current))`

			`return tokens`

			`def is_identifier(self, token: str) -> bool:`
			`"""Check if token is an identifier"""`
			`return bool(re.match(r'^[a-zA-Z_]\w*$', token))`

			`def is_number(self, token: str) -> bool:`
			`"""Check if token is a number"""`
			`try:`
			`float(token)`
			`return True`
			`except ValueError:`
			`return False`

			`def is_string(self, token: str) -> bool:`
			`"""Check if token is a string literal"""`
			`return token.startswith('"') and token.endswith('"')`

			`def is_operator(self, token: str) -> bool:`
			`"""Check if token is an operator"""`
			`ops = {'+', '-', '*', '/', '<', '>', '=', '!', 'and', 'or', 'not'}`
			`return token in ops or token in {'<=', '>=', '!=', 'feed', 'consume',`
			`'filter', 'count'}`


			`# Convenience function`
			`def parse_query(query: str) -> Optional[Command]:`
			`"""Parse a query string"""`
			`parser = Parser()`
			`return parser.parse(query)`