# secondo-py/pysecondo/parser/parser.py

"""
Simple Query Parser for PySECONDO

Parses SECONDO-like query syntax into executable commands.

Supported syntax:
- create name : type
- update name := value
- query name
- query expr1 op expr2
- query name feed consume
- query name feed count
- query name feed filter[expr] consume
"""

import re
from typing import List, Optional, Union, Tuple
from dataclasses import dataclass


@dataclass
class CreateCommand:
    """Parsed form of a ``create name : type`` statement."""
    # Identifier of the object being created.
    name: str
    # Raw text of the type expression that follows the colon.
    type_str: str


@dataclass
class UpdateCommand:
    """Parsed form of an ``update name := value`` statement."""
    # Identifier of the object being assigned to.
    name: str
    # Right-hand side, kept as a nested list string representation.
    value: str


@dataclass
class QueryCommand:
    """Parsed form of a ``query expression`` statement."""
    # Everything after the ``query`` keyword, with outer whitespace removed.
    expression: str


# Any of the command objects that Parser.parse can return.
Command = Union[
    CreateCommand,
    UpdateCommand,
    QueryCommand,
]


class Parser:
    """
    Simple parser for SECONDO queries

    This is a simplified parser that handles basic SECONDO syntax.
    A full implementation would use a proper lexer and parser.

    Statements are recognised case-insensitively by their leading keyword
    (create / update / query) and may span several lines.
    """

    # Numeric literal: optional sign, digits with an optional fraction (or a
    # bare fraction), optional exponent. Deliberately stricter than float(),
    # which also accepts "inf", "nan" and "1_000".
    _NUMBER_RE = re.compile(
        r'[+-]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?'
    )
    _IDENTIFIER_RE = re.compile(r'[a-zA-Z_]\w*')
    # Symbols and keywords reported by is_operator(); built once, not per call.
    _OPERATORS = frozenset({
        '+', '-', '*', '/', '<', '>', '=', '!', 'and', 'or', 'not',
        '<=', '>=', '!=', 'feed', 'consume', 'filter', 'count',
    })

    def __init__(self):
        # DOTALL lets the body of a statement continue over line breaks;
        # without it any multi-line statement failed to match at all.
        flags = re.IGNORECASE | re.DOTALL
        # Group 1: object name, group 2: parenthesised type expression.
        # NOTE(review): a bare type such as "create x : int" is rejected
        # because the type must be parenthesised - confirm this is intended.
        self.create_pattern = re.compile(
            r'^\s*create\s+(\w+)\s*:\s*(\(.+\))\s*$', flags
        )
        # Group 1: object name, group 2: value text.
        self.update_pattern = re.compile(
            r'^\s*update\s+(\w+)\s*:=\s*(.+)\s*$', flags
        )
        # Group 1: expression text.
        self.query_pattern = re.compile(
            r'^\s*query\s+(.+)\s*$', flags
        )

    def parse(self, query: str) -> Optional["Command"]:
        """
        Parse a query string into a command

        The create, update and query forms are tried in that order.

        Args:
            query: Query string

        Returns:
            Command object or None if parsing fails
        """
        # Try create command
        match = self.create_pattern.match(query)
        if match:
            return CreateCommand(match.group(1), match.group(2).strip())

        # Try update command
        match = self.update_pattern.match(query)
        if match:
            return UpdateCommand(match.group(1), match.group(2).strip())

        # Try query command
        match = self.query_pattern.match(query)
        if match:
            return QueryCommand(match.group(1).strip())

        return None

    def parse_expression(self, expr: str) -> List[str]:
        """
        Parse an expression into tokens

        This is a very simple tokenizer that splits on whitespace while
        keeping parenthesised groups, bracketed groups and double-quoted
        string literals together as single tokens.

        Args:
            expr: Expression text

        Returns:
            List of token strings (empty for empty or blank input)

        Examples:
            "cities" -> ["cities"]
            "cities feed consume" -> ["cities", "feed", "consume"]
            "5 + 3" -> ["5", "+", "3"]
            "cities feed filter[.pop > 1000] consume"
                -> ["cities", "feed", "filter[.pop > 1000]", "consume"]
        """
        tokens: List[str] = []
        current: List[str] = []
        paren_depth = 0
        bracket_depth = 0
        in_string = False

        for char in expr:
            if in_string:
                # Inside a string literal nothing splits and brackets are
                # plain characters; only the closing quote ends the literal.
                current.append(char)
                if char == '"':
                    in_string = False
                continue

            if char.isspace() and paren_depth == 0 and bracket_depth == 0:
                if current:
                    tokens.append(''.join(current))
                    current = []
                continue

            if char == '"':
                in_string = True
            elif char == '(':
                paren_depth += 1
            elif char == ')':
                # Clamp at zero: a stray closer must not push the depth
                # negative, which would stop all further splitting.
                paren_depth = max(paren_depth - 1, 0)
            elif char == '[':
                bracket_depth += 1
            elif char == ']':
                bracket_depth = max(bracket_depth - 1, 0)
            current.append(char)

        if current:
            tokens.append(''.join(current))

        return tokens

    def is_identifier(self, token: str) -> bool:
        """Check if token is an identifier (letter or underscore, then word characters)"""
        # fullmatch, unlike match with '$', does not tolerate a trailing newline.
        return self._IDENTIFIER_RE.fullmatch(token) is not None

    def is_number(self, token: str) -> bool:
        """Check if token is a numeric literal (integer, decimal or exponent form)"""
        return self._NUMBER_RE.fullmatch(token) is not None

    def is_string(self, token: str) -> bool:
        """Check if token is a double-quoted string literal"""
        # The length check stops a lone quote character from counting as
        # both the opening and the closing delimiter.
        return len(token) >= 2 and token.startswith('"') and token.endswith('"')

    def is_operator(self, token: str) -> bool:
        """Check if token is an operator symbol or operator keyword (case-sensitive)"""
        return token in self._OPERATORS


# Convenience function
def parse_query(query: str) -> Optional[Command]:
    """
    Parse a query string using a freshly constructed Parser

    Args:
        query: Query string

    Returns:
        Whatever Parser.parse returns for the query
    """
    return Parser().parse(query)