Lexer.php

<?php

namespace Tlf;

/**
 * Runs a set of grammars over a source string, one token step at a time.
 *
 * The lexer owns three pieces of mutable state that grammars manipulate through
 * the public accessors below: the current state (plus a stack of the states that
 * were set), a stack of AST "heads", and a keyed store of "previous" values.
 *
 * Grammar objects are expected to provide `onLexerStart()`, `onLexerEnd()` and
 * `getRegexesForState()`; each regex handler returned by the latter is an array
 * with a `regex` and a `callable` entry.
 */
class Lexer {

    /**
     * Grammars consulted for regex handlers, in the order they were added.
     */
    protected $grammars = [];

    /**
     * The current state; null until setState() is called or once the stack is emptied.
     */
    protected $state = null;
    /**
     * Every state passed to setState() that has not been popped yet. The last entry is the current state.
     */
    protected $stateStack = [];

    /**
     * Stack of ast objects. Top one is passed to lex functions
     */
    protected array $head = [];
    /**
     * Keyed store of "previous" items.
     * Calling $lexer->previous('docblock') will get the previous docblock
     * $lexer->setPrevious('docblock', value) will set the previous docblock
     * Shape: ['nameOfPrevItem' => value]
     */
    protected $previous = ['docblock'=>null];

    /**
     * Text returned (and reset) by clearBuffer().
     *
     * This property used to be created dynamically by clearBuffer(), which made it
     * publicly accessible; it stays public so code that relied on that keeps working.
     */
    public $buffer = '';


    /**
     * Get a stored "previous" value.
     *
     * @param string $key name of the stored item, e.g. 'docblock'
     * @return mixed the stored value, or null when nothing is stored under $key
     */
    public function previous($key){
        return $this->previous[$key] ?? null;
    }

    /**
     * Store a "previous" value.
     *
     * @param string $key name of the stored item
     * @param mixed $value the value to remember
     * @return mixed the value that was stored under $key before this call (null if none)
     */
    public function setPrevious($key, $value){
        $old = $this->previous($key);
        $this->previous[$key] = $value;
        return $old;
    }

    /**
     * Remove a stored "previous" value.
     *
     * @param string $key name of the stored item
     * @return mixed the value that was removed (null if none)
     */
    public function unsetPrevious($key){
        $prev = $this->previous($key);
        unset($this->previous[$key]);
        return $prev;
    }

    /**
     * @return mixed the current state, or null when no state is active
     */
    public function getState(){
        return $this->state;
    }

    /**
     * Make $newState the current state and push it onto the state stack.
     *
     * @param mixed $newState
     */
    public function setState($newState){
        $this->state = $newState;
        $this->stateStack[] = $newState;
    }

    /**
     * Pop the most recently set state; the state below it (if any) becomes current.
     *
     * @return mixed the state that was removed, or null when the stack was already empty
     */
    public function popState(){
        $oldState = array_pop($this->stateStack);
        $top = array_key_last($this->stateStack);
        $this->state = $top === null ? null : $this->stateStack[$top];
        return $oldState;
    }

    /**
     * Reset the lexer's buffer to an empty string.
     *
     * @return mixed the buffer content from before the reset
     */
    public function clearBuffer(){
        $buffer = $this->buffer;
        $this->buffer = '';
        return $buffer;
    }

    /**
     * Register a grammar. Grammars are consulted in the order they are added.
     *
     * @param object $grammar
     */
    public function addGrammar($grammar){
        $this->grammars[] = $grammar;
    }

    /**
     * Push an ast onto the head stack, making it the current head.
     *
     * @param mixed $ast
     */
    public function setHead($ast){
        $this->head[] = $ast;
    }

    /**
     * Remove and return the current head.
     *
     * @return mixed the removed head, or null when the stack is empty
     */
    public function popHead(){
        return array_pop($this->head);
    }

    /**
     * @return mixed the current head, or null when the stack is empty
     */
    public function getHead(){
        $top = array_key_last($this->head);
        return $top === null ? null : $this->head[$top];
    }

    /**
     * Lex a string into a new 'str' ast whose 'src' is the given string.
     *
     * @param string $str source text to lex
     * @return mixed the ast returned by lexAst()
     */
    public function lexStr($str){
        // Namespace-relative, like lex() and lexAst(): resolves to Tlf\Lexer\Ast.
        $ast = new Lexer\Ast('str');
        $ast->set('src', $str);

        return $this->lexAst($ast, $str);
    }

    /**
     * Lex a file into a new 'file' ast carrying the file's 'ext', 'name' and 'path'.
     *
     * @param string $file path of the file to lex
     * @return mixed the ast returned by lexAst()
     * @throws \RuntimeException when the file cannot be read
     */
    public function lex($file){
        $ast = new Lexer\Ast('file');
        $ast->set('ext', pathinfo($file,PATHINFO_EXTENSION));
        $ast->set('name', pathinfo($file,PATHINFO_FILENAME));
        $ast->set('path', $file);

        $path = $ast->get('path');
        $str = file_get_contents($path);
        if ($str === false){
            throw new \RuntimeException("Could not read file '{$path}'.");
        }

        return $this->lexAst($ast, $str);
    }

    /**
     * Lex $str, letting the registered grammars populate $ast.
     *
     * $ast is pushed as the head, every grammar gets onLexerStart(), then for each
     * token step the regexes for the current state are tested against the token's
     * buffer and the matching handler is invoked with the lexer, the current head
     * and the token. Finally every grammar gets onLexerEnd().
     *
     * @param mixed $ast the root ast for this run
     * @param string $str source text to lex
     * @return mixed the same $ast that was passed in
     * @throws \Exception when more than one regex matches the same buffer
     */
    public function lexAst($ast, $str){
        $this->setHead($ast);
        $token = new Lexer\Token($str);

        foreach ($this->grammars as $grammar){
            $grammar->onLexerStart($this, $ast, $token);
        }

        // Seeded with the initial token so onLexerEnd() still receives a token
        // when the loop below never runs.
        $lastToken = $token;
        $regs = [];
        $regsLoaded = false;
        $prevState = null;
        while ($token = $token->next()){
            $lastToken = $token;

            // Only rebuild the handler list when the state changed since the last step.
            if (!$regsLoaded || $prevState !== $this->state){
                $regs = $this->collectRegexes($this->state);
                $prevState = $this->state;
                $regsLoaded = true;
            }

            $successes = [];
            foreach ($regs as $regHandler){
                if (!preg_match($regHandler['regex'], $token->buffer(), $match)) continue;
                $successes[] = ['regHandler'=>$regHandler,'match'=>$match];
            }
            if (count($successes) > 1){
                $matched = implode(', ', array_map(
                    static fn (array $success) => $success['regHandler']['regex'],
                    $successes
                ));
                throw new \Exception(
                    "Multiple regexes matched & we don't have conflict resolution yet."
                    ." Matched: ".$matched
                );
            }

            // At most one entry remains at this point.
            foreach ($successes as $success){
                $token->setMatch($success['match']);
                $handler = $success['regHandler']['callable'];
                $handler($this, $this->getHead(), $token);
            }
        }

        foreach ($this->grammars as $grammar){
            $grammar->onLexerEnd($this, $ast, $lastToken);
        }
        return $ast;
    }

    /**
     * Merge the regex handlers every grammar provides for the given state.
     *
     * @param mixed $state
     * @return array list of regex handlers, in grammar order
     */
    private function collectRegexes($state){
        $regs = [];
        foreach ($this->grammars as $grammar){
            $regs = array_merge($regs, $grammar->getRegexesForState($state));
        }
        return $regs;
    }

}