Lexer.php
<?php
namespace Tlf;
/**
 * Runs a set of grammars over a source string and fills in an ast.
 *
 * Grammars are added with addGrammar(). Each one must provide
 * onLexerStart(), getRegexesForState() and onLexerEnd(), which are the
 * only grammar methods this class calls. The lexer keeps a stack of states
 * and a stack of "head" asts that grammar callbacks can push to / pop from.
 */
class Lexer {

    /**
     * Grammars registered through addGrammar(), consulted in insertion order.
     */
    protected $grammars = [];

    /**
     * The current state: the top of $stateStack, or null when nothing is set.
     */
    protected $state = null;

    /**
     * Every state passed to setState() that has not been popped yet.
     */
    protected $stateStack = [];

    /**
     * Stack of ast objects. Top one is passed to lex functions
     */
    protected array $head = [];

    /**
     * Buffer returned and reset by clearBuffer().
     * Declared explicitly so the first clearBuffer() call does not touch an
     * undefined (dynamic) property.
     */
    protected $buffer = '';

    /**
     * Named "previous" items, keyed by item name.
     * Calling $lexer->previous('docblock') will get the previous docblock
     * $lexer->setPrevious('docblock', value) will set the previous docblock
     */
    protected $previous = ['docblock'=>null];

    /**
     * Get the stored previous item for a key.
     *
     * @param string $key name of the item, such as 'docblock'
     * @return mixed the stored value, or null when nothing is stored
     */
    public function previous($key){
        return $this->previous[$key] ?? null;
    }

    /**
     * Store a previous item under a key.
     *
     * @param string $key name of the item
     * @param mixed $value the value to remember
     * @return mixed the value that was stored before, or null
     */
    public function setPrevious($key, $value){
        $old = $this->previous($key);
        $this->previous[$key] = $value;
        return $old;
    }

    /**
     * Forget the previous item stored under a key.
     *
     * @param string $key name of the item
     * @return mixed the value that was stored, or null
     */
    public function unsetPrevious($key){
        $prev = $this->previous($key);
        unset($this->previous[$key]);
        return $prev;
    }

    /**
     * @return mixed the current state, or null when no state is set
     */
    public function getState(){
        return $this->state;
    }

    /**
     * Make $newState the current state and push it onto the state stack.
     *
     * @param mixed $newState
     */
    public function setState($newState){
        $this->state = $newState;
        $this->stateStack[] = $newState;
    }

    /**
     * Pop the current state; the state beneath it (or null) becomes current.
     *
     * @return mixed the state that was popped, or null if the stack was empty
     */
    public function popState(){
        $oldState = array_pop($this->stateStack);
        $top = array_key_last($this->stateStack);
        $this->state = $top === null ? null : $this->stateStack[$top];
        return $oldState;
    }

    /**
     * Empty the lexer's buffer.
     *
     * @return string the buffer content from before it was cleared
     */
    public function clearBuffer(){
        $buffer = $this->buffer;
        $this->buffer = '';
        return $buffer;
    }

    /**
     * Register a grammar. It must provide onLexerStart(), onLexerEnd() and
     * getRegexesForState().
     *
     * @param object $grammar
     */
    public function addGrammar($grammar){
        $this->grammars[] = $grammar;
    }

    /**
     * Push an ast onto the head stack, making it the current head.
     *
     * @param mixed $ast
     */
    public function setHead($ast){
        $this->head[] = $ast;
    }

    /**
     * Remove the current head from the stack.
     *
     * @return mixed the removed head, or null if the stack was empty
     */
    public function popHead(){
        return array_pop($this->head);
    }

    /**
     * @return mixed the current head, or null if the stack is empty
     */
    public function getHead(){
        $top = array_key_last($this->head);
        // An empty stack has no last key; return null instead of indexing -1.
        return $top === null ? null : $this->head[$top];
    }

    /**
     * Lex a string into a new 'str' ast whose 'src' is the given string.
     *
     * @param string $str the source to lex
     * @return Lexer\Ast the populated ast
     */
    public function lexStr($str){
        // Namespace-relative, like the Ast/Token instantiations in lex() and lexAst().
        $ast = new Lexer\Ast('str');
        $ast->set('src', $str);
        return $this->lexAst($ast, $str);
    }

    /**
     * Lex a file into a new 'file' ast carrying 'ext', 'name' and 'path'.
     *
     * @param string $file path of the file to lex
     * @return Lexer\Ast the populated ast
     * @throws \RuntimeException if the file cannot be read
     */
    public function lex($file){
        $ast = new Lexer\Ast('file');
        $ast->set('ext', pathinfo($file,PATHINFO_EXTENSION));
        $ast->set('name', pathinfo($file,PATHINFO_FILENAME));
        $ast->set('path', $file);
        $path = $ast->get('path');
        $str = file_get_contents($path);
        if ($str === false){
            throw new \RuntimeException("Unable to read file '{$path}'.");
        }
        return $this->lexAst($ast, $str);
    }

    /**
     * Lex a string into an existing ast.
     *
     * The ast becomes the head, every grammar's onLexerStart() runs, then for
     * each token the regexes of the current state are tested against the
     * token's buffer. A single matching handler has its 'callable' invoked
     * with the lexer, the current head and the token. Finally every grammar's
     * onLexerEnd() runs with the last token seen.
     *
     * NOTE(review): the ast pushed here is not popped by this method, so it
     * stays on the head stack unless a grammar pops it. Confirm that is intended.
     *
     * @param mixed $ast the ast to populate
     * @param string $str the source to lex
     * @return mixed the same ast
     * @throws \Exception when more than one regex matches the same buffer
     */
    public function lexAst($ast, $str){
        $this->setHead($ast);
        $token = new Lexer\Token($str);
        foreach ($this->grammars as $grammar){
            $grammar->onLexerStart($this,$ast,$token);
        }

        // Start from the initial token so onLexerEnd() still receives a token
        // when the loop below never runs.
        $lastToken = $token;
        // false never equals a real state, so the first iteration always loads.
        $loadedState = false;
        $regs = [];
        while ($token = $token->next()){
            $lastToken = $token;
            // Regexes are only re-collected when a callback changed the state.
            if ($loadedState !== $this->state){
                $regs = $this->regexesForState($this->state);
                $loadedState = $this->state;
            }

            $successes = [];
            foreach ($regs as $regHandler){
                if (!preg_match($regHandler['regex'], $token->buffer(), $match)){
                    continue;
                }
                $successes[] = ['regHandler'=>$regHandler,'match'=>$match];
            }
            if (count($successes) > 1){
                // Report the conflict in the exception rather than printing it.
                $conflicts = array_column(array_column($successes, 'regHandler'), 'regex');
                throw new \Exception(
                    "Multiple regexes matched & we don't have conflict resolution yet."
                    .' Conflicting regexes: '.implode(', ', $conflicts)
                );
            }

            // Read the head per token: a previous callback may have changed it.
            $headAst = $this->getHead();
            foreach ($successes as $success){
                $func = $success['regHandler']['callable'];
                $token->setMatch($success['match']);
                $func($this,$headAst,$token);
            }
        }

        foreach ($this->grammars as $grammar){
            $grammar->onLexerEnd($this, $ast, $lastToken);
        }
        return $ast;
    }

    /**
     * Merge the regex handlers every grammar declares for a state.
     *
     * @param mixed $state the state to collect handlers for
     * @return array handlers, each read as ['regex'=>..., 'callable'=>...]
     */
    private function regexesForState($state){
        $regs = [];
        foreach ($this->grammars as $grammar){
            $regs = array_merge($regs, $grammar->getRegexesForState($state));
        }
        return $regs;
    }
}