Make sure to keep all the tokens, rework php highlighting into lookup arrays

This commit is contained in:
Timothy Warren 2019-10-30 14:21:10 -04:00
parent fa96e91b30
commit d115985833
4 changed files with 196 additions and 135 deletions

View File

@ -122,16 +122,10 @@ class Editor {
) { ) {
$this->syntax = $syntax; $this->syntax = $syntax;
// Pre-tokenize the file
if ($this->syntax->filetype === 'PHP')
{
$this->syntax->tokens = get_php_tokens($this->filename);
}
// Update the syntax highlighting for all the rows of the file // Update the syntax highlighting for all the rows of the file
for ($i = 0; $i < $this->numRows; $i++) for ($i = 0; $i < $this->numRows; $i++)
{ {
$this->rows[$i]->update(); $this->rows[$i]->updateSyntax();
} }
return; return;
@ -342,6 +336,12 @@ class Editor {
$this->selectSyntaxHighlight(); $this->selectSyntaxHighlight();
// Pre-tokenize the file
if ($this->syntax->filetype === 'PHP')
{
$this->syntax->tokens = get_php_tokens(file_get_contents($this->filename));
}
// #TODO gracefully handle issues with loading a file // #TODO gracefully handle issues with loading a file
$handle = fopen($filename, 'rb'); $handle = fopen($filename, 'rb');
if ($handle === FALSE) if ($handle === FALSE)

View File

@ -12,6 +12,7 @@ class Highlight {
public const NUMBER = 6; public const NUMBER = 6;
public const OPERATOR = 7; public const OPERATOR = 7;
public const VARIABLE = 8; public const VARIABLE = 8;
public const INVALID = 9; public const DELIMITER = 9;
public const MATCH = 10; public const INVALID = 10;
public const MATCH = 11;
} }

View File

@ -20,6 +20,146 @@ class Row {
private Editor $parent; private Editor $parent;
private bool $hlOpenComment = FALSE; private bool $hlOpenComment = FALSE;
/**
 * Lookup table from PHP tokenizer token types (the T_* constants) to
 * Highlight constants.
 *
 * updateSyntaxPHP() tests each token's 'type' against the keys of this
 * array and, when a key is present, applies the mapped highlight value
 * to the token's span in the row. Token types without an entry here are
 * not highlighted through this lookup.
 */
private array $phpTokenHighlightMap = [
// Delimiters: open/close tags and the tokens that open an
// interpolation inside a string
T_CLOSE_TAG => Highlight::DELIMITER,
// NOTE(review): T_ARRAY is the token for the `array` keyword; confirm
// it is meant to be colored as a delimiter rather than as a keyword
T_ARRAY => Highlight::DELIMITER,
T_CURLY_OPEN => Highlight::DELIMITER,
T_DOLLAR_OPEN_CURLY_BRACES => Highlight::DELIMITER,
T_OPEN_TAG => Highlight::DELIMITER,
T_OPEN_TAG_WITH_ECHO => Highlight::DELIMITER,
// Number literals (floating point and integer)
T_DNUMBER => Highlight::NUMBER,
T_LNUMBER => Highlight::NUMBER,
// Simple string literals (quoted strings reported as a single token)
T_CONSTANT_ENCAPSED_STRING => Highlight::STRING,
// Simple variables
T_VARIABLE => Highlight::VARIABLE,
// Multi-character operators; single-character operators are not
// T_* tokens and are therefore not keyed in this table
T_AND_EQUAL => Highlight::OPERATOR,
T_BOOLEAN_AND => Highlight::OPERATOR,
T_BOOLEAN_OR => Highlight::OPERATOR,
T_COALESCE => Highlight::OPERATOR,
T_CONCAT_EQUAL => Highlight::OPERATOR,
T_DEC => Highlight::OPERATOR,
T_DIV_EQUAL => Highlight::OPERATOR,
T_DOUBLE_ARROW => Highlight::OPERATOR,
T_DOUBLE_COLON => Highlight::OPERATOR,
T_ELLIPSIS => Highlight::OPERATOR,
T_INC => Highlight::OPERATOR,
T_IS_EQUAL => Highlight::OPERATOR,
T_IS_GREATER_OR_EQUAL => Highlight::OPERATOR,
T_IS_IDENTICAL => Highlight::OPERATOR,
T_IS_NOT_EQUAL => Highlight::OPERATOR,
T_IS_NOT_IDENTICAL => Highlight::OPERATOR,
T_IS_SMALLER_OR_EQUAL => Highlight::OPERATOR,
T_SPACESHIP => Highlight::OPERATOR,
T_LOGICAL_AND => Highlight::OPERATOR,
T_LOGICAL_OR => Highlight::OPERATOR,
T_LOGICAL_XOR => Highlight::OPERATOR,
T_MINUS_EQUAL => Highlight::OPERATOR,
T_MOD_EQUAL => Highlight::OPERATOR,
T_MUL_EQUAL => Highlight::OPERATOR,
T_NS_SEPARATOR => Highlight::OPERATOR,
T_OBJECT_OPERATOR => Highlight::OPERATOR,
T_OR_EQUAL => Highlight::OPERATOR,
T_PLUS_EQUAL => Highlight::OPERATOR,
T_POW => Highlight::OPERATOR,
T_POW_EQUAL => Highlight::OPERATOR,
T_SL => Highlight::OPERATOR,
T_SL_EQUAL => Highlight::OPERATOR,
T_SR => Highlight::OPERATOR,
T_SR_EQUAL => Highlight::OPERATOR,
T_XOR_EQUAL => Highlight::OPERATOR,
// Language keywords, all colored as KEYWORD1
T_ABSTRACT => Highlight::KEYWORD1,
T_AS => Highlight::KEYWORD1,
T_BREAK => Highlight::KEYWORD1,
T_CASE => Highlight::KEYWORD1,
T_CATCH => Highlight::KEYWORD1,
T_CLASS => Highlight::KEYWORD1,
T_CLONE => Highlight::KEYWORD1,
T_CONST => Highlight::KEYWORD1,
T_CONTINUE => Highlight::KEYWORD1,
T_DECLARE => Highlight::KEYWORD1,
T_DEFAULT => Highlight::KEYWORD1,
T_DO => Highlight::KEYWORD1,
T_ELSE => Highlight::KEYWORD1,
T_ELSEIF => Highlight::KEYWORD1,
T_ENDDECLARE => Highlight::KEYWORD1,
T_ENDFOR => Highlight::KEYWORD1,
T_ENDFOREACH => Highlight::KEYWORD1,
T_ENDIF => Highlight::KEYWORD1,
T_ENDSWITCH => Highlight::KEYWORD1,
T_ENDWHILE => Highlight::KEYWORD1,
T_EXTENDS => Highlight::KEYWORD1,
T_FINAL => Highlight::KEYWORD1,
T_FINALLY => Highlight::KEYWORD1,
T_FOR => Highlight::KEYWORD1,
T_FOREACH => Highlight::KEYWORD1,
T_FUNCTION => Highlight::KEYWORD1,
T_GLOBAL => Highlight::KEYWORD1,
T_GOTO => Highlight::KEYWORD1,
T_HALT_COMPILER => Highlight::KEYWORD1,
T_IF => Highlight::KEYWORD1,
T_IMPLEMENTS => Highlight::KEYWORD1,
T_INSTANCEOF => Highlight::KEYWORD1,
T_INSTEADOF => Highlight::KEYWORD1,
T_INTERFACE => Highlight::KEYWORD1,
T_NAMESPACE => Highlight::KEYWORD1,
T_NEW => Highlight::KEYWORD1,
T_PRIVATE => Highlight::KEYWORD1,
T_PUBLIC => Highlight::KEYWORD1,
T_PROTECTED => Highlight::KEYWORD1,
T_RETURN => Highlight::KEYWORD1,
T_STATIC => Highlight::KEYWORD1,
T_SWITCH => Highlight::KEYWORD1,
T_THROW => Highlight::KEYWORD1,
T_TRAIT => Highlight::KEYWORD1,
T_TRY => Highlight::KEYWORD1,
T_USE => Highlight::KEYWORD1,
T_VAR => Highlight::KEYWORD1,
T_WHILE => Highlight::KEYWORD1,
T_YIELD => Highlight::KEYWORD1,
T_YIELD_FROM => Highlight::KEYWORD1,
// T_STRING covers identifiers, keywords, etc. rather than string
// literals. Its entry is disabled: enabling it would apply KEYWORD2 to
// every T_STRING token. TODO confirm how T_STRING should be highlighted.
// T_STRING => Highlight::KEYWORD2,
];
/**
 * Lookup table from single-character token text to Highlight constants.
 *
 * updateSyntaxPHP() consults this array only for tokens whose 'typeName'
 * is 'RAW', keyed by the token's 'char' value; a matching character's
 * span in the row is given the mapped highlight value.
 */
private array $phpCharacterHighlightMap = [
// Delimiter characters (brackets, braces and parentheses)
'[' => Highlight::DELIMITER,
']' => Highlight::DELIMITER,
'{' => Highlight::DELIMITER,
'}' => Highlight::DELIMITER,
'(' => Highlight::DELIMITER,
')' => Highlight::DELIMITER,
// Single character operators; the punctuation characters ',', ';' and
// ':' are colored as operators as well
// NOTE(review): '&', '?' and '@' have no entry in this map — confirm
// whether that is intentional
',' => Highlight::OPERATOR,
';' => Highlight::OPERATOR,
':' => Highlight::OPERATOR,
'^' => Highlight::OPERATOR,
'%' => Highlight::OPERATOR,
'+' => Highlight::OPERATOR,
'-' => Highlight::OPERATOR,
'*' => Highlight::OPERATOR,
'/' => Highlight::OPERATOR,
'.' => Highlight::OPERATOR,
'|' => Highlight::OPERATOR,
'~' => Highlight::OPERATOR,
'>' => Highlight::OPERATOR,
'<' => Highlight::OPERATOR,
'=' => Highlight::OPERATOR,
'!' => Highlight::OPERATOR,
];
public static function new(Editor $parent, string $chars, int $idx): self public static function new(Editor $parent, string $chars, int $idx): self
{ {
$self = new self(); $self = new self();
@ -114,7 +254,7 @@ class Row {
// ! Syntax Highlighting // ! Syntax Highlighting
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
protected function updateSyntax(): void public function updateSyntax(): void
{ {
$this->hl = array_fill(0, $this->rsize, Highlight::NORMAL); $this->hl = array_fill(0, $this->rsize, Highlight::NORMAL);
@ -277,14 +417,13 @@ class Row {
$this->hlOpenComment = $inComment; $this->hlOpenComment = $inComment;
if ($changed && $this->idx + 1 < $this->parent->numRows) if ($changed && $this->idx + 1 < $this->parent->numRows)
{ {
$this->parent->rows[$this->idx + 1]->update(); $this->parent->rows[$this->idx + 1]->updateSyntax();
} }
} }
protected function updateSyntaxPHP():void protected function updateSyntaxPHP():void
{ {
$tokens = $this->parent->syntax->tokens[$this->idx + 1]; $tokens = $this->parent->syntax->tokens[$this->idx + 1];
$inComment = ($this->idx > 0 && $this->parent->rows[$this->idx - 1]->hlOpenComment);
// The line is probably just empty // The line is probably just empty
if ($tokens === NULL) if ($tokens === NULL)
@ -292,131 +431,46 @@ class Row {
return; return;
} }
// $inComment = ($this->idx > 0 && $this->parent->rows[$this->idx - 1]->hlOpenComment);
// Keep track of where you are in the line, so that // Keep track of where you are in the line, so that
// multiples of the same tokens can be effectively matched // multiples of the same tokens can be effectively matched
$offset = 0; $offset = 0;
foreach ($tokens as $token) foreach ($tokens as $token)
{ {
if ($offset >= $this->rsize)
{
break;
}
$char = $token['char']; $char = $token['char'];
$charLen = strlen($char); $charLen = strlen($char);
$charStart = strpos($this->render, $char, $offset); $charStart = strpos($this->render, $char, $offset);
$charEnd = $charStart + $charLen; if ($charStart === FALSE)
if ($token['typeName'] === 'RAW')
{ {
switch($token['char']) $offset++;
{ continue;
case '^':
case '%':
case '+':
case '-':
case '*':
case '/':
case '.':
case '|':
case '~':
case '>':
case '<':
case '=':
case '!':
array_replace_range($this->hl, $charStart, $charLen, Highlight::OPERATOR);
$offset = $charEnd;
continue 2;
}
} }
switch ($token['type']) $charEnd = $charStart + $charLen;
// Highlight raw characters
if (($token['typeName'] === 'RAW') && array_key_exists($token['char'], $this->phpCharacterHighlightMap))
{ {
// Number literals $hl = $this->phpCharacterHighlightMap[$token['char']];
case T_LNUMBER: array_replace_range($this->hl, $charStart, $charLen, $hl);
array_replace_range($this->hl, $charStart, $charLen, Highlight::NUMBER); $offset = $charEnd;
$offset = $charEnd; continue;
continue 2; }
// Simple string literals // Highlight specific tokens
case T_CONSTANT_ENCAPSED_STRING: if (array_key_exists($token['type'], $this->phpTokenHighlightMap))
array_replace_range($this->hl, $charStart, $charLen, Highlight::STRING); {
$offset = $charEnd; $hl = $this->phpTokenHighlightMap[$token['type']];
continue 2; array_replace_range($this->hl, $charStart, $charLen, $hl);
$offset = $charEnd;
// Operators continue;
case T_AND_EQUAL:
case T_BOOLEAN_AND:
case T_BOOLEAN_OR:
case T_COALESCE:
case T_CONCAT_EQUAL:
case T_DEC:
case T_DIV_EQUAL:
case T_DOUBLE_ARROW:
case T_DOUBLE_COLON:
case T_ELLIPSIS:
case T_INC:
case T_IS_EQUAL:
case T_IS_GREATER_OR_EQUAL:
case T_IS_IDENTICAL:
case T_IS_NOT_EQUAL:
case T_IS_NOT_IDENTICAL:
case T_IS_SMALLER_OR_EQUAL:
case T_SPACESHIP:
case T_LOGICAL_AND:
case T_LOGICAL_OR:
case T_LOGICAL_XOR:
case T_MINUS_EQUAL:
case T_MOD_EQUAL:
case T_MUL_EQUAL:
case T_NS_SEPARATOR:
case T_OBJECT_OPERATOR:
case T_OR_EQUAL:
case T_PAAMAYIM_NEKUDOTAYIM:
case T_PLUS_EQUAL:
case T_POW:
case T_POW_EQUAL:
case T_SL:
case T_SL_EQUAL:
case T_SR:
case T_SR_EQUAL:
case T_XOR_EQUAL:
array_replace_range($this->hl, $charStart, $charLen, Highlight::OPERATOR);
$offset = $charEnd;
continue 2;
// Simple variables
case T_VARIABLE:
array_replace_range($this->hl, $charStart, $charLen, Highlight::VARIABLE);
$offset = $charEnd;
continue 2;
case T_COMMENT:
case T_DOC_COMMENT:
// TODO
break;
// Not string literals, but identifiers, keywords, etc.
case T_STRING:
if (in_array($char, $this->parent->syntax->keywords2, TRUE))
{
array_replace_range($this->hl, $charStart, $charLen, Highlight::KEYWORD2);
$offset = $charEnd;
continue 2;
}
break;
// Keywords1
case T_ABSTRACT:
case T_AS:
case T_BREAK:
case T_CASE:
case T_CATCH:
case T_CLASS:
case T_DO:
array_replace_range($this->hl, $charStart, $charLen, Highlight::KEYWORD1);
// $keyword = $this->getKeywordFromToken($token['type']);
$offset = $charEnd;
continue 2;
break;
// Keywords 2
} }
} }
} }

View File

@ -269,6 +269,12 @@ function read_stdout(int $len = 128): string
*/ */
function array_replace_range(array &$array, int $offset, int $length, $value):void function array_replace_range(array &$array, int $offset, int $length, $value):void
{ {
if ($length === 1)
{
$array[$offset] = $value;
return;
}
$replacement = array_fill(0, $length, $value); $replacement = array_fill(0, $length, $value);
array_splice($array, $offset, $length, $replacement); array_splice($array, $offset, $length, $replacement);
@ -296,6 +302,7 @@ function syntax_to_color(int $hl): int
Highlight::NUMBER => 31, // Foreground Red Highlight::NUMBER => 31, // Foreground Red
Highlight::OPERATOR => 92, // Foreground Bright Green Highlight::OPERATOR => 92, // Foreground Bright Green
Highlight::VARIABLE => 96, // Foreground Bright Cyan Highlight::VARIABLE => 96, // Foreground Bright Cyan
Highlight::DELIMITER => 34, // Foreground Blue
Highlight::INVALID => 101, // Background Bright Red Highlight::INVALID => 101, // Background Bright Red
Highlight::MATCH => 7, // Reverse! Highlight::MATCH => 7, // Reverse!
]; ];
@ -309,18 +316,18 @@ function syntax_to_color(int $hl): int
* Use 'token_get_all' to get the tokens for a file, * Use 'token_get_all' to get the tokens for a file,
* organized by row number * organized by row number
* *
* @param string $filename * @param string $code
* @return array * @return array
*/ */
function get_php_tokens(string $filename): array function get_php_tokens(string $code): array
{ {
$raw_tokens = token_get_all(file_get_contents($filename), TOKEN_PARSE); $raw_tokens = token_get_all($code);
$tokens = []; $tokens = [];
$lineNum = 1; $lineNum = 1;
$line = []; $line = [];
foreach($raw_tokens as $token) foreach($raw_tokens as $token)
{ {
// Simple characters, usually delimiters // Simple characters, usually delimiters or single character operators
if ( ! is_array($token)) if ( ! is_array($token))
{ {
$line[] = [ $line[] = [
@ -336,21 +343,20 @@ function get_php_tokens(string $filename): array
'type' => $type, 'type' => $type,
'typeName' => token_name($type), 'typeName' => token_name($type),
'char' => $char, 'char' => $char,
'line' => $currentLine, // 'line' => $currentLine,
]; ];
if ($current['line'] !== $lineNum) if ($currentLine !== $lineNum)
{ {
$tokens[$lineNum] = $line; $tokens[$lineNum] = $line;
$lineNum = $current['line']; $lineNum = $currentLine;
$line = []; $line = [];
$line[] = $current;
}
else
{
$line[] = $current;
} }
$line[] = $current;
} }
$tokens[$lineNum] = $line;
return $tokens; return $tokens;
} }