clp

command line syntax highlighter
git clone git://jeskin.net/clp.git
README | Log | Files | Refs | LICENSE

commit a584733996430fa8396bb332484a05903e4d4c29
parent db23bd8ff8a47b0cd0691f00e44a013afe6f5586
Author: Jon Eskin <eskinjp@gmail.com>
Date:   Mon, 18 Jul 2022 11:04:01 -0400

update lexers from scintillua

Diffstat:
Mlua/ftdetect.lua | 35++++++++++++++++++++++++-----------
Tlua/lexer.lua | 0
Mlua/lexers/actionscript.lua | 98+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/ada.lua | 82+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/ansi_c.lua | 213+++++++++++++++++++++++++++++--------------------------------------------------
Mlua/lexers/antlr.lua | 90++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/apdl.lua | 146+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/apl.lua | 62++++++++++++++++++++++++--------------------------------------
Mlua/lexers/applescript.lua | 97++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/asm.lua | 709++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/asp.lua | 45+++++++++++++++++----------------------------
Mlua/lexers/autoit.lua | 243+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/awk.lua | 174++++++++++++++++++++++++++++---------------------------------------------------
Mlua/lexers/bash.lua | 104+++++++++++++++++++++++++++++++-------------------------------------------------
Mlua/lexers/batch.lua | 88++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/bibtex.lua | 74+++++++++++++++++++++++++++++++-------------------------------------------
Mlua/lexers/boo.lua | 103+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/caml.lua | 114+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/chuck.lua | 122++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/clojure.lua | 312++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/cmake.lua | 261+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/coffeescript.lua | 83+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/container.lua | 6++----
Mlua/lexers/context.lua | 76+++++++++++++++++++++++++++++++++++-----------------------------------------
Mlua/lexers/cpp.lua | 133+++++++++++++++++++++++++++++++++++--------------------------------------------
Mlua/lexers/crystal.lua | 185+++++++++++++++++++++++++++++++------------------------------------------------
Mlua/lexers/csharp.lua | 114+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/css.lua | 304++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mlua/lexers/cuda.lua | 125++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/dart.lua | 101+++++++++++++++++++++++++++++++------------------------------------------------
Mlua/lexers/desktop.lua | 81+++++++++++++++++++++++++++++++++++--------------------------------------------
Mlua/lexers/diff.lua | 51++++++++++++++++++---------------------------------
Mlua/lexers/django.lua | 94++++++++++++++++++++++++++++++-------------------------------------------------
Mlua/lexers/dmd.lua | 264++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/dockerfile.lua | 69+++++++++++++++++++++++++++------------------------------------------
Mlua/lexers/dot.lua | 95+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/eiffel.lua | 95+++++++++++++++++++++++++++++++++++--------------------------------------------
Mlua/lexers/elixir.lua | 186++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/elm.lua | 75+++++++++++++++++++++++++++------------------------------------------------
Mlua/lexers/erlang.lua | 143++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Mlua/lexers/fantom.lua | 119+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/faust.lua | 68+++++++++++++++++++++++++++-----------------------------------------
Mlua/lexers/fennel.lua | 109+++++++++++++++++++++++--------------------------------------------------------
Mlua/lexers/fish.lua | 101++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/forth.lua | 99++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/fortran.lua | 132++++++++++++++++++++++++++++++++++++++-----------------------------------------
Mlua/lexers/fsharp.lua | 101++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/fstab.lua | 661+++++++++++++------------------------------------------------------------------
Mlua/lexers/gap.lua | 74+++++++++++++++++++++++++++++++-------------------------------------------
Mlua/lexers/gettext.lua | 46++++++++++++++++++----------------------------
Mlua/lexers/gherkin.lua | 74+++++++++++++++++++++++++-------------------------------------------------
Mlua/lexers/glsl.lua | 190++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/gnuplot.lua | 109+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/go.lua | 100++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/groovy.lua | 122++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/gtkrc.lua | 85+++++++++++++++++++++++++++++++------------------------------------------------
Mlua/lexers/haskell.lua | 75++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/html.lua | 270+++++++++++++++++++++++++++++++++++++------------------------------------------
Mlua/lexers/icon.lua | 102+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/idl.lua | 88++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/inform.lua | 138+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/ini.lua | 63+++++++++++++++++++++++++--------------------------------------
Mlua/lexers/io_lang.lua | 86++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/java.lua | 107+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/javascript.lua | 125+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
Mlua/lexers/json.lua | 63++++++++++++++++++++++++++++-----------------------------------
Mlua/lexers/jsp.lua | 31+++++++++++--------------------
Mlua/lexers/julia.lua | 187++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/latex.lua | 85+++++++++++++++++++++++++++++--------------------------------------------------
Mlua/lexers/ledger.lua | 62++++++++++++++++++++++++--------------------------------------
Mlua/lexers/less.lua | 25+++++++++----------------
Mlua/lexers/lexer.lua | 2322+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Mlua/lexers/lilypond.lua | 46++++++++++++++++++----------------------------
Mlua/lexers/lisp.lua | 114++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/litcoffee.lua | 28++++++++++++++--------------
Mlua/lexers/logtalk.lua | 98+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Dlua/lexers/lpeg.properties | 282-------------------------------------------------------------------------------
Mlua/lexers/lua.lua | 232++++++++++++++++++++++++++++++++++++-------------------------------------------
Mlua/lexers/makefile.lua | 139++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/man.lua | 43++++++++++++++-----------------------------
Mlua/lexers/markdown.lua | 193++++++++++++++++++++++++++++++++++++++-----------------------------------------
Mlua/lexers/matlab.lua | 142+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/meson.lua | 163+++++++++++++++++++++++++++++++------------------------------------------------
Mlua/lexers/moonscript.lua | 202++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/myrddin.lua | 94+++++++++++++++++++++++++++++++------------------------------------------------
Mlua/lexers/nemerle.lua | 112++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/networkd.lua | 335+++++++++++++++++++------------------------------------------------------------
Mlua/lexers/nim.lua | 172+++++++++++++++++++++++++++++++++++--------------------------------------------
Mlua/lexers/nsis.lua | 268+++++++++++++++++++++++++++++++++++--------------------------------------------
Mlua/lexers/null.lua | 6++----
Mlua/lexers/objective_c.lua | 113+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/pascal.lua | 106++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/perl.lua | 220+++++++++++++++++++++++++++++++++++++------------------------------------------
Mlua/lexers/php.lua | 185+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/pico8.lua | 60++++++++++++++++++++++--------------------------------------
Mlua/lexers/pike.lua | 91++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/pkgbuild.lua | 145+++++++++++++++++++++++++------------------------------------------------------
Mlua/lexers/pony.lua | 200+++++++++++++++++++++++++++++++++++--------------------------------------------
Mlua/lexers/powershell.lua | 86++++++++++++++++++++++++++++++-------------------------------------------------
Mlua/lexers/prolog.lua | 494++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
Mlua/lexers/props.lua | 51++++++++++++++++++---------------------------------
Mlua/lexers/protobuf.lua | 76++++++++++++++++++++++++++++++++--------------------------------------------
Mlua/lexers/ps.lua | 80+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/pure.lua | 74++++++++++++++++++++++++++++++--------------------------------------------
Mlua/lexers/python.lua | 177+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/rails.lua | 92++++++++++++++++++++++++++++++-------------------------------------------------
Mlua/lexers/rc.lua | 73+++++++++++++++++++++++++++++--------------------------------------------
Mlua/lexers/reason.lua | 114+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/rebol.lua | 199++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/rest.lua | 101+++++++++++++++++++++++++++++++++++++------------------------------------------
Mlua/lexers/rexx.lua | 140++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/rhtml.lua | 31+++++++++++--------------------
Mlua/lexers/routeros.lua | 143++++++++++++++++++++++++-------------------------------------------------------
Dlua/lexers/rpmspec.lua | 31-------------------------------
Mlua/lexers/rstats.lua | 78++++++++++++++++++++++++++++++++++++++----------------------------------------
Mlua/lexers/ruby.lua | 188+++++++++++++++++++++++++++++++++++--------------------------------------------
Mlua/lexers/rust.lua | 130++++++++++++++++++++++++++++++++++++++-----------------------------------------
Mlua/lexers/sass.lua | 30+++++++++++-------------------
Mlua/lexers/scala.lua | 101++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/scheme.lua | 362+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/smalltalk.lua | 76+++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/sml.lua | 180+++++++++++++++++++++++++++++++++++--------------------------------------------
Mlua/lexers/snobol4.lua | 113+++++++++++++++++++++++++++++++++++++++++--------------------------------------
Mlua/lexers/spin.lua | 169++++++++++++++++++++++++-------------------------------------------------------
Mlua/lexers/sql.lua | 115++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/systemd.lua | 540+++++++++++++++++--------------------------------------------------------------
Mlua/lexers/taskpaper.lua | 86++++++++++++++++++++++++++++++-------------------------------------------------
Mlua/lexers/tcl.lua | 73++++++++++++++++++++++++++++++-------------------------------------------
Dlua/lexers/template.txt | 40----------------------------------------
Mlua/lexers/tex.lua | 49++++++++++++++++++-------------------------------
Mlua/lexers/texinfo.lua | 309+++++++++++++++++++++++++++++++------------------------------------------------
Mlua/lexers/text.lua | 15+++++----------
Mlua/lexers/toml.lua | 84++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/vala.lua | 98++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/vb.lua | 83+++++++++++++++++++++++++++++++++++++++----------------------------------------
Mlua/lexers/vcard.lua | 128++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/verilog.lua | 149++++++++++++++++++++++++++++++++++++-------------------------------------------
Mlua/lexers/vhdl.lua | 121+++++++++++++++++++++++++++++++++----------------------------------------------
Mlua/lexers/wsf.lua | 136++++++++++++++++++++++++++++++++++++-------------------------------------------
Mlua/lexers/xml.lua | 133+++++++++++++++++++++++++++++++++++--------------------------------------------
Mlua/lexers/xs.lua | 84++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/xtend.lua | 156++++++++++++++++++++++++++++++++++---------------------------------------------
Mlua/lexers/yaml.lua | 142++++++++++++++++++++++++++++++++-----------------------------------------------
Mlua/lexers/zig.lua | 167+++++++++++++++++++++++++++++++------------------------------------------------
144 files changed, 8907 insertions(+), 12450 deletions(-)

diff --git a/lua/ftdetect.lua b/lua/ftdetect.lua @@ -1,4 +1,4 @@ -ftdetect = {} +qftdetect = {} ftdetect.ignoresuffixes = { "~$", "%.orig$", "%.bak$", "%.old$", "%.new$" @@ -42,8 +42,8 @@ ftdetect.filetypes = { ext = { "%.awk$" }, }, bash = { - utility = { "^[db]ash$", "^sh$", "^t?csh$", "^zsh$" }, - ext = { "%.bash$", "%.csh$", "%.sh$", "%.zsh$", "^APKBUILD$", "%.ebuild$", "^.bashrc$", "^.bash_profile$" }, + utility = { "^[db]ash$", "^sh$","^t?csh$","^zsh$" }, + ext = { "%.bash$", "%.csh$", "%.sh$", "%.zsh$" ,"^APKBUILD$", "%.ebuild$", "^.bashrc$", "^.bash_profile$" }, mime = { "text/x-shellscript", "application/x-shellscript" }, }, batch = { @@ -62,7 +62,7 @@ ftdetect.filetypes = { ext = { "%.ck$" }, }, clojure = { - ext = { "%.clj$", "%.cljc$", "%.cljs$", "%.edn$" } + ext = { "%.clj$", "%.cljc$", "%.cljs$", "%.edn$" } }, cmake = { ext = { "%.cmake$", "%.cmake.in$", "%.ctest$", "%.ctest.in$" }, @@ -164,6 +164,10 @@ ftdetect.filetypes = { gherkin = { ext = { "%.feature$" }, }, + ['git-commit'] = { + ext = { "^COMMIT_EDITMSG$" }, + cmd = { "set colorcolumn 72" }, + }, ['git-rebase'] = { ext = { "git%-rebase%-todo" }, }, @@ -214,6 +218,9 @@ ftdetect.filetypes = { javascript = { ext = { "%.cjs$", "%.js$", "%.jsfl$", "%.mjs$", "%.ts$", "%.jsx$", "%.tsx$" }, }, + jq = { + ext = { "%.jq$" }, + }, json = { ext = { "%.json$" }, mime = { "text/x-json" }, @@ -248,15 +255,18 @@ ftdetect.filetypes = { ext = { "%.lgt$" }, }, lua = { - utility = { "^lua%-?5?%d?$", "^lua%-?5%.%d$" }, + utility = {"^lua%-?5?%d?$", "^lua%-?5%.%d$" }, ext = { "%.lua$" }, mime = { "text/x-lua" }, }, makefile = { - hashbang = { "^#!/usr/bin/make" }, - utility = { "^make$" }, + hashbang = {"^#!/usr/bin/make"}, + utility = {"^make$"}, ext = { "%.iface$", "%.mak$", "%.mk$", "GNUmakefile", "makefile", "Makefile" }, mime = { "text/x-makefile" }, + detect = function(_, data) + return data:match("^#!/usr/bin/make") + end }, man = { ext = { @@ -344,7 +354,7 @@ ftdetect.filetypes = { ext = { "%.re$" }, }, rc 
= { - utility = { "^rc$" }, + utility = {"^rc$"}, ext = { "%.rc$", "%.es$" }, }, rebol = { @@ -399,7 +409,7 @@ ftdetect.filetypes = { spin = { ext = { "%.spin$" } }, - sql = { + sql= { ext = { "%.ddl$", "%.sql$" }, }, strace = { @@ -418,7 +428,7 @@ ftdetect.filetypes = { ext = { "%.taskpaper$" }, }, tcl = { - utility = { "^tclsh$", "^jimsh$" }, + utility = {"^tclsh$", "^jimsh$" }, ext = { "%.tcl$", "%.tk$" }, }, texinfo = { @@ -432,6 +442,9 @@ ftdetect.filetypes = { toml = { ext = { "%.toml$" }, }, + typescript = { + ext = { "%.ts$" }, + }, vala = { ext = { "%.vala$" } }, @@ -464,7 +477,7 @@ ftdetect.filetypes = { }, }, xtend = { - ext = { "%.xtend$" }, + ext = {"%.xtend$" }, }, yaml = { ext = { "%.yaml$", "%.yml$" }, diff --git a/lua/lexer.lua b/lua/lexer.lua diff --git a/lua/lexers/actionscript.lua b/lua/lexers/actionscript.lua @@ -1,75 +1,59 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Actionscript LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'actionscript'} +local lex = lexer.new('actionscript') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local line_comment = '//' * l.nonnewline^0 -local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) - --- Strings. -local sq_str = l.delimited_range("'", true) -local dq_str = l.delimited_range('"', true) -local ml_str = '<![CDATA[' * (l.any - ']]>')^0 * ']]>' -local string = token(l.STRING, sq_str + dq_str + ml_str) - --- Numbers. -local number = token(l.NUMBER, (l.float + l.integer) * S('LlUuFf')^-2) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match{ - 'break', 'continue', 'delete', 'do', 'else', 'for', 'function', 'if', 'in', - 'new', 'on', 'return', 'this', 'typeof', 'var', 'void', 'while', 'with', - 'NaN', 'Infinity', 'false', 'null', 'true', 'undefined', +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'break', 'continue', 'delete', 'do', 'else', 'for', 'function', 'if', 'in', 'new', 'on', 'return', + 'this', 'typeof', 'var', 'void', 'while', 'with', 'NaN', 'Infinity', 'false', 'null', 'true', + 'undefined', -- Reserved for future use. - 'abstract', 'case', 'catch', 'class', 'const', 'debugger', 'default', - 'export', 'extends', 'final', 'finally', 'goto', 'implements', 'import', - 'instanceof', 'interface', 'native', 'package', 'private', 'Void', - 'protected', 'public', 'dynamic', 'static', 'super', 'switch', 'synchonized', - 'throw', 'throws', 'transient', 'try', 'volatile' -}) + 'abstract', 'case', 'catch', 'class', 'const', 'debugger', 'default', 'export', 'extends', + 'final', 'finally', 'goto', 'implements', 'import', 'instanceof', 'interface', 'native', + 'package', 'private', 'Void', 'protected', 'public', 'dynamic', 'static', 'super', 'switch', + 'synchonized', 'throw', 'throws', 'transient', 'try', 'volatile' +})) -- Types. -local type = token(l.TYPE, word_match{ - 'Array', 'Boolean', 'Color', 'Date', 'Function', 'Key', 'MovieClip', 'Math', - 'Mouse', 'Number', 'Object', 'Selection', 'Sound', 'String', 'XML', 'XMLNode', - 'XMLSocket', +lex:add_rule('type', token(lexer.TYPE, word_match{ + 'Array', 'Boolean', 'Color', 'Date', 'Function', 'Key', 'MovieClip', 'Math', 'Mouse', 'Number', + 'Object', 'Selection', 'Sound', 'String', 'XML', 'XMLNode', 'XMLSocket', -- Reserved for future use. 'boolean', 'byte', 'char', 'double', 'enum', 'float', 'int', 'long', 'short' -}) +})) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. 
-local operator = token(l.OPERATOR, S('=!<>+-/*%&|^~.,;?()[]{}')) +-- Strings. +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +local ml_str = lexer.range('<![CDATA[', ']]>') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ml_str)) + +-- Comments. +local line_comment = lexer.to_eol('//') +local block_comment = lexer.range('/*', '*/') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'type', type}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlUuFf')^-2)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*%&|^~.,;?()[]{}'))) -M._foldsymbols = { - _patterns = {'[{}]', '/%*', '%*/', '//', '<!%[CDATA%[', '%]%]>'}, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = { - ['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//') - }, - [l.STRING] = {['<![CDATA['] = 1, [']]>'] = -1} -} +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) +lex:add_fold_point(lexer.STRING, '<![CDATA[', ']]>') -return M +return lex diff --git a/lua/lexers/ada.lua b/lua/lexers/ada.lua @@ -1,68 +1,54 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Ada LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'ada'} +local lex = lexer.new('ada') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. 
-local comment = token(l.COMMENT, '--' * l.nonnewline^0) - --- Strings. -local string = token(l.STRING, l.delimited_range('"', true, true)) - --- Numbers. -local hex_num = 'O' * S('xX') * (l.xdigit + '_')^1 -local integer = l.digit^1 * ('_' * l.digit^1)^0 -local float = integer^1 * ('.' * integer^0)^-1 * S('eE') * S('+-')^-1 * integer -local number = token(l.NUMBER, hex_num + S('+-')^-1 * (float + integer) * - S('LlUuFf')^-3) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'abort', 'abs', 'accept', 'all', 'and', 'begin', 'body', 'case', 'declare', - 'delay', 'do', 'else', 'elsif', 'end', 'entry', 'exception', 'exit', 'for', - 'generic', 'goto', 'if', 'in', 'is', 'loop', 'mod', 'new', 'not', 'null', - 'or', 'others', 'out', 'protected', 'raise', 'record', 'rem', 'renames', - 'requeue', 'reverse', 'select', 'separate', 'subtype', 'task', 'terminate', - 'then', 'type', 'until', 'when', 'while', 'xor', +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'abort', 'abs', 'accept', 'all', 'and', 'begin', 'body', 'case', 'declare', 'delay', 'do', 'else', + 'elsif', 'end', 'entry', 'exception', 'exit', 'for', 'generic', 'goto', 'if', 'in', 'is', 'loop', + 'mod', 'new', 'not', 'null', 'or', 'others', 'out', 'protected', 'raise', 'record', 'rem', + 'renames', 'requeue', 'reverse', 'select', 'separate', 'subtype', 'task', 'terminate', 'then', + 'type', 'until', 'when', 'while', 'xor', -- Preprocessor. 'package', 'pragma', 'use', 'with', - -- Function + -- Function. 'function', 'procedure', 'return', -- Storage class. - 'abstract', 'access', 'aliased', 'array', 'at', 'constant', 'delta', 'digits', - 'interface', 'limited', 'of', 'private', 'range', 'tagged', 'synchronized', + 'abstract', 'access', 'aliased', 'array', 'at', 'constant', 'delta', 'digits', 'interface', + 'limited', 'of', 'private', 'range', 'tagged', 'synchronized', -- Boolean. 'true', 'false' -}) +})) -- Types. 
-local type = token(l.TYPE, word_match{ - 'boolean', 'character', 'count', 'duration', 'float', 'integer', 'long_float', - 'long_integer', 'priority', 'short_float', 'short_integer', 'string' -}) +lex:add_rule('type', token(lexer.TYPE, word_match{ + 'boolean', 'character', 'count', 'duration', 'float', 'integer', 'long_float', 'long_integer', + 'priority', 'short_float', 'short_integer', 'string' +})) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, S(':;=<>&+-*/.()')) +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true, false))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('--'))) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'type', type}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} +-- Numbers. +local integer = lexer.digit^1 * ('_' * lexer.digit^1)^0 +local float = integer^1 * ('.' * integer^0)^-1 * S('eE') * S('+-')^-1 * integer +lex:add_rule('number', token(lexer.NUMBER, S('+-')^-1 * (float + integer))) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S(':;=<>&+-*/.()'))) -return M +return lex diff --git a/lua/lexers/ansi_c.lua b/lua/lexers/ansi_c.lua @@ -1,154 +1,97 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- C LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'ansi_c'} +local lex = lexer.new('ansi_c') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. 
-local line_comment = '//' * l.nonnewline_esc^0 -local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 --- local preproc_ifzero = l.starts_line('#if') * S(' \t')^0 * '0' * l.space * --- (l.starts_line('#endif')) -local comment = token(l.COMMENT, line_comment + block_comment) - --- Strings. -local sq_str = P('L')^-1 * l.delimited_range("'", true) -local dq_str = P('L')^-1 * l.delimited_range('"', true) -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. -local float_suffix = P('f')^-1 -local integer_suffix = (S('uU')^-1 * word_match{ 'l', 'L', 'll', 'LL' }^-1) + - (word_match{ 'l', 'L', 'll', 'LL' }^-1 * S('uU')^-1) -local number = token(l.NUMBER, (l.float * float_suffix) + - (l.integer * integer_suffix)) - --- Preprocessor. -local preproc_word = word_match{ - 'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef', 'ifndef', 'line', - 'pragma', 'undef', 'warning' -} - -local preproc = #l.starts_line('#') * - (token(l.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) + - token(l.PREPROCESSOR, '#' * S('\t ')^0 * 'include') * - (token(l.WHITESPACE, S('\t ')^0) * - token(l.STRING, l.delimited_range('<>', true, true)))^-1) +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) -- Keywords. -local storage_class = word_match{ - -- C11 6.7.1 - 'typedef', 'extern', 'static', '_Thread_local', 'auto', 'register', -} - -local type_qualifier = word_match{ - -- C11 6.7.3 - 'const', 'restrict', 'volatile', '_Atomic', -} - -local function_specifier = word_match{ - -- C11 6.7.4 - 'inline', '_Noreturn', -} - -local extra_keywords = word_match{ - 'asm', '__asm', '__asm__', '__restrict__', '__inline', '__inline__', - '__attribute__', '__declspec' -} +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'auto', 'break', 'case', 'const', 'continue', 'default', 'do', 'else', 'enum', 'extern', 'for', + 'goto', 'if', 'inline', 'register', 'restrict', 'return', 'sizeof', 'static', 'switch', 'typedef', + 'volatile', 'while', + -- C99. 
+ 'false', 'true', + -- C11. + '_Alignas', '_Alignof', '_Atomic', '_Generic', '_Noreturn', '_Static_assert', '_Thread_local', + -- Compiler. + 'asm', '__asm', '__asm__', '__restrict__', '__inline', '__inline__', '__attribute__', '__declspec' +})) -local keyword = token(l.KEYWORD, word_match{ - 'break', 'case', 'continue', 'default', 'do', 'else', 'enum', 'for', 'goto', - 'if', 'return', 'switch', 'while', - '_Alignas', '_Generic', '_Static_assert', -} + storage_class + type_qualifier + function_specifier + extra_keywords) +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match{ + 'bool', 'char', 'double', 'float', 'int', 'long', 'short', 'signed', 'struct', 'union', + 'unsigned', 'void', '_Bool', '_Complex', '_Imaginary', + -- Stdlib types. + 'ptrdiff_t', 'size_t', 'max_align_t', 'wchar_t', 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t' +} + P('u')^-1 * 'int' * (P('_least') + '_fast')^-1 * lexer.digit^1 * '_t')) -- Constants. -local errno = word_match{ - -- http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html - 'E2BIG', 'EACCES', 'EADDRINUSE', 'EADDRNOTAVAIL', 'EAFNOSUPPORT', - 'EAGAIN', 'EALREADY', 'EBADF', 'EBADMSG', 'EBUSY', 'ECANCELED', 'ECHILD', - 'ECONNABORTED', 'ECONNREFUSED', 'ECONNRESET', 'EDEADLK', 'EDESTADDRREQ', - 'EDOM', 'EDQUOT', 'EEXIST', 'EFAULT', 'EFBIG', 'EHOSTUNREACH', 'EIDRM', - 'EILSEQ', 'EINPROGRESS', 'EINTR', 'EINVAL', 'EIO', 'EISCONN', 'EISDIR', - 'ELOOP', 'EMFILE', 'EMLINK', 'EMSGSIZE', 'EMULTIHOP', 'ENAMETOOLONG', - 'ENETDOWN', 'ENETRESET', 'ENETUNREACH', 'ENFILE', 'ENOBUFS', 'ENODATA', - 'ENODEV', 'ENOENT', 'ENOEXEC', 'ENOLCK', 'ENOLINK', 'ENOMEM', - 'ENOMSG', 'ENOPROTOOPT', 'ENOSPC', 'ENOSR', 'ENOSTR', 'ENOSYS', - 'ENOTCONN', 'ENOTDIR', 'ENOTEMPTY', 'ENOTRECOVERABLE', 'ENOTSOCK', - 'ENOTSUP', 'ENOTTY', 'ENXIO', 'EOPNOTSUPP', 'EOVERFLOW', 'EOWNERDEAD', - 'EPERM', 'EPIPE', 'EPROTO', 'EPROTONOSUPPORT', 'EPROTOTYPE', 'ERANGE', - 'EROFS', 'ESPIPE', 'ESRCH', 'ESTALE', 'ETIME', 'ETIMEDOUT', 'ETXTBSY', - 'EWOULDBLOCK', 
'EXDEV', -} +lex:add_rule('constants', token(lexer.CONSTANT, word_match{ + 'NULL', + -- Preprocessor. + '__DATE__', '__FILE__', '__LINE__', '__TIME__', '__func__', + -- errno.h. + 'E2BIG', 'EACCES', 'EADDRINUSE', 'EADDRNOTAVAIL', 'EAFNOSUPPORT', 'EAGAIN', 'EALREADY', 'EBADF', + 'EBADMSG', 'EBUSY', 'ECANCELED', 'ECHILD', 'ECONNABORTED', 'ECONNREFUSED', 'ECONNRESET', + 'EDEADLK', 'EDESTADDRREQ', 'EDOM', 'EDQUOT', 'EEXIST', 'EFAULT', 'EFBIG', 'EHOSTUNREACH', 'EIDRM', + 'EILSEQ', 'EINPROGRESS', 'EINTR', 'EINVAL', 'EIO', 'EISCONN', 'EISDIR', 'ELOOP', 'EMFILE', + 'EMLINK', 'EMSGSIZE', 'EMULTIHOP', 'ENAMETOOLONG', 'ENETDOWN', 'ENETRESET', 'ENETUNREACH', + 'ENFILE', 'ENOBUFS', 'ENODATA', 'ENODEV', 'ENOENT', 'ENOEXEC', 'ENOLCK', 'ENOLINK', 'ENOMEM', + 'ENOMSG', 'ENOPROTOOPT', 'ENOSPC', 'ENOSR', 'ENOSTR', 'ENOSYS', 'ENOTCONN', 'ENOTDIR', + 'ENOTEMPTY', 'ENOTRECOVERABLE', 'ENOTSOCK', 'ENOTSUP', 'ENOTTY', 'ENXIO', 'EOPNOTSUPP', + 'EOVERFLOW', 'EOWNERDEAD', 'EPERM', 'EPIPE', 'EPROTO', 'EPROTONOSUPPORT', 'EPROTOTYPE', 'ERANGE', + 'EROFS', 'ESPIPE', 'ESRCH', 'ESTALE', 'ETIME', 'ETIMEDOUT', 'ETXTBSY', 'EWOULDBLOCK', 'EXDEV', + -- stdint.h. + 'PTRDIFF_MIN', 'PTRDIFF_MAX', 'SIZE_MAX', 'SIG_ATOMIC_MIN', 'SIG_ATOMIC_MAX', 'WINT_MIN', + 'WINT_MAX', 'WCHAR_MIN', 'WCHAR_MAX' +} + P('U')^-1 * 'INT' * ((P('_LEAST') + '_FAST')^-1 * lexer.digit^1 + 'PTR' + 'MAX') * + (P('_MIN') + '_MAX'))) -local preproc_macros = word_match{ - -- C11 6.10.8.1 Mandatory macros - '__DATE__', '__FILE__', '__LINE__', '__TIME__', - -- C11 6.4.2.2 Predefined identifiers - '__func__', -} +-- Labels. 
+lex:add_rule('label', token(lexer.LABEL, lexer.starts_line(lexer.word * ':'))) -local constant = token(l.CONSTANT, word_match{ - 'true', 'false', - 'NULL', 'CHAR_BIT', 'SIZE_MAX', } + - ((P('WINT') + P('WCHAR') + P('SIG_ATOMIC') + P('PTRDIFF')) * (P('_MIN') + P('_MAX'))) + - ( P('INT') * (((P('_LEAST') + P('_FAST'))^-1 * l.dec_num^1) + P('MAX') + P('PTR')) * (P('_MIN') + P('_MAX'))) + - (P('UINT') * (((P('_LEAST') + P('_FAST'))^-1 * l.dec_num^1) + P('MAX') + P('PTR')) * P('_MAX')) + - errno + preproc_macros -) +-- Strings. +local sq_str = P('L')^-1 * lexer.range("'", true) +local dq_str = P('L')^-1 * lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) --- Types. -local type = token(l.TYPE, word_match{ - 'bool', 'char', 'double', 'float', 'int', 'long', 'short', - 'signed', 'struct', 'union', 'unsigned', 'void', '_Bool', '_Complex', - '_Imaginary', 'ptrdiff_t', 'size_t', 'max_align_t', 'wchar_t', - 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t'} + - (P('u')^-1 * P('int') * (P('_least') + P('_fast'))^-1 * l.dec_num^1 * P('_t')) + - (S('usif') * l.dec_num^1 * P('_t')) + - (P('__')^-1 * S('usif') * l.dec_num^1) -) +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Labels. --- FIXME: Accept whitespace before label. -local label = token(l.LABEL, l.starts_line(l.word * ':')) +-- Comments. +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') + + lexer.range('#if' * S(' \t')^0 * '0' * lexer.space, '#endif') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) --- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +-- Numbers. +local integer = lexer.integer * word_match('u l ll ul ull lu llu', true)^-1 +local float = lexer.float * P('f')^-1 +lex:add_rule('number', token(lexer.NUMBER, float + integer)) --- Operators. 
-local operator = token(l.OPERATOR, - S('+-/*%<>~!=^&|?~:;,.()[]{}') + - word_match{ 'sizeof', '_Alignof' } -) +-- Preprocessor. +local include = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') * + (ws * token(lexer.STRING, lexer.range('<', '>', true)))^-1 +local preproc = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * + word_match('define elif else endif if ifdef ifndef line pragma undef')) +lex:add_rule('preprocessor', include + preproc) -M._rules = { - {'whitespace', ws}, - {'comment', comment}, - {'keyword', keyword}, - {'type', type}, - {'constant', constant}, - {'operator', operator}, - {'label', label}, - {'identifier', identifier}, - {'string', string}, - {'number', number}, - {'preproc', preproc}, -} +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>~!=^&|?~:;,.()[]{}'))) -M._foldsymbols = { - _patterns = {'#?%l+', '[{}]', '/%*', '%*/', '//'}, - [l.PREPROCESSOR] = {['if'] = 1, ifdef = 1, ifndef = 1, endif = -1}, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = { - ['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//'), - ['#if'] = 1, ['#endif'] = -1 - } -} +-- Fold points. +lex:add_fold_point(lexer.PREPROCESSOR, '#if', '#endif') +lex:add_fold_point(lexer.PREPROCESSOR, '#ifdef', '#endif') +lex:add_fold_point(lexer.PREPROCESSOR, '#ifndef', '#endif') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) -return M +return lex diff --git a/lua/lexers/antlr.lua b/lua/lexers/antlr.lua @@ -1,74 +1,56 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- ANTLR LPeg lexer. 
-local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'antlr'} +local lex = lexer.new('antlr') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local line_comment = '//' * l.nonnewline^0 -local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) - --- Strings. -local string = token(l.STRING, l.delimited_range("'", true)) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'abstract', 'break', 'case', 'catch', 'continue', 'default', 'do', 'else', - 'extends', 'final', 'finally', 'for', 'if', 'implements', 'instanceof', - 'native', 'new', 'private', 'protected', 'public', 'return', 'static', - 'switch', 'synchronized', 'throw', 'throws', 'transient', 'try', 'volatile', - 'while', 'package', 'import', 'header', 'options', 'tokens', 'strictfp', - 'false', 'null', 'super', 'this', 'true' -}) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'abstract', 'break', 'case', 'catch', 'continue', 'default', 'do', 'else', 'extends', 'final', + 'finally', 'for', 'if', 'implements', 'instanceof', 'native', 'new', 'private', 'protected', + 'public', 'return', 'static', 'switch', 'synchronized', 'throw', 'throws', 'transient', 'try', + 'volatile', 'while', 'package', 'import', 'header', 'options', 'tokens', 'strictfp', 'false', + 'null', 'super', 'this', 'true' +})) -- Types. -local type = token(l.TYPE, word_match{ - 'boolean', 'byte', 'char', 'class', 'double', 'float', 'int', 'interface', - 'long', 'short', 'void' -}) +lex:add_rule('type', token(lexer.TYPE, word_match( + 'boolean byte char class double float int interface long short void'))) -- Functions. 
-local func = token(l.FUNCTION, 'assert') +lex:add_rule('func', token(lexer.FUNCTION, 'assert')) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, S('$@:;|.=+*?~!^>-()[]{}')) +-- Comments. +local line_comment = lexer.to_eol('//') +local block_comment = lexer.range('/*', '*/') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Actions. -local action = #P('{') * operator * token('action', (1 - P('}'))^0) * - (#P('}') * operator)^-1 +local open_brace = token(lexer.OPERATOR, '{') +local close_brace = token(lexer.OPERATOR, '}') +lex:add_rule('action', open_brace * token('action', (1 - P('}'))^0) * close_brace^-1) +lex:add_style('action', lexer.styles.nothing) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'type', type}, - {'function', func}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'action', action}, - {'operator', operator}, -} +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.range("'", true))) -M._tokenstyles = { - action = l.STYLE_NOTHING -} +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('$@:;|.=+*?~!^>-()[]{}'))) -M._foldsymbols = { - _patterns = {'[:;%(%){}]', '/%*', '%*/', '//'}, - [l.OPERATOR] = { - [':'] = 1, [';'] = -1, ['('] = 1, [')'] = -1, ['{'] = 1, ['}'] = -1 - }, - [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} -} +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, ':', ';') +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) -return M +return lex diff --git a/lua/lexers/apdl.lua b/lua/lexers/apdl.lua @@ -1,102 +1,78 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. 
See LICENSE. -- APDL LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'apdl'} +local lex = lexer.new('apdl', {case_insensitive_fold_points = true}) -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) --- Comments. -local comment = token(l.COMMENT, '!' * l.nonnewline^0) +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match({ + '*abbr', '*abb', '*afun', '*afu', '*ask', '*cfclos', '*cfc', '*cfopen', '*cfo', '*cfwrite', + '*cfw', '*create', '*cre', '*cycle', '*cyc', '*del', '*dim', '*do', '*elseif', '*else', '*enddo', + '*endif', '*end', '*eval', '*eva', '*exit', '*exi', '*get', '*go', '*if', '*list', '*lis', + '*mfouri', '*mfo', '*mfun', '*mfu', '*mooney', '*moo', '*moper', '*mop', '*msg', '*repeat', + '*rep', '*set', '*status', '*sta', '*tread', '*tre', '*ulib', '*uli', '*use', '*vabs', '*vab', + '*vcol', '*vco', '*vcum', '*vcu', '*vedit', '*ved', '*vfact', '*vfa', '*vfill', '*vfi', '*vfun', + '*vfu', '*vget', '*vge', '*vitrp', '*vit', '*vlen', '*vle', '*vmask', '*vma', '*voper', '*vop', + '*vplot', '*vpl', '*vput', '*vpu', '*vread', '*vre', '*vscfun', '*vsc', '*vstat', '*vst', + '*vwrite', '*vwr', -- + '/anfile', '/anf', '/angle', '/ang', '/annot', '/ann', '/anum', '/anu', '/assign', '/ass', + '/auto', '/aut', '/aux15', '/aux2', '/aux', '/axlab', '/axl', '/batch', '/bat', '/clabel', '/cla', + '/clear', '/cle', '/clog', '/clo', '/cmap', '/cma', '/color', '/col', '/com', '/config', + '/contour', '/con', '/copy', '/cop', '/cplane', '/cpl', '/ctype', '/cty', '/cval', '/cva', + '/delete', '/del', '/devdisp', '/device', '/dev', '/dist', '/dis', '/dscale', '/dsc', '/dv3d', + '/dv3', '/edge', '/edg', '/efacet', '/efa', '/eof', '/erase', '/era', '/eshape', '/esh', 
'/exit', + '/exi', '/expand', '/exp', '/facet', '/fac', '/fdele', '/fde', '/filname', '/fil', '/focus', + '/foc', '/format', '/for', '/ftype', '/fty', '/gcmd', '/gcm', '/gcolumn', '/gco', '/gfile', + '/gfi', '/gformat', '/gfo', '/gline', '/gli', '/gmarker', '/gma', '/golist', '/gol', '/gopr', + '/gop', '/go', '/graphics', '/gra', '/gresume', '/gre', '/grid', '/gri', '/gropt', '/gro', + '/grtyp', '/grt', '/gsave', '/gsa', '/gst', '/gthk', '/gth', '/gtype', '/gty', '/header', '/hea', + '/input', '/inp', '/larc', '/lar', '/light', '/lig', '/line', '/lin', '/lspec', '/lsp', + '/lsymbol', '/lsy', '/menu', '/men', '/mplib', '/mpl', '/mrep', '/mre', '/mstart', '/mst', + '/nerr', '/ner', '/noerase', '/noe', '/nolist', '/nol', '/nopr', '/nop', '/normal', '/nor', + '/number', '/num', '/opt', '/output', '/out', '/page', '/pag', '/pbc', '/pbf', '/pcircle', '/pci', + '/pcopy', '/pco', '/plopts', '/plo', '/pmacro', '/pma', '/pmeth', '/pme', '/pmore', '/pmo', + '/pnum', '/pnu', '/polygon', '/pol', '/post26', '/post1', '/pos', '/prep7', '/pre', '/psearch', + '/pse', '/psf', '/pspec', '/psp', '/pstatus', '/pst', '/psymb', '/psy', '/pwedge', '/pwe', + '/quit', '/qui', '/ratio', '/rat', '/rename', '/ren', '/replot', '/rep', '/reset', '/res', '/rgb', + '/runst', '/run', '/seclib', '/sec', '/seg', '/shade', '/sha', '/showdisp', '/show', '/sho', + '/shrink', '/shr', '/solu', '/sol', '/sscale', '/ssc', '/status', '/sta', '/stitle', '/sti', + '/syp', '/sys', '/title', '/tit', '/tlabel', '/tla', '/triad', '/tri', '/trlcy', '/trl', '/tspec', + '/tsp', '/type', '/typ', '/ucmd', '/ucm', '/uis', '/ui', '/units', '/uni', '/user', '/use', + '/vcone', '/vco', '/view', '/vie', '/vscale', '/vsc', '/vup', '/wait', '/wai', '/window', '/win', + '/xrange', '/xra', '/yrange', '/yra', '/zoom', '/zoo' +}, true))) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. 
-local string = token(l.STRING, l.delimited_range("'", true, true)) +lex:add_rule('string', token(lexer.STRING, lexer.range("'", true, false))) -- Numbers. -local number = token(l.NUMBER, l.float + l.integer) - --- Keywords. -local keyword = token(l.KEYWORD, word_match({ - '*abbr', '*abb', '*afun', '*afu', '*ask', '*cfclos', '*cfc', '*cfopen', - '*cfo', '*cfwrite', '*cfw', '*create', '*cre', '*cycle', '*cyc', '*del', - '*dim', '*do', '*elseif', '*else', '*enddo', '*endif', '*end', '*eval', - '*eva', '*exit', '*exi', '*get', '*go', '*if', '*list', '*lis', '*mfouri', - '*mfo', '*mfun', '*mfu', '*mooney', '*moo', '*moper', '*mop', '*msg', - '*repeat', '*rep', '*set', '*status', '*sta', '*tread', '*tre', '*ulib', - '*uli', '*use', '*vabs', '*vab', '*vcol', '*vco', '*vcum', '*vcu', '*vedit', - '*ved', '*vfact', '*vfa', '*vfill', '*vfi', '*vfun', '*vfu', '*vget', '*vge', - '*vitrp', '*vit', '*vlen', '*vle', '*vmask', '*vma', '*voper', '*vop', - '*vplot', '*vpl', '*vput', '*vpu', '*vread', '*vre', '*vscfun', '*vsc', - '*vstat', '*vst', '*vwrite', '*vwr', '/anfile', '/anf', '/angle', '/ang', - '/annot', '/ann', '/anum', '/anu', '/assign', '/ass', '/auto', '/aut', - '/aux15', '/aux2', '/aux', '/axlab', '/axl', '/batch', '/bat', '/clabel', - '/cla', '/clear', '/cle', '/clog', '/clo', '/cmap', '/cma', '/color', '/col', - '/com', '/config', '/contour', '/con', '/copy', '/cop', '/cplane', '/cpl', - '/ctype', '/cty', '/cval', '/cva', '/delete', '/del', '/devdisp', '/device', - '/dev', '/dist', '/dis', '/dscale', '/dsc', '/dv3d', '/dv3', '/edge', '/edg', - '/efacet', '/efa', '/eof', '/erase', '/era', '/eshape', '/esh', '/exit', - '/exi', '/expand', '/exp', '/facet', '/fac', '/fdele', '/fde', '/filname', - '/fil', '/focus', '/foc', '/format', '/for', '/ftype', '/fty', '/gcmd', - '/gcm', '/gcolumn', '/gco', '/gfile', '/gfi', '/gformat', '/gfo', '/gline', - '/gli', '/gmarker', '/gma', '/golist', '/gol', '/gopr', '/gop', '/go', - '/graphics', '/gra', '/gresume', '/gre', '/grid', 
'/gri', '/gropt', '/gro', - '/grtyp', '/grt', '/gsave', '/gsa', '/gst', '/gthk', '/gth', '/gtype', '/gty', - '/header', '/hea', '/input', '/inp', '/larc', '/lar', '/light', '/lig', - '/line', '/lin', '/lspec', '/lsp', '/lsymbol', '/lsy', '/menu', '/men', - '/mplib', '/mpl', '/mrep', '/mre', '/mstart', '/mst', '/nerr', '/ner', - '/noerase', '/noe', '/nolist', '/nol', '/nopr', '/nop', '/normal', '/nor', - '/number', '/num', '/opt', '/output', '/out', '/page', '/pag', '/pbc', '/pbf', - '/pcircle', '/pci', '/pcopy', '/pco', '/plopts', '/plo', '/pmacro', '/pma', - '/pmeth', '/pme', '/pmore', '/pmo', '/pnum', '/pnu', '/polygon', '/pol', - '/post26', '/post1', '/pos', '/prep7', '/pre', '/psearch', '/pse', '/psf', - '/pspec', '/psp', '/pstatus', '/pst', '/psymb', '/psy', '/pwedge', '/pwe', - '/quit', '/qui', '/ratio', '/rat', '/rename', '/ren', '/replot', '/rep', - '/reset', '/res', '/rgb', '/runst', '/run', '/seclib', '/sec', '/seg', - '/shade', '/sha', '/showdisp', '/show', '/sho', '/shrink', '/shr', '/solu', - '/sol', '/sscale', '/ssc', '/status', '/sta', '/stitle', '/sti', '/syp', - '/sys', '/title', '/tit', '/tlabel', '/tla', '/triad', '/tri', '/trlcy', - '/trl', '/tspec', '/tsp', '/type', '/typ', '/ucmd', '/ucm', '/uis', '/ui', - '/units', '/uni', '/user', '/use', '/vcone', '/vco', '/view', '/vie', - '/vscale', '/vsc', '/vup', '/wait', '/wai', '/window', '/win', '/xrange', - '/xra', '/yrange', '/yra', '/zoom', '/zoo' -}, '*/', true)) - --- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Functions. -local func = token(l.FUNCTION, l.delimited_range('%', true, true)) - --- Operators. -local operator = token(l.OPERATOR, S('+-*/$=,;()')) +lex:add_rule('function', token(lexer.FUNCTION, lexer.range('%', true, false))) -- Labels. 
-local label = token(l.LABEL, l.starts_line(':') * l.word) +lex:add_rule('label', token(lexer.LABEL, lexer.starts_line(':') * lexer.word)) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'identifier', identifier}, - {'string', string}, - {'number', number}, - {'function', func}, - {'label', label}, - {'comment', comment}, - {'operator', operator}, -} +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('!'))) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/$=,;()'))) -M._foldsymbols = { - _patterns = {'%*[A-Za-z]+', '!'}, - [l.KEYWORD] = { - ['*if'] = 1, ['*IF'] = 1, ['*do'] = 1, ['*DO'] = 1, ['*dowhile'] = 1, - ['*DOWHILE'] = 1, - ['*endif'] = -1, ['*ENDIF'] = -1, ['*enddo'] = -1, ['*ENDDO'] = -1 - }, - [l.COMMENT] = {['!'] = l.fold_line_comments('!')} -} +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, '*if', '*endif') +lex:add_fold_point(lexer.KEYWORD, '*do', '*enddo') +lex:add_fold_point(lexer.KEYWORD, '*dowhile', '*enddo') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('!')) -return M +return lex diff --git a/lua/lexers/apl.lua b/lua/lexers/apl.lua @@ -1,69 +1,55 @@ --- Copyright 2015-2017 David B. Lamkins <david@lamkins.net>. See LICENSE. +-- Copyright 2015-2022 David B. Lamkins <david@lamkins.net>. See LICENSE. -- APL LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'apl'} +local lex = lexer.new('apl') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments. -local comment = token(l.COMMENT, (P('⍝') + P('#')) * l.nonnewline^0) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(P('⍝') + '#'))) -- Strings. 
-local sq_str = l.delimited_range("'", false, true) -local dq_str = l.delimited_range('"') - -local string = token(l.STRING, sq_str + dq_str) +local sq_str = lexer.range("'", false, false) +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Numbers. -local dig = R('09') +local dig = lexer.digit local rad = P('.') local exp = S('eE') local img = S('jJ') local sgn = P('¯')^-1 -local float = sgn * (dig^0 * rad * dig^1 + dig^1 * rad * dig^0 + dig^1) * - (exp * sgn *dig^1)^-1 -local number = token(l.NUMBER, float * img * float + float) +local float = sgn * (dig^0 * rad * dig^1 + dig^1 * rad * dig^0 + dig^1) * (exp * sgn * dig^1)^-1 +lex:add_rule('number', token(lexer.NUMBER, float * img * float + float)) -- Keywords. -local keyword = token(l.KEYWORD, P('⍞') + P('χ') + P('⍺') + P('⍶') + P('⍵') + - P('⍹') + P('⎕') * R('AZ', 'az')^0) +lex:add_rule('keyword', token(lexer.KEYWORD, + P('⍞') + 'χ' + '⍺' + '⍶' + '⍵' + '⍹' + '⎕' * lexer.alpha^0)) -- Names. -local n1l = R('AZ', 'az') -local n1b = P('_') + P('∆') + P('⍙') -local n2l = n1l + R('09') -local n2b = n1b + P('¯') +local n1l = lexer.alpha +local n1b = P('_') + '∆' + '⍙' +local n2l = n1l + lexer.digit +local n2b = n1b + '¯' local n1 = n1l + n1b local n2 = n2l + n2b local name = n1 * n2^0 -- Labels. -local label = token(l.LABEL, name * P(':')) +lex:add_rule('label', token(lexer.LABEL, name * ':')) -- Variables. -local variable = token(l.VARIABLE, name) +lex:add_rule('variable', token(lexer.VARIABLE, name)) -- Special. -local special = token(l.TYPE, S('{}[]();') + P('←') + P('→') + P('◊')) +lex:add_rule('special', token(lexer.TYPE, S('{}[]();') + '←' + '→' + '◊')) -- Nabla. 
-local nabla = token(l.PREPROCESSOR, P('∇') + P('⍫')) - -M._rules = { - {'whitespace', ws}, - {'comment', comment}, - {'string', string}, - {'number', number}, - {'keyword', keyword}, - {'label', label}, - {'variable', variable}, - {'special', special}, - {'nabla', nabla}, -} +lex:add_rule('nabla', token(lexer.PREPROCESSOR, P('∇') + '⍫')) -return M +return lex diff --git a/lua/lexers/applescript.lua b/lua/lexers/applescript.lua @@ -1,82 +1,69 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Applescript LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'applescript'} +local lex = lexer.new('applescript') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local line_comment = '--' * l.nonnewline^0 -local block_comment = '(*' * (l.any - '*)')^0 * P('*)')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) - --- Strings. -local string = token(l.STRING, l.delimited_range('"', true)) - --- Numbers. -local number = token(l.NUMBER, l.float + l.integer) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match({ - 'script', 'property', 'prop', 'end', 'copy', 'to', 'set', 'global', 'local', - 'on', 'to', 'of', 'in', 'given', 'with', 'without', 'return', 'continue', - 'tell', 'if', 'then', 'else', 'repeat', 'times', 'while', 'until', 'from', - 'exit', 'try', 'error', 'considering', 'ignoring', 'timeout', 'transaction', - 'my', 'get', 'put', 'into', 'is', +lex:add_rule('keyword', token(lexer.KEYWORD, word_match({ + 'script', 'property', 'prop', 'end', 'copy', 'to', 'set', 'global', 'local', 'on', 'to', 'of', + 'in', 'given', 'with', 'without', 'return', 'continue', 'tell', 'if', 'then', 'else', 'repeat', + 'times', 'while', 'until', 'from', 'exit', 'try', 'error', 'considering', 'ignoring', 'timeout', + 'transaction', 'my', 'get', 'put', 'into', 'is', -- References. - 'each', 'some', 'every', 'whose', 'where', 'id', 'index', 'first', 'second', - 'third', 'fourth', 'fifth', 'sixth', 'seventh', 'eighth', 'ninth', 'tenth', - 'last', 'front', 'back', 'st', 'nd', 'rd', 'th', 'middle', 'named', 'through', - 'thru', 'before', 'after', 'beginning', 'the', + 'each', 'some', 'every', 'whose', 'where', 'id', 'index', 'first', 'second', 'third', 'fourth', + 'fifth', 'sixth', 'seventh', 'eighth', 'ninth', 'tenth', 'last', 'front', 'back', 'st', 'nd', + 'rd', 'th', 'middle', 'named', 'through', 'thru', 'before', 'after', 'beginning', 'the', -- Commands. - 'close', 'copy', 'count', 'delete', 'duplicate', 'exists', 'launch', 'make', - 'move', 'open', 'print', 'quit', 'reopen', 'run', 'save', 'saving', + 'close', 'copy', 'count', 'delete', 'duplicate', 'exists', 'launch', 'make', 'move', 'open', + 'print', 'quit', 'reopen', 'run', 'save', 'saving', -- Operators. - 'div', 'mod', 'and', 'not', 'or', 'as', 'contains', 'equal', 'equals', - 'isn\'t', -}, "'", true)) + 'div', 'mod', 'and', 'not', 'or', 'as', 'contains', 'equal', 'equals', 'isn\'t' +}, true))) -- Constants. 
-local constant = token(l.CONSTANT, word_match({ +lex:add_rule('constant', token(lexer.CONSTANT, word_match({ 'case', 'diacriticals', 'expansion', 'hyphens', 'punctuation', -- Predefined variables. 'it', 'me', 'version', 'pi', 'result', 'space', 'tab', 'anything', -- Text styles. - 'bold', 'condensed', 'expanded', 'hidden', 'italic', 'outline', 'plain', - 'shadow', 'strikethrough', 'subscript', 'superscript', 'underline', + 'bold', 'condensed', 'expanded', 'hidden', 'italic', 'outline', 'plain', 'shadow', + 'strikethrough', 'subscript', 'superscript', 'underline', -- Save options. 'ask', 'no', 'yes', -- Booleans. 'false', 'true', -- Date and time. - 'weekday', 'monday', 'mon', 'tuesday', 'tue', 'wednesday', 'wed', 'thursday', - 'thu', 'friday', 'fri', 'saturday', 'sat', 'sunday', 'sun', 'month', - 'january', 'jan', 'february', 'feb', 'march', 'mar', 'april', 'apr', 'may', - 'june', 'jun', 'july', 'jul', 'august', 'aug', 'september', 'sep', 'october', - 'oct', 'november', 'nov', 'december', 'dec', 'minutes', 'hours', 'days', - 'weeks' -}, nil, true)) + 'weekday', 'monday', 'mon', 'tuesday', 'tue', 'wednesday', 'wed', 'thursday', 'thu', 'friday', + 'fri', 'saturday', 'sat', 'sunday', 'sun', 'month', 'january', 'jan', 'february', 'feb', 'march', + 'mar', 'april', 'apr', 'may', 'june', 'jun', 'july', 'jul', 'august', 'aug', 'september', 'sep', + 'october', 'oct', 'november', 'nov', 'december', 'dec', 'minutes', 'hours', 'days', 'weeks' +}, true))) -- Identifiers. -local identifier = token(l.IDENTIFIER, (l.alpha + '_') * l.alnum^0) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha * (lexer.alnum + '_')^0)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true))) + +-- Comments. +local line_comment = lexer.to_eol('--') +local block_comment = lexer.range('(*', '*)') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. 
-local operator = token(l.OPERATOR, S('+-^*/&<>=:,(){}')) +lex:add_rule('operator', token(lexer.OPERATOR, S('+-^*/&<>=:,(){}'))) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'constant', constant}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} +-- Fold points. +lex:add_fold_point(lexer.COMMENT, '(*', '*)') -return M +return lex diff --git a/lua/lexers/asm.lua b/lua/lexers/asm.lua @@ -1,212 +1,152 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- NASM Assembly LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'asm'} +local lex = lexer.new('asm') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local comment = token(l.COMMENT, ';' * l.nonnewline^0) - --- Strings. -local sq_str = l.delimited_range("'", true) -local dq_str = l.delimited_range('"', true) -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. -local number = token(l.NUMBER, l.float + l.integer * S('hqb')^-1) - --- Preprocessor. 
-local preproc_word = word_match{ - 'arg', 'assign', 'clear', 'define', 'defstr', 'deftok', 'depend', 'elif', - 'elifctx', 'elifdef', 'elifempty', 'elifenv', 'elifid', 'elifidn', 'elifidni', - 'elifmacro', 'elifn', 'elifnctx', 'elifndef', 'elifnempty', 'elifnenv', - 'elifnid', 'elifnidn', 'elifnidni', 'elifnmacro', 'elifnnum', 'elifnstr', - 'elifntoken', 'elifnum', 'elifstr', 'eliftoken', 'else', 'endif', 'endmacro', - 'endrep', 'endwhile', 'error', 'exitmacro', 'exitrep', 'exitwhile', 'fatal', - 'final', 'idefine', 'idefstr', 'ideftok', 'if', 'ifctx', 'ifdef', 'ifempty', - 'ifenv', 'ifid', 'ifidn', 'ifidni', 'ifmacro', 'ifn', 'ifnctx', 'ifndef', - 'ifnempty', 'ifnenv', 'ifnid', 'ifnidn', 'ifnidni', 'ifnmacro', 'ifnnum', - 'ifnstr', 'ifntoken', 'ifnum', 'ifstr', 'iftoken', 'imacro', 'include', - 'ixdefine', 'line', 'local', 'macro', 'pathsearch', 'pop', 'push', 'rep', - 'repl', 'rmacro', 'rotate', 'stacksize', 'strcat', 'strlen', 'substr', - 'undef', 'unmacro', 'use', 'warning', 'while', 'xdefine', -} -local preproc_symbol = '??' + S('!$+?') + '%' * -l.space + R('09')^1 -local preproc = token(l.PREPROCESSOR, '%' * (preproc_word + preproc_symbol)) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match({ +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ -- Preprocessor macros. - 'struc', 'endstruc', 'istruc', 'at', 'iend', 'align', 'alignb', 'sectalign', - '.nolist', + 'struc', 'endstruc', 'istruc', 'at', 'iend', 'align', 'alignb', 'sectalign', '.nolist', -- Preprocessor Packages. - --'altreg', 'smartalign', 'fp', 'ifunc' + 'altreg', 'smartalign', 'fp', 'ifunc', -- Directives. 
- 'absolute', 'bits', 'class', 'common', 'common', 'cpu', 'default', 'export', - 'extern', 'float', 'global', 'group', 'import', 'osabi', 'overlay', 'private', - 'public', '__SECT__', 'section', 'segment', 'stack', 'use16', 'use32', - 'use64', + 'absolute', 'bits', 'class', 'common', 'common', 'cpu', 'default', 'export', 'extern', 'float', + 'global', 'group', 'import', 'osabi', 'overlay', 'private', 'public', '__SECT__', 'section', + 'segment', 'stack', 'use16', 'use32', 'use64', -- Section Names. - '.bss', '.comment', '.data', '.lbss', '.ldata', '.lrodata', '.rdata', - '.rodata', '.tbss', '.tdata', '.text', + '.bss', '.comment', '.data', '.lbss', '.ldata', '.lrodata', '.rdata', '.rodata', '.tbss', + '.tdata', '.text', -- Section Qualifiers. - 'alloc', 'bss', 'code', 'exec', 'data', 'noalloc', 'nobits', 'noexec', - 'nowrite', 'progbits', 'rdata', 'tls', 'write', + 'alloc', 'bss', 'code', 'exec', 'data', 'noalloc', 'nobits', 'noexec', 'nowrite', 'progbits', + 'rdata', 'tls', 'write', -- Operators. - 'abs', 'rel', 'seg', 'wrt', 'strict', - '__utf16__', '__utf16be__', '__utf16le__', '__utf32__', '__utf32be__', - '__utf32le__', -}, '.')) + 'abs', 'rel', 'seg', 'wrt', 'strict', '__utf16__', '__utf16be__', '__utf16le__', '__utf32__', + '__utf32be__', '__utf32le__' +})) -- Instructions. -- awk '{print $1}'|uniq|tr '[:upper:]' '[:lower:]'| --- lua -e "for l in io.lines() do print(\"'\"..l..\"',\") end"|fmt -w 78 -local instruction = token('instruction', word_match{ +-- lua -e "for l in io.lines() do print(\"'\"..l..\"',\") end"|fmt -w 98 +lex:add_rule('instruction', token('instruction', word_match{ -- Special Instructions. - 'db', 'dd', 'do', 'dq', 'dt', 'dw', 'dy', 'resb', 'resd', 'reso', 'resq', - 'rest', 'resw', 'resy', + 'db', 'dd', 'do', 'dq', 'dt', 'dw', 'dy', 'resb', 'resd', 'reso', 'resq', 'rest', 'resw', 'resy', -- Conventional Instructions. 
- 'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bb0_reset', - 'bb1_reset', 'bound', 'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', - 'call', 'cbw', 'cdq', 'cdqe', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmp', - 'cmpsb', 'cmpsd', 'cmpsq', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', - 'cmpxchg16b', 'cpuid', 'cpu_read', 'cpu_write', 'cqo', 'cwd', 'cwde', 'daa', - 'das', 'dec', 'div', 'dmint', 'emms', 'enter', 'equ', 'f2xm1', 'fabs', - 'fadd', 'faddp', 'fbld', 'fbstp', 'fchs', 'fclex', 'fcmovb', 'fcmovbe', - 'fcmove', 'fcmovnb', 'fcmovnbe', 'fcmovne', 'fcmovnu', 'fcmovu', 'fcom', - 'fcomi', 'fcomip', 'fcomp', 'fcompp', 'fcos', 'fdecstp', 'fdisi', 'fdiv', - 'fdivp', 'fdivr', 'fdivrp', 'femms', 'feni', 'ffree', 'ffreep', 'fiadd', - 'ficom', 'ficomp', 'fidiv', 'fidivr', 'fild', 'fimul', 'fincstp', 'finit', - 'fist', 'fistp', 'fisttp', 'fisub', 'fisubr', 'fld', 'fld1', 'fldcw', - 'fldenv', 'fldl2e', 'fldl2t', 'fldlg2', 'fldln2', 'fldpi', 'fldz', 'fmul', - 'fmulp', 'fnclex', 'fndisi', 'fneni', 'fninit', 'fnop', 'fnsave', 'fnstcw', - 'fnstenv', 'fnstsw', 'fpatan', 'fprem', 'fprem1', 'fptan', 'frndint', - 'frstor', 'fsave', 'fscale', 'fsetpm', 'fsin', 'fsincos', 'fsqrt', - 'fst', 'fstcw', 'fstenv', 'fstp', 'fstsw', 'fsub', 'fsubp', 'fsubr', - 'fsubrp', 'ftst', 'fucom', 'fucomi', 'fucomip', 'fucomp', 'fucompp', - 'fxam', 'fxch', 'fxtract', 'fyl2x', 'fyl2xp1', 'hlt', 'ibts', 'icebp', - 'idiv', 'imul', 'in', 'inc', 'incbin', 'insb', 'insd', 'insw', 'int', - 'int01', 'int1', 'int03', 'int3', 'into', 'invd', 'invpcid', 'invlpg', - 'invlpga', 'iret', 'iretd', 'iretq', 'iretw', 'jcxz', 'jecxz', 'jrcxz', - 'jmp', 'jmpe', 'lahf', 'lar', 'lds', 'lea', 'leave', 'les', 'lfence', - 'lfs', 'lgdt', 'lgs', 'lidt', 'lldt', 'lmsw', 'loadall', 'loadall286', - 'lodsb', 'lodsd', 'lodsq', 'lodsw', 'loop', 'loope', 'loopne', 'loopnz', - 'loopz', 'lsl', 'lss', 'ltr', 'mfence', 'monitor', 'mov', 'movd', 'movq', - 'movsb', 'movsd', 'movsq', 'movsw', 'movsx', 'movsxd', 'movsx', 'movzx', 
- 'mul', 'mwait', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd', 'outsw', - 'packssdw', 'packsswb', 'packuswb', 'paddb', 'paddd', 'paddsb', 'paddsiw', - 'paddsw', 'paddusb', 'paddusw', 'paddw', 'pand', 'pandn', 'pause', 'paveb', - 'pavgusb', 'pcmpeqb', 'pcmpeqd', 'pcmpeqw', 'pcmpgtb', 'pcmpgtd', 'pcmpgtw', - 'pdistib', 'pf2id', 'pfacc', 'pfadd', 'pfcmpeq', 'pfcmpge', 'pfcmpgt', - 'pfmax', 'pfmin', 'pfmul', 'pfrcp', 'pfrcpit1', 'pfrcpit2', 'pfrsqit1', - 'pfrsqrt', 'pfsub', 'pfsubr', 'pi2fd', 'pmachriw', 'pmaddwd', 'pmagw', - 'pmulhriw', 'pmulhrwa', 'pmulhrwc', 'pmulhw', 'pmullw', 'pmvgezb', 'pmvlzb', - 'pmvnzb', 'pmvzb', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', - 'popfq', 'popfw', 'por', 'prefetch', 'prefetchw', 'pslld', 'psllq', - 'psllw', 'psrad', 'psraw', 'psrld', 'psrlq', 'psrlw', 'psubb', 'psubd', - 'psubsb', 'psubsiw', 'psubsw', 'psubusb', 'psubusw', 'psubw', 'punpckhbw', - 'punpckhdq', 'punpckhwd', 'punpcklbw', 'punpckldq', 'punpcklwd', 'push', - 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfq', 'pushfw', 'pxor', - 'rcl', 'rcr', 'rdshr', 'rdmsr', 'rdpmc', 'rdtsc', 'rdtscp', 'ret', 'retf', - 'retn', 'rol', 'ror', 'rdm', 'rsdc', 'rsldt', 'rsm', 'rsts', 'sahf', 'sal', - 'salc', 'sar', 'sbb', 'scasb', 'scasd', 'scasq', 'scasw', 'sfence', 'sgdt', - 'shl', 'shld', 'shr', 'shrd', 'sidt', 'sldt', 'skinit', 'smi', 'smint', - 'smintold', 'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosq', 'stosw', - 'str', 'sub', 'svdc', 'svldt', 'svts', 'swapgs', 'syscall', 'sysenter', - 'sysexit', 'sysret', 'test', 'ud0', 'ud1', 'ud2b', 'ud2', 'ud2a', 'umov', - 'verr', 'verw', 'fwait', 'wbinvd', 'wrshr', 'wrmsr', 'xadd', 'xbts', - 'xchg', 'xlatb', 'xlat', 'xor', 'cmova', 'cmovae', 'cmovb', 'cmovbe', - 'cmovc', 'cmove', 'cmovg', 'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', - 'cmovnb', 'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl', - 'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo', 'cmovp', - 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 
'cmovcc', 'ja', 'jae', 'jb', 'jbe', - 'jc', 'je', 'jg', 'jge', 'jl', 'jle', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', - 'jne', 'jng', 'jnge', 'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', - 'jpe', 'jpo', 'js', 'jz', 'seta', 'setae', 'setb', 'setbe', 'setc', 'sete', - 'setg', 'setge', 'setl', 'setle', 'setna', 'setnae', 'setnb', 'setnbe', - 'setnc', 'setne', 'setng', 'setnge', 'setnl', 'setnle', 'setno', 'setnp', - 'setns', 'setnz', 'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', - --" Katmai Streaming SIMD instructions (SSE -- a.k.a. KNI, XMM, MMX2). - 'addps', 'addss', 'andnps', 'andps', 'cmpeqps', 'cmpeqss', 'cmpleps', - 'cmpless', 'cmpltps', 'cmpltss', 'cmpneqps', 'cmpneqss', 'cmpnleps', - 'cmpnless', 'cmpnltps', 'cmpnltss', 'cmpordps', 'cmpordss', 'cmpunordps', - 'cmpunordss', 'cmpps', 'cmpss', 'comiss', 'cvtpi2ps', 'cvtps2pi', 'cvtsi2ss', - 'cvtss2si', 'cvttps2pi', 'cvttss2si', 'divps', 'divss', 'ldmxcsr', 'maxps', - 'maxss', 'minps', 'minss', 'movaps', 'movhps', 'movlhps', 'movlps', - 'movhlps', 'movmskps', 'movntps', 'movss', 'movups', 'mulps', 'mulss', - 'orps', 'rcpps', 'rcpss', 'rsqrtps', 'rsqrtss', 'shufps', 'sqrtps', 'sqrtss', - 'stmxcsr', 'subps', 'subss', 'ucomiss', 'unpckhps', 'unpcklps', 'xorps', + 'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bb0_reset', 'bb1_reset', 'bound', 'bsf', + 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw', 'cdq', 'cdqe', 'clc', 'cld', 'cli', + 'clts', 'cmc', 'cmp', 'cmpsb', 'cmpsd', 'cmpsq', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', + 'cmpxchg16b', 'cpuid', 'cpu_read', 'cpu_write', 'cqo', 'cwd', 'cwde', 'daa', 'das', 'dec', 'div', + 'dmint', 'emms', 'enter', 'equ', 'f2xm1', 'fabs', 'fadd', 'faddp', 'fbld', 'fbstp', 'fchs', + 'fclex', 'fcmovb', 'fcmovbe', 'fcmove', 'fcmovnb', 'fcmovnbe', 'fcmovne', 'fcmovnu', 'fcmovu', + 'fcom', 'fcomi', 'fcomip', 'fcomp', 'fcompp', 'fcos', 'fdecstp', 'fdisi', 'fdiv', 'fdivp', + 'fdivr', 'fdivrp', 'femms', 'feni', 'ffree', 'ffreep', 'fiadd', 'ficom', 
'ficomp', 'fidiv', + 'fidivr', 'fild', 'fimul', 'fincstp', 'finit', 'fist', 'fistp', 'fisttp', 'fisub', 'fisubr', + 'fld', 'fld1', 'fldcw', 'fldenv', 'fldl2e', 'fldl2t', 'fldlg2', 'fldln2', 'fldpi', 'fldz', 'fmul', + 'fmulp', 'fnclex', 'fndisi', 'fneni', 'fninit', 'fnop', 'fnsave', 'fnstcw', 'fnstenv', 'fnstsw', + 'fpatan', 'fprem', 'fprem1', 'fptan', 'frndint', 'frstor', 'fsave', 'fscale', 'fsetpm', 'fsin', + 'fsincos', 'fsqrt', 'fst', 'fstcw', 'fstenv', 'fstp', 'fstsw', 'fsub', 'fsubp', 'fsubr', 'fsubrp', + 'ftst', 'fucom', 'fucomi', 'fucomip', 'fucomp', 'fucompp', 'fxam', 'fxch', 'fxtract', 'fyl2x', + 'fyl2xp1', 'hlt', 'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'incbin', 'insb', 'insd', 'insw', + 'int', 'int01', 'int1', 'int03', 'int3', 'into', 'invd', 'invpcid', 'invlpg', 'invlpga', 'iret', + 'iretd', 'iretq', 'iretw', 'jcxz', 'jecxz', 'jrcxz', 'jmp', 'jmpe', 'lahf', 'lar', 'lds', 'lea', + 'leave', 'les', 'lfence', 'lfs', 'lgdt', 'lgs', 'lidt', 'lldt', 'lmsw', 'loadall', 'loadall286', + 'lodsb', 'lodsd', 'lodsq', 'lodsw', 'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss', + 'ltr', 'mfence', 'monitor', 'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsq', 'movsw', 'movsx', + 'movsxd', 'movsx', 'movzx', 'mul', 'mwait', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd', + 'outsw', 'packssdw', 'packsswb', 'packuswb', 'paddb', 'paddd', 'paddsb', 'paddsiw', 'paddsw', + 'paddusb', 'paddusw', 'paddw', 'pand', 'pandn', 'pause', 'paveb', 'pavgusb', 'pcmpeqb', 'pcmpeqd', + 'pcmpeqw', 'pcmpgtb', 'pcmpgtd', 'pcmpgtw', 'pdistib', 'pf2id', 'pfacc', 'pfadd', 'pfcmpeq', + 'pfcmpge', 'pfcmpgt', 'pfmax', 'pfmin', 'pfmul', 'pfrcp', 'pfrcpit1', 'pfrcpit2', 'pfrsqit1', + 'pfrsqrt', 'pfsub', 'pfsubr', 'pi2fd', 'pmachriw', 'pmaddwd', 'pmagw', 'pmulhriw', 'pmulhrwa', + 'pmulhrwc', 'pmulhw', 'pmullw', 'pmvgezb', 'pmvlzb', 'pmvnzb', 'pmvzb', 'pop', 'popa', 'popad', + 'popaw', 'popf', 'popfd', 'popfq', 'popfw', 'por', 'prefetch', 'prefetchw', 'pslld', 'psllq', + 'psllw', 'psrad', 
'psraw', 'psrld', 'psrlq', 'psrlw', 'psubb', 'psubd', 'psubsb', 'psubsiw', + 'psubsw', 'psubusb', 'psubusw', 'psubw', 'punpckhbw', 'punpckhdq', 'punpckhwd', 'punpcklbw', + 'punpckldq', 'punpcklwd', 'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfq', + 'pushfw', 'pxor', 'rcl', 'rcr', 'rdshr', 'rdmsr', 'rdpmc', 'rdtsc', 'rdtscp', 'ret', 'retf', + 'retn', 'rol', 'ror', 'rdm', 'rsdc', 'rsldt', 'rsm', 'rsts', 'sahf', 'sal', 'salc', 'sar', 'sbb', + 'scasb', 'scasd', 'scasq', 'scasw', 'sfence', 'sgdt', 'shl', 'shld', 'shr', 'shrd', 'sidt', + 'sldt', 'skinit', 'smi', 'smint', 'smintold', 'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', + 'stosq', 'stosw', 'str', 'sub', 'svdc', 'svldt', 'svts', 'swapgs', 'syscall', 'sysenter', + 'sysexit', 'sysret', 'test', 'ud0', 'ud1', 'ud2b', 'ud2', 'ud2a', 'umov', 'verr', 'verw', 'fwait', + 'wbinvd', 'wrshr', 'wrmsr', 'xadd', 'xbts', 'xchg', 'xlatb', 'xlat', 'xor', 'xor', 'cmova', + 'cmovae', 'cmovb', 'cmovbe', 'cmovc', 'cmove', 'cmovg', 'cmovge', 'cmovl', 'cmovle', 'cmovna', + 'cmovnae', 'cmovnb', 'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl', 'cmovnle', + 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo', 'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', + 'cmovcc', 'ja', 'jae', 'jb', 'jbe', 'jc', 'je', 'jg', 'jge', 'jl', 'jle', 'jna', 'jnae', 'jnb', + 'jnbe', 'jnc', 'jne', 'jng', 'jnge', 'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe', + 'jpo', 'js', 'jz', 'seta', 'setae', 'setb', 'setbe', 'setc', 'sete', 'setg', 'setge', 'setl', + 'setle', 'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng', 'setnge', 'setnl', + 'setnle', 'setno', 'setnp', 'setns', 'setnz', 'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', + -- Katmai Streaming SIMD instructions (SSE -- a.k.a. KNI XMM MMX2). 
+ 'addps', 'addss', 'andnps', 'andps', 'cmpeqps', 'cmpeqss', 'cmpleps', 'cmpless', 'cmpltps', + 'cmpltss', 'cmpneqps', 'cmpneqss', 'cmpnleps', 'cmpnless', 'cmpnltps', 'cmpnltss', 'cmpordps', + 'cmpordss', 'cmpunordps', 'cmpunordss', 'cmpps', 'cmpss', 'comiss', 'cvtpi2ps', 'cvtps2pi', + 'cvtsi2ss', 'cvtss2si', 'cvttps2pi', 'cvttss2si', 'divps', 'divss', 'ldmxcsr', 'maxps', 'maxss', + 'minps', 'minss', 'movaps', 'movhps', 'movlhps', 'movlps', 'movhlps', 'movmskps', 'movntps', + 'movss', 'movups', 'mulps', 'mulss', 'orps', 'rcpps', 'rcpss', 'rsqrtps', 'rsqrtss', 'shufps', + 'sqrtps', 'sqrtss', 'stmxcsr', 'subps', 'subss', 'ucomiss', 'unpckhps', 'unpcklps', 'xorps', -- Introduced in Deschutes but necessary for SSE support. 'fxrstor', 'fxrstor64', 'fxsave', 'fxsave64', -- XSAVE group (AVX and extended state). - 'xgetbv', 'xsetbv', 'xsave', 'xsave64', 'xsaveopt', 'xsaveopt64', 'xrstor', - 'xrstor64', + 'xgetbv', 'xsetbv', 'xsave', 'xsave64', 'xsaveopt', 'xsaveopt64', 'xrstor', 'xrstor64', -- Generic memory operations. 'prefetchnta', 'prefetcht0', 'prefetcht1', 'prefetcht2', 'sfence', -- New MMX instructions introduced in Katmai. - 'maskmovq', 'movntq', 'pavgb', 'pavgw', 'pextrw', 'pinsrw', 'pmaxsw', - 'pmaxub', 'pminsw', 'pminub', 'pmovmskb', 'pmulhuw', 'psadbw', 'pshufw', + 'maskmovq', 'movntq', 'pavgb', 'pavgw', 'pextrw', 'pinsrw', 'pmaxsw', 'pmaxub', 'pminsw', + 'pminub', 'pmovmskb', 'pmulhuw', 'psadbw', 'pshufw', -- AMD Enhanced 3DNow! (Athlon) instructions. 'pf2iw', 'pfnacc', 'pfpnacc', 'pi2fw', 'pswapd', -- Willamette SSE2 Cacheability Instructions. 'maskmovdqu', 'clflush', 'movntdq', 'movnti', 'movntpd', 'lfence', 'mfence', -- Willamette MMX instructions (SSE2 SIMD Integer Instructions). 
- 'movd', 'movdqa', 'movdqu', 'movdq2q', 'movq', 'movq2dq', 'packsswb', - 'packssdw', 'packuswb', 'paddb', 'paddw', 'paddd', 'paddq', 'paddsb', - 'paddsw', 'paddusb', 'paddusw', 'pand', 'pandn', 'pavgb', 'pavgw', 'pcmpeqb', - 'pcmpeqw', 'pcmpeqd', 'pcmpgtb', 'pcmpgtw', 'pcmpgtd', 'pextrw', 'pinsrw', - 'pmaddwd', 'pmaxsw', 'pmaxub', 'pminsw', 'pminub', 'pmovmskb', 'pmulhuw', - 'pmulhw', 'pmullw', 'pmuludq', 'por', 'psadbw', 'pshufd', 'pshufhw', - 'pshuflw', 'pslldq', 'psllw', 'pslld', 'psllq', 'psraw', 'psrad', 'psrldq', - 'psrlw', 'psrld', 'psrlq', 'psubb', 'psubw', 'psubd', 'psubq', 'psubsb', - 'psubsw', 'psubusb', 'psubusw', 'punpckhbw', 'punpckhwd', 'punpckhdq', + 'movd', 'movdqa', 'movdqu', 'movdq2q', 'movq', 'movq2dq', 'packsswb', 'packssdw', 'packuswb', + 'paddb', 'paddw', 'paddd', 'paddq', 'paddsb', 'paddsw', 'paddusb', 'paddusw', 'pand', 'pandn', + 'pavgb', 'pavgw', 'pcmpeqb', 'pcmpeqw', 'pcmpeqd', 'pcmpgtb', 'pcmpgtw', 'pcmpgtd', 'pextrw', + 'pinsrw', 'pmaddwd', 'pmaxsw', 'pmaxub', 'pminsw', 'pminub', 'pmovmskb', 'pmulhuw', 'pmulhw', + 'pmullw', 'pmuludq', 'por', 'psadbw', 'pshufd', 'pshufhw', 'pshuflw', 'pslldq', 'psllw', 'pslld', + 'psllq', 'psraw', 'psrad', 'psrldq', 'psrlw', 'psrld', 'psrlq', 'psubb', 'psubw', 'psubd', + 'psubq', 'psubsb', 'psubsw', 'psubusb', 'psubusw', 'punpckhbw', 'punpckhwd', 'punpckhdq', 'punpckhqdq', 'punpcklbw', 'punpcklwd', 'punpckldq', 'punpcklqdq', 'pxor', -- Willamette Streaming SIMD instructions (SSE2). 
- 'addpd', 'addsd', 'andnpd', 'andpd', 'cmpeqpd', 'cmpeqsd', 'cmplepd', - 'cmplesd', 'cmpltpd', 'cmpltsd', 'cmpneqpd', 'cmpneqsd', 'cmpnlepd', - 'cmpnlesd', 'cmpnltpd', 'cmpnltsd', 'cmpordpd', 'cmpordsd', 'cmpunordpd', - 'cmpunordsd', 'cmppd', 'cmpsd', 'comisd', 'cvtdq2pd', 'cvtdq2ps', - 'cvtpd2dq', 'cvtpd2pi', 'cvtpd2ps', 'cvtpi2pd', 'cvtps2dq', 'cvtps2pd', - 'cvtsd2si', 'cvtsd2ss', 'cvtsi2sd', 'cvtss2sd', 'cvttpd2pi', 'cvttpd2dq', - 'cvttps2dq', 'cvttsd2si', 'divpd', 'divsd', 'maxpd', 'maxsd', 'minpd', - 'minsd', 'movapd', 'movhpd', 'movlpd', 'movmskpd', 'movsd', 'movupd', - 'mulpd', 'mulsd', 'orpd', 'shufpd', 'sqrtpd', 'sqrtsd', 'subpd', 'subsd', - 'ucomisd', 'unpckhpd', 'unpcklpd', 'xorpd', + 'addpd', 'addsd', 'andnpd', 'andpd', 'cmpeqpd', 'cmpeqsd', 'cmplepd', 'cmplesd', 'cmpltpd', + 'cmpltsd', 'cmpneqpd', 'cmpneqsd', 'cmpnlepd', 'cmpnlesd', 'cmpnltpd', 'cmpnltsd', 'cmpordpd', + 'cmpordsd', 'cmpunordpd', 'cmpunordsd', 'cmppd', 'cmpsd', 'comisd', 'cvtdq2pd', 'cvtdq2ps', + 'cvtpd2dq', 'cvtpd2pi', 'cvtpd2ps', 'cvtpi2pd', 'cvtps2dq', 'cvtps2pd', 'cvtsd2si', 'cvtsd2ss', + 'cvtsi2sd', 'cvtss2sd', 'cvttpd2pi', 'cvttpd2dq', 'cvttps2dq', 'cvttsd2si', 'divpd', 'divsd', + 'maxpd', 'maxsd', 'minpd', 'minsd', 'movapd', 'movhpd', 'movlpd', 'movmskpd', 'movsd', 'movupd', + 'mulpd', 'mulsd', 'orpd', 'shufpd', 'sqrtpd', 'sqrtsd', 'subpd', 'subsd', 'ucomisd', 'unpckhpd', + 'unpcklpd', 'xorpd', -- Prescott New Instructions (SSE3). - 'addsubpd', 'addsubps', 'haddpd', 'haddps', 'hsubpd', 'hsubps', 'lddqu', - 'movddup', 'movshdup', 'movsldup', + 'addsubpd', 'addsubps', 'haddpd', 'haddps', 'hsubpd', 'hsubps', 'lddqu', 'movddup', 'movshdup', + 'movsldup', -- VMX/SVM Instructions. 
- 'clgi', 'stgi', 'vmcall', 'vmclear', 'vmfunc', 'vmlaunch', 'vmload', - 'vmmcall', 'vmptrld', 'vmptrst', 'vmread', 'vmresume', 'vmrun', 'vmsave', - 'vmwrite', 'vmxoff', 'vmxon', + 'clgi', 'stgi', 'vmcall', 'vmclear', 'vmfunc', 'vmlaunch', 'vmload', 'vmmcall', 'vmptrld', + 'vmptrst', 'vmread', 'vmresume', 'vmrun', 'vmsave', 'vmwrite', 'vmxoff', 'vmxon', -- Extended Page Tables VMX instructions. 'invept', 'invvpid', -- Tejas New Instructions (SSSE3). - 'pabsb', 'pabsw', 'pabsd', 'palignr', 'phaddw', 'phaddd', 'phaddsw', - 'phsubw', 'phsubd', 'phsubsw', 'pmaddubsw', 'pmulhrsw', 'pshufb', 'psignb', - 'psignw', 'psignd', + 'pabsb', 'pabsw', 'pabsd', 'palignr', 'phaddw', 'phaddd', 'phaddsw', 'phsubw', 'phsubd', + 'phsubsw', 'pmaddubsw', 'pmulhrsw', 'pshufb', 'psignb', 'psignw', 'psignd', -- AMD SSE4A. 'extrq', 'insertq', 'movntsd', 'movntss', -- New instructions in Barcelona. 'lzcnt', -- Penryn New Instructions (SSE4.1). - 'blendpd', 'blendps', 'blendvpd', 'blendvps', 'dppd', 'dpps', 'extractps', - 'insertps', 'movntdqa', 'mpsadbw', 'packusdw', 'pblendvb', 'pblendw', - 'pcmpeqq', 'pextrb', 'pextrd', 'pextrq', 'pextrw', 'phminposuw', 'pinsrb', - 'pinsrd', 'pinsrq', 'pmaxsb', 'pmaxsd', 'pmaxud', 'pmaxuw', 'pminsb', - 'pminsd', 'pminud', 'pminuw', 'pmovsxbw', 'pmovsxbd', 'pmovsxbq', 'pmovsxwd', - 'pmovsxwq', 'pmovsxdq', 'pmovzxbw', 'pmovzxbd', 'pmovzxbq', 'pmovzxwd', - 'pmovzxwq', 'pmovzxdq', 'pmuldq', 'pmulld', 'ptest', 'roundpd', 'roundps', - 'roundsd', 'roundss', + 'blendpd', 'blendps', 'blendvpd', 'blendvps', 'dppd', 'dpps', 'extractps', 'insertps', 'movntdqa', + 'mpsadbw', 'packusdw', 'pblendvb', 'pblendw', 'pcmpeqq', 'pextrb', 'pextrd', 'pextrq', 'pextrw', + 'phminposuw', 'pinsrb', 'pinsrd', 'pinsrq', 'pmaxsb', 'pmaxsd', 'pmaxud', 'pmaxuw', 'pminsb', + 'pminsd', 'pminud', 'pminuw', 'pmovsxbw', 'pmovsxbd', 'pmovsxbq', 'pmovsxwd', 'pmovsxwq', + 'pmovsxdq', 'pmovzxbw', 'pmovzxbd', 'pmovzxbq', 'pmovzxwd', 'pmovzxwq', 'pmovzxdq', 'pmuldq', + 'pmulld', 'ptest', 'roundpd', 
'roundps', 'roundsd', 'roundss', -- Nehalem New Instructions (SSE4.2). - 'crc32', 'pcmpestri', 'pcmpestrm', 'pcmpistri', 'pcmpistrm', 'pcmpgtq', - 'popcnt', + 'crc32', 'pcmpestri', 'pcmpestrm', 'pcmpistri', 'pcmpistrm', 'pcmpgtq', 'popcnt', -- Intel SMX. 'getsec', -- Geode (Cyrix) 3DNow! additions. @@ -216,271 +156,234 @@ local instruction = token('instruction', word_match{ -- Intel AES instructions. 'aesenc', 'aesenclast', 'aesdec', 'aesdeclast', 'aesimc', 'aeskeygenassist', -- Intel AVX AES instructions. - 'vaesenc', 'vaesenclast', 'vaesdec', 'vaesdeclast', 'vaesimc', - 'vaeskeygenassist', + 'vaesenc', 'vaesenclast', 'vaesdec', 'vaesdeclast', 'vaesimc', 'vaeskeygenassist', -- Intel AVX instructions. - 'vaddpd', 'vaddps', 'vaddsd', 'vaddss', 'vaddsubpd', 'vaddsubps', - 'vandpd', 'vandps', 'vandnpd', 'vandnps', 'vblendpd', 'vblendps', - 'vblendvpd', 'vblendvps', 'vbroadcastss', 'vbroadcastsd', 'vbroadcastf128', - 'vcmpeq_ospd', 'vcmpeqpd', 'vcmplt_ospd', 'vcmpltpd', 'vcmple_ospd', - 'vcmplepd', 'vcmpunord_qpd', 'vcmpunordpd', 'vcmpneq_uqpd', 'vcmpneqpd', - 'vcmpnlt_uspd', 'vcmpnltpd', 'vcmpnle_uspd', 'vcmpnlepd', 'vcmpord_qpd', - 'vcmpordpd', 'vcmpeq_uqpd', 'vcmpnge_uspd', 'vcmpngepd', 'vcmpngt_uspd', - 'vcmpngtpd', 'vcmpfalse_oqpd', 'vcmpfalsepd', 'vcmpneq_oqpd', 'vcmpge_ospd', - 'vcmpgepd', 'vcmpgt_ospd', 'vcmpgtpd', 'vcmptrue_uqpd', 'vcmptruepd', - 'vcmpeq_ospd', 'vcmplt_oqpd', 'vcmple_oqpd', 'vcmpunord_spd', 'vcmpneq_uspd', - 'vcmpnlt_uqpd', 'vcmpnle_uqpd', 'vcmpord_spd', 'vcmpeq_uspd', 'vcmpnge_uqpd', - 'vcmpngt_uqpd', 'vcmpfalse_ospd', 'vcmpneq_ospd', 'vcmpge_oqpd', - 'vcmpgt_oqpd', 'vcmptrue_uspd', 'vcmppd', 'vcmpeq_osps', 'vcmpeqps', - 'vcmplt_osps', 'vcmpltps', 'vcmple_osps', 'vcmpleps', 'vcmpunord_qps', - 'vcmpunordps', 'vcmpneq_uqps', 'vcmpneqps', 'vcmpnlt_usps', 'vcmpnltps', - 'vcmpnle_usps', 'vcmpnleps', 'vcmpord_qps', 'vcmpordps', 'vcmpeq_uqps', - 'vcmpnge_usps', 'vcmpngeps', 'vcmpngt_usps', 'vcmpngtps', 'vcmpfalse_oqps', - 'vcmpfalseps', 
'vcmpneq_oqps', 'vcmpge_osps', 'vcmpgeps', 'vcmpgt_osps', - 'vcmpgtps', 'vcmptrue_uqps', 'vcmptrueps', 'vcmpeq_osps', 'vcmplt_oqps', - 'vcmple_oqps', 'vcmpunord_sps', 'vcmpneq_usps', 'vcmpnlt_uqps', - 'vcmpnle_uqps', 'vcmpord_sps', 'vcmpeq_usps', 'vcmpnge_uqps', - 'vcmpngt_uqps', 'vcmpfalse_osps', 'vcmpneq_osps', 'vcmpge_oqps', - 'vcmpgt_oqps', 'vcmptrue_usps', 'vcmpps', 'vcmpeq_ossd', 'vcmpeqsd', - 'vcmplt_ossd', 'vcmpltsd', 'vcmple_ossd', 'vcmplesd', 'vcmpunord_qsd', - 'vcmpunordsd', 'vcmpneq_uqsd', 'vcmpneqsd', 'vcmpnlt_ussd', 'vcmpnltsd', - 'vcmpnle_ussd', 'vcmpnlesd', 'vcmpord_qsd', 'vcmpordsd', 'vcmpeq_uqsd', - 'vcmpnge_ussd', 'vcmpngesd', 'vcmpngt_ussd', 'vcmpngtsd', 'vcmpfalse_oqsd', - 'vcmpfalsesd', 'vcmpneq_oqsd', 'vcmpge_ossd', 'vcmpgesd', 'vcmpgt_ossd', - 'vcmpgtsd', 'vcmptrue_uqsd', 'vcmptruesd', 'vcmpeq_ossd', 'vcmplt_oqsd', - 'vcmple_oqsd', 'vcmpunord_ssd', 'vcmpneq_ussd', 'vcmpnlt_uqsd', - 'vcmpnle_uqsd', 'vcmpord_ssd', 'vcmpeq_ussd', 'vcmpnge_uqsd', - 'vcmpngt_uqsd', 'vcmpfalse_ossd', 'vcmpneq_ossd', 'vcmpge_oqsd', - 'vcmpgt_oqsd', 'vcmptrue_ussd', 'vcmpsd', 'vcmpeq_osss', 'vcmpeqss', - 'vcmplt_osss', 'vcmpltss', 'vcmple_osss', 'vcmpless', 'vcmpunord_qss', - 'vcmpunordss', 'vcmpneq_uqss', 'vcmpneqss', 'vcmpnlt_usss', 'vcmpnltss', - 'vcmpnle_usss', 'vcmpnless', 'vcmpord_qss', 'vcmpordss', 'vcmpeq_uqss', - 'vcmpnge_usss', 'vcmpngess', 'vcmpngt_usss', 'vcmpngtss', 'vcmpfalse_oqss', - 'vcmpfalsess', 'vcmpneq_oqss', 'vcmpge_osss', 'vcmpgess', 'vcmpgt_osss', - 'vcmpgtss', 'vcmptrue_uqss', 'vcmptruess', 'vcmpeq_osss', 'vcmplt_oqss', - 'vcmple_oqss', 'vcmpunord_sss', 'vcmpneq_usss', 'vcmpnlt_uqss', - 'vcmpnle_uqss', 'vcmpord_sss', 'vcmpeq_usss', 'vcmpnge_uqss', - 'vcmpngt_uqss', 'vcmpfalse_osss', 'vcmpneq_osss', 'vcmpge_oqss', - 'vcmpgt_oqss', 'vcmptrue_usss', 'vcmpss', 'vcomisd', 'vcomiss', - 'vcvtdq2pd', 'vcvtdq2ps', 'vcvtpd2dq', 'vcvtpd2ps', 'vcvtps2dq', - 'vcvtps2pd', 'vcvtsd2si', 'vcvtsd2ss', 'vcvtsi2sd', 'vcvtsi2ss', - 'vcvtss2sd', 'vcvtss2si', 
'vcvttpd2dq', 'vcvttps2dq', 'vcvttsd2si', - 'vcvttss2si', 'vdivpd', 'vdivps', 'vdivsd', 'vdivss', 'vdppd', 'vdpps', - 'vextractf128', 'vextractps', 'vhaddpd', 'vhaddps', 'vhsubpd', 'vhsubps', - 'vinsertf128', 'vinsertps', 'vlddqu', 'vldqqu', 'vlddqu', 'vldmxcsr', - 'vmaskmovdqu', 'vmaskmovps', 'vmaskmovpd', 'vmaxpd', 'vmaxps', 'vmaxsd', - 'vmaxss', 'vminpd', 'vminps', 'vminsd', 'vminss', 'vmovapd', 'vmovaps', - 'vmovd', 'vmovq', 'vmovddup', 'vmovdqa', 'vmovqqa', 'vmovdqa', 'vmovdqu', - 'vmovqqu', 'vmovdqu', 'vmovhlps', 'vmovhpd', 'vmovhps', 'vmovlhps', - 'vmovlpd', 'vmovlps', 'vmovmskpd', 'vmovmskps', 'vmovntdq', 'vmovntqq', - 'vmovntdq', 'vmovntdqa', 'vmovntpd', 'vmovntps', 'vmovsd', 'vmovshdup', - 'vmovsldup', 'vmovss', 'vmovupd', 'vmovups', 'vmpsadbw', 'vmulpd', - 'vmulps', 'vmulsd', 'vmulss', 'vorpd', 'vorps', 'vpabsb', 'vpabsw', - 'vpabsd', 'vpacksswb', 'vpackssdw', 'vpackuswb', 'vpackusdw', 'vpaddb', - 'vpaddw', 'vpaddd', 'vpaddq', 'vpaddsb', 'vpaddsw', 'vpaddusb', 'vpaddusw', - 'vpalignr', 'vpand', 'vpandn', 'vpavgb', 'vpavgw', 'vpblendvb', 'vpblendw', - 'vpcmpestri', 'vpcmpestrm', 'vpcmpistri', 'vpcmpistrm', 'vpcmpeqb', - 'vpcmpeqw', 'vpcmpeqd', 'vpcmpeqq', 'vpcmpgtb', 'vpcmpgtw', 'vpcmpgtd', - 'vpcmpgtq', 'vpermilpd', 'vpermilps', 'vperm2f128', 'vpextrb', 'vpextrw', - 'vpextrd', 'vpextrq', 'vphaddw', 'vphaddd', 'vphaddsw', 'vphminposuw', - 'vphsubw', 'vphsubd', 'vphsubsw', 'vpinsrb', 'vpinsrw', 'vpinsrd', - 'vpinsrq', 'vpmaddwd', 'vpmaddubsw', 'vpmaxsb', 'vpmaxsw', 'vpmaxsd', - 'vpmaxub', 'vpmaxuw', 'vpmaxud', 'vpminsb', 'vpminsw', 'vpminsd', 'vpminub', - 'vpminuw', 'vpminud', 'vpmovmskb', 'vpmovsxbw', 'vpmovsxbd', 'vpmovsxbq', - 'vpmovsxwd', 'vpmovsxwq', 'vpmovsxdq', 'vpmovzxbw', 'vpmovzxbd', 'vpmovzxbq', - 'vpmovzxwd', 'vpmovzxwq', 'vpmovzxdq', 'vpmulhuw', 'vpmulhrsw', 'vpmulhw', - 'vpmullw', 'vpmulld', 'vpmuludq', 'vpmuldq', 'vpor', 'vpsadbw', 'vpshufb', - 'vpshufd', 'vpshufhw', 'vpshuflw', 'vpsignb', 'vpsignw', 'vpsignd', - 'vpslldq', 'vpsrldq', 
'vpsllw', 'vpslld', 'vpsllq', 'vpsraw', 'vpsrad', - 'vpsrlw', 'vpsrld', 'vpsrlq', 'vptest', 'vpsubb', 'vpsubw', 'vpsubd', - 'vpsubq', 'vpsubsb', 'vpsubsw', 'vpsubusb', 'vpsubusw', 'vpunpckhbw', - 'vpunpckhwd', 'vpunpckhdq', 'vpunpckhqdq', 'vpunpcklbw', 'vpunpcklwd', - 'vpunpckldq', 'vpunpcklqdq', 'vpxor', 'vrcpps', 'vrcpss', 'vrsqrtps', - 'vrsqrtss', 'vroundpd', 'vroundps', 'vroundsd', 'vroundss', 'vshufpd', - 'vshufps', 'vsqrtpd', 'vsqrtps', 'vsqrtsd', 'vsqrtss', 'vstmxcsr', 'vsubpd', - 'vsubps', 'vsubsd', 'vsubss', 'vtestps', 'vtestpd', 'vucomisd', 'vucomiss', - 'vunpckhpd', 'vunpckhps', 'vunpcklpd', 'vunpcklps', 'vxorpd', 'vxorps', - 'vzeroall', 'vzeroupper', + 'vaddpd', 'vaddps', 'vaddsd', 'vaddss', 'vaddsubpd', 'vaddsubps', 'vandpd', 'vandps', 'vandnpd', + 'vandnps', 'vblendpd', 'vblendps', 'vblendvpd', 'vblendvps', 'vbroadcastss', 'vbroadcastsd', + 'vbroadcastf128', 'vcmpeq_ospd', 'vcmpeqpd', 'vcmplt_ospd', 'vcmpltpd', 'vcmple_ospd', 'vcmplepd', + 'vcmpunord_qpd', 'vcmpunordpd', 'vcmpneq_uqpd', 'vcmpneqpd', 'vcmpnlt_uspd', 'vcmpnltpd', + 'vcmpnle_uspd', 'vcmpnlepd', 'vcmpord_qpd', 'vcmpordpd', 'vcmpeq_uqpd', 'vcmpnge_uspd', + 'vcmpngepd', 'vcmpngt_uspd', 'vcmpngtpd', 'vcmpfalse_oqpd', 'vcmpfalsepd', 'vcmpneq_oqpd', + 'vcmpge_ospd', 'vcmpgepd', 'vcmpgt_ospd', 'vcmpgtpd', 'vcmptrue_uqpd', 'vcmptruepd', + 'vcmpeq_ospd', 'vcmplt_oqpd', 'vcmple_oqpd', 'vcmpunord_spd', 'vcmpneq_uspd', 'vcmpnlt_uqpd', + 'vcmpnle_uqpd', 'vcmpord_spd', 'vcmpeq_uspd', 'vcmpnge_uqpd', 'vcmpngt_uqpd', 'vcmpfalse_ospd', + 'vcmpneq_ospd', 'vcmpge_oqpd', 'vcmpgt_oqpd', 'vcmptrue_uspd', 'vcmppd', 'vcmpeq_osps', + 'vcmpeqps', 'vcmplt_osps', 'vcmpltps', 'vcmple_osps', 'vcmpleps', 'vcmpunord_qps', 'vcmpunordps', + 'vcmpneq_uqps', 'vcmpneqps', 'vcmpnlt_usps', 'vcmpnltps', 'vcmpnle_usps', 'vcmpnleps', + 'vcmpord_qps', 'vcmpordps', 'vcmpeq_uqps', 'vcmpnge_usps', 'vcmpngeps', 'vcmpngt_usps', + 'vcmpngtps', 'vcmpfalse_oqps', 'vcmpfalseps', 'vcmpneq_oqps', 'vcmpge_osps', 'vcmpgeps', + 'vcmpgt_osps', 
'vcmpgtps', 'vcmptrue_uqps', 'vcmptrueps', 'vcmpeq_osps', 'vcmplt_oqps', + 'vcmple_oqps', 'vcmpunord_sps', 'vcmpneq_usps', 'vcmpnlt_uqps', 'vcmpnle_uqps', 'vcmpord_sps', + 'vcmpeq_usps', 'vcmpnge_uqps', 'vcmpngt_uqps', 'vcmpfalse_osps', 'vcmpneq_osps', 'vcmpge_oqps', + 'vcmpgt_oqps', 'vcmptrue_usps', 'vcmpps', 'vcmpeq_ossd', 'vcmpeqsd', 'vcmplt_ossd', 'vcmpltsd', + 'vcmple_ossd', 'vcmplesd', 'vcmpunord_qsd', 'vcmpunordsd', 'vcmpneq_uqsd', 'vcmpneqsd', + 'vcmpnlt_ussd', 'vcmpnltsd', 'vcmpnle_ussd', 'vcmpnlesd', 'vcmpord_qsd', 'vcmpordsd', + 'vcmpeq_uqsd', 'vcmpnge_ussd', 'vcmpngesd', 'vcmpngt_ussd', 'vcmpngtsd', 'vcmpfalse_oqsd', + 'vcmpfalsesd', 'vcmpneq_oqsd', 'vcmpge_ossd', 'vcmpgesd', 'vcmpgt_ossd', 'vcmpgtsd', + 'vcmptrue_uqsd', 'vcmptruesd', 'vcmpeq_ossd', 'vcmplt_oqsd', 'vcmple_oqsd', 'vcmpunord_ssd', + 'vcmpneq_ussd', 'vcmpnlt_uqsd', 'vcmpnle_uqsd', 'vcmpord_ssd', 'vcmpeq_ussd', 'vcmpnge_uqsd', + 'vcmpngt_uqsd', 'vcmpfalse_ossd', 'vcmpneq_ossd', 'vcmpge_oqsd', 'vcmpgt_oqsd', 'vcmptrue_ussd', + 'vcmpsd', 'vcmpeq_osss', 'vcmpeqss', 'vcmplt_osss', 'vcmpltss', 'vcmple_osss', 'vcmpless', + 'vcmpunord_qss', 'vcmpunordss', 'vcmpneq_uqss', 'vcmpneqss', 'vcmpnlt_usss', 'vcmpnltss', + 'vcmpnle_usss', 'vcmpnless', 'vcmpord_qss', 'vcmpordss', 'vcmpeq_uqss', 'vcmpnge_usss', + 'vcmpngess', 'vcmpngt_usss', 'vcmpngtss', 'vcmpfalse_oqss', 'vcmpfalsess', 'vcmpneq_oqss', + 'vcmpge_osss', 'vcmpgess', 'vcmpgt_osss', 'vcmpgtss', 'vcmptrue_uqss', 'vcmptruess', + 'vcmpeq_osss', 'vcmplt_oqss', 'vcmple_oqss', 'vcmpunord_sss', 'vcmpneq_usss', 'vcmpnlt_uqss', + 'vcmpnle_uqss', 'vcmpord_sss', 'vcmpeq_usss', 'vcmpnge_uqss', 'vcmpngt_uqss', 'vcmpfalse_osss', + 'vcmpneq_osss', 'vcmpge_oqss', 'vcmpgt_oqss', 'vcmptrue_usss', 'vcmpss', 'vcomisd', 'vcomiss', + 'vcvtdq2pd', 'vcvtdq2ps', 'vcvtpd2dq', 'vcvtpd2ps', 'vcvtps2dq', 'vcvtps2pd', 'vcvtsd2si', + 'vcvtsd2ss', 'vcvtsi2sd', 'vcvtsi2ss', 'vcvtss2sd', 'vcvtss2si', 'vcvttpd2dq', 'vcvttps2dq', + 'vcvttsd2si', 'vcvttss2si', 'vdivpd', 'vdivps', 
'vdivsd', 'vdivss', 'vdppd', 'vdpps', + 'vextractf128', 'vextractps', 'vhaddpd', 'vhaddps', 'vhsubpd', 'vhsubps', 'vinsertf128', + 'vinsertps', 'vlddqu', 'vldqqu', 'vlddqu', 'vldmxcsr', 'vmaskmovdqu', 'vmaskmovps', 'vmaskmovpd', + 'vmaxpd', 'vmaxps', 'vmaxsd', 'vmaxss', 'vminpd', 'vminps', 'vminsd', 'vminss', 'vmovapd', + 'vmovaps', 'vmovd', 'vmovq', 'vmovddup', 'vmovdqa', 'vmovqqa', 'vmovdqa', 'vmovdqu', 'vmovqqu', + 'vmovdqu', 'vmovhlps', 'vmovhpd', 'vmovhps', 'vmovlhps', 'vmovlpd', 'vmovlps', 'vmovmskpd', + 'vmovmskps', 'vmovntdq', 'vmovntqq', 'vmovntdq', 'vmovntdqa', 'vmovntpd', 'vmovntps', 'vmovsd', + 'vmovshdup', 'vmovsldup', 'vmovss', 'vmovupd', 'vmovups', 'vmpsadbw', 'vmulpd', 'vmulps', + 'vmulsd', 'vmulss', 'vorpd', 'vorps', 'vpabsb', 'vpabsw', 'vpabsd', 'vpacksswb', 'vpackssdw', + 'vpackuswb', 'vpackusdw', 'vpaddb', 'vpaddw', 'vpaddd', 'vpaddq', 'vpaddsb', 'vpaddsw', + 'vpaddusb', 'vpaddusw', 'vpalignr', 'vpand', 'vpandn', 'vpavgb', 'vpavgw', 'vpblendvb', + 'vpblendw', 'vpcmpestri', 'vpcmpestrm', 'vpcmpistri', 'vpcmpistrm', 'vpcmpeqb', 'vpcmpeqw', + 'vpcmpeqd', 'vpcmpeqq', 'vpcmpgtb', 'vpcmpgtw', 'vpcmpgtd', 'vpcmpgtq', 'vpermilpd', 'vpermilps', + 'vperm2f128', 'vpextrb', 'vpextrw', 'vpextrd', 'vpextrq', 'vphaddw', 'vphaddd', 'vphaddsw', + 'vphminposuw', 'vphsubw', 'vphsubd', 'vphsubsw', 'vpinsrb', 'vpinsrw', 'vpinsrd', 'vpinsrq', + 'vpmaddwd', 'vpmaddubsw', 'vpmaxsb', 'vpmaxsw', 'vpmaxsd', 'vpmaxub', 'vpmaxuw', 'vpmaxud', + 'vpminsb', 'vpminsw', 'vpminsd', 'vpminub', 'vpminuw', 'vpminud', 'vpmovmskb', 'vpmovsxbw', + 'vpmovsxbd', 'vpmovsxbq', 'vpmovsxwd', 'vpmovsxwq', 'vpmovsxdq', 'vpmovzxbw', 'vpmovzxbd', + 'vpmovzxbq', 'vpmovzxwd', 'vpmovzxwq', 'vpmovzxdq', 'vpmulhuw', 'vpmulhrsw', 'vpmulhw', 'vpmullw', + 'vpmulld', 'vpmuludq', 'vpmuldq', 'vpor', 'vpsadbw', 'vpshufb', 'vpshufd', 'vpshufhw', 'vpshuflw', + 'vpsignb', 'vpsignw', 'vpsignd', 'vpslldq', 'vpsrldq', 'vpsllw', 'vpslld', 'vpsllq', 'vpsraw', + 'vpsrad', 'vpsrlw', 'vpsrld', 'vpsrlq', 'vptest', 
'vpsubb', 'vpsubw', 'vpsubd', 'vpsubq', + 'vpsubsb', 'vpsubsw', 'vpsubusb', 'vpsubusw', 'vpunpckhbw', 'vpunpckhwd', 'vpunpckhdq', + 'vpunpckhqdq', 'vpunpcklbw', 'vpunpcklwd', 'vpunpckldq', 'vpunpcklqdq', 'vpxor', 'vrcpps', + 'vrcpss', 'vrsqrtps', 'vrsqrtss', 'vroundpd', 'vroundps', 'vroundsd', 'vroundss', 'vshufpd', + 'vshufps', 'vsqrtpd', 'vsqrtps', 'vsqrtsd', 'vsqrtss', 'vstmxcsr', 'vsubpd', 'vsubps', 'vsubsd', + 'vsubss', 'vtestps', 'vtestpd', 'vucomisd', 'vucomiss', 'vunpckhpd', 'vunpckhps', 'vunpcklpd', + 'vunpcklps', 'vxorpd', 'vxorps', 'vzeroall', 'vzeroupper', -- Intel Carry-Less Multiplication instructions (CLMUL). 'pclmullqlqdq', 'pclmulhqlqdq', 'pclmullqhqdq', 'pclmulhqhqdq', 'pclmulqdq', -- Intel AVX Carry-Less Multiplication instructions (CLMUL). - 'vpclmullqlqdq', 'vpclmulhqlqdq', 'vpclmullqhqdq', 'vpclmulhqhqdq', - 'vpclmulqdq', + 'vpclmullqlqdq', 'vpclmulhqlqdq', 'vpclmullqhqdq', 'vpclmulhqhqdq', 'vpclmulqdq', -- Intel Fused Multiply-Add instructions (FMA). - 'vfmadd132ps', 'vfmadd132pd', 'vfmadd312ps', 'vfmadd312pd', 'vfmadd213ps', - 'vfmadd213pd', 'vfmadd123ps', 'vfmadd123pd', 'vfmadd231ps', 'vfmadd231pd', - 'vfmadd321ps', 'vfmadd321pd', 'vfmaddsub132ps', 'vfmaddsub132pd', - 'vfmaddsub312ps', 'vfmaddsub312pd', 'vfmaddsub213ps', 'vfmaddsub213pd', - 'vfmaddsub123ps', 'vfmaddsub123pd', 'vfmaddsub231ps', 'vfmaddsub231pd', - 'vfmaddsub321ps', 'vfmaddsub321pd', 'vfmsub132ps', 'vfmsub132pd', - 'vfmsub312ps', 'vfmsub312pd', 'vfmsub213ps', 'vfmsub213pd', 'vfmsub123ps', - 'vfmsub123pd', 'vfmsub231ps', 'vfmsub231pd', 'vfmsub321ps', 'vfmsub321pd', - 'vfmsubadd132ps', 'vfmsubadd132pd', 'vfmsubadd312ps', 'vfmsubadd312pd', - 'vfmsubadd213ps', 'vfmsubadd213pd', 'vfmsubadd123ps', 'vfmsubadd123pd', - 'vfmsubadd231ps', 'vfmsubadd231pd', 'vfmsubadd321ps', 'vfmsubadd321pd', - 'vfnmadd132ps', 'vfnmadd132pd', 'vfnmadd312ps', 'vfnmadd312pd', - 'vfnmadd213ps', 'vfnmadd213pd', 'vfnmadd123ps', 'vfnmadd123pd', - 'vfnmadd231ps', 'vfnmadd231pd', 'vfnmadd321ps', 'vfnmadd321pd', 
- 'vfnmsub132ps', 'vfnmsub132pd', 'vfnmsub312ps', 'vfnmsub312pd', - 'vfnmsub213ps', 'vfnmsub213pd', 'vfnmsub123ps', 'vfnmsub123pd', - 'vfnmsub231ps', 'vfnmsub231pd', 'vfnmsub321ps', 'vfnmsub321pd', - 'vfmadd132ss', 'vfmadd132sd', 'vfmadd312ss', 'vfmadd312sd', 'vfmadd213ss', - 'vfmadd213sd', 'vfmadd123ss', 'vfmadd123sd', 'vfmadd231ss', 'vfmadd231sd', - 'vfmadd321ss', 'vfmadd321sd', 'vfmsub132ss', 'vfmsub132sd', 'vfmsub312ss', - 'vfmsub312sd', 'vfmsub213ss', 'vfmsub213sd', 'vfmsub123ss', 'vfmsub123sd', - 'vfmsub231ss', 'vfmsub231sd', 'vfmsub321ss', 'vfmsub321sd', 'vfnmadd132ss', - 'vfnmadd132sd', 'vfnmadd312ss', 'vfnmadd312sd', 'vfnmadd213ss', - 'vfnmadd213sd', 'vfnmadd123ss', 'vfnmadd123sd', 'vfnmadd231ss', - 'vfnmadd231sd', 'vfnmadd321ss', 'vfnmadd321sd', 'vfnmsub132ss', - 'vfnmsub132sd', 'vfnmsub312ss', 'vfnmsub312sd', 'vfnmsub213ss', - 'vfnmsub213sd', 'vfnmsub123ss', 'vfnmsub123sd', 'vfnmsub231ss', - 'vfnmsub231sd', 'vfnmsub321ss', 'vfnmsub321sd', + 'vfmadd132ps', 'vfmadd132pd', 'vfmadd312ps', 'vfmadd312pd', 'vfmadd213ps', 'vfmadd213pd', + 'vfmadd123ps', 'vfmadd123pd', 'vfmadd231ps', 'vfmadd231pd', 'vfmadd321ps', 'vfmadd321pd', + 'vfmaddsub132ps', 'vfmaddsub132pd', 'vfmaddsub312ps', 'vfmaddsub312pd', 'vfmaddsub213ps', + 'vfmaddsub213pd', 'vfmaddsub123ps', 'vfmaddsub123pd', 'vfmaddsub231ps', 'vfmaddsub231pd', + 'vfmaddsub321ps', 'vfmaddsub321pd', 'vfmsub132ps', 'vfmsub132pd', 'vfmsub312ps', 'vfmsub312pd', + 'vfmsub213ps', 'vfmsub213pd', 'vfmsub123ps', 'vfmsub123pd', 'vfmsub231ps', 'vfmsub231pd', + 'vfmsub321ps', 'vfmsub321pd', 'vfmsubadd132ps', 'vfmsubadd132pd', 'vfmsubadd312ps', + 'vfmsubadd312pd', 'vfmsubadd213ps', 'vfmsubadd213pd', 'vfmsubadd123ps', 'vfmsubadd123pd', + 'vfmsubadd231ps', 'vfmsubadd231pd', 'vfmsubadd321ps', 'vfmsubadd321pd', 'vfnmadd132ps', + 'vfnmadd132pd', 'vfnmadd312ps', 'vfnmadd312pd', 'vfnmadd213ps', 'vfnmadd213pd', 'vfnmadd123ps', + 'vfnmadd123pd', 'vfnmadd231ps', 'vfnmadd231pd', 'vfnmadd321ps', 'vfnmadd321pd', 'vfnmsub132ps', + 
'vfnmsub132pd', 'vfnmsub312ps', 'vfnmsub312pd', 'vfnmsub213ps', 'vfnmsub213pd', 'vfnmsub123ps', + 'vfnmsub123pd', 'vfnmsub231ps', 'vfnmsub231pd', 'vfnmsub321ps', 'vfnmsub321pd', 'vfmadd132ss', + 'vfmadd132sd', 'vfmadd312ss', 'vfmadd312sd', 'vfmadd213ss', 'vfmadd213sd', 'vfmadd123ss', + 'vfmadd123sd', 'vfmadd231ss', 'vfmadd231sd', 'vfmadd321ss', 'vfmadd321sd', 'vfmsub132ss', + 'vfmsub132sd', 'vfmsub312ss', 'vfmsub312sd', 'vfmsub213ss', 'vfmsub213sd', 'vfmsub123ss', + 'vfmsub123sd', 'vfmsub231ss', 'vfmsub231sd', 'vfmsub321ss', 'vfmsub321sd', 'vfnmadd132ss', + 'vfnmadd132sd', 'vfnmadd312ss', 'vfnmadd312sd', 'vfnmadd213ss', 'vfnmadd213sd', 'vfnmadd123ss', + 'vfnmadd123sd', 'vfnmadd231ss', 'vfnmadd231sd', 'vfnmadd321ss', 'vfnmadd321sd', 'vfnmsub132ss', + 'vfnmsub132sd', 'vfnmsub312ss', 'vfnmsub312sd', 'vfnmsub213ss', 'vfnmsub213sd', 'vfnmsub123ss', + 'vfnmsub123sd', 'vfnmsub231ss', 'vfnmsub231sd', 'vfnmsub321ss', 'vfnmsub321sd', -- Intel post-32 nm processor instructions. - 'rdfsbase', 'rdgsbase', 'rdrand', 'wrfsbase', 'wrgsbase', 'vcvtph2ps', - 'vcvtps2ph', 'adcx', 'adox', 'rdseed', 'clac', 'stac', + 'rdfsbase', 'rdgsbase', 'rdrand', 'wrfsbase', 'wrgsbase', 'vcvtph2ps', 'vcvtps2ph', 'adcx', + 'adox', 'rdseed', 'clac', 'stac', -- VIA (Centaur) security instructions. - 'xstore', 'xcryptecb', 'xcryptcbc', 'xcryptctr', 'xcryptcfb', 'xcryptofb', - 'montmul', 'xsha1', 'xsha256', + 'xstore', 'xcryptecb', 'xcryptcbc', 'xcryptctr', 'xcryptcfb', 'xcryptofb', 'montmul', 'xsha1', + 'xsha256', -- AMD Lightweight Profiling (LWP) instructions. 'llwpcb', 'slwpcb', 'lwpval', 'lwpins', -- AMD XOP and FMA4 instructions (SSE5). 
- 'vfmaddpd', 'vfmaddps', 'vfmaddsd', 'vfmaddss', 'vfmaddsubpd', - 'vfmaddsubps', 'vfmsubaddpd', 'vfmsubaddps', 'vfmsubpd', 'vfmsubps', - 'vfmsubsd', 'vfmsubss', 'vfnmaddpd', 'vfnmaddps', 'vfnmaddsd', 'vfnmaddss', - 'vfnmsubpd', 'vfnmsubps', 'vfnmsubsd', 'vfnmsubss', 'vfrczpd', 'vfrczps', - 'vfrczsd', 'vfrczss', 'vpcmov', 'vpcomb', 'vpcomd', 'vpcomq', 'vpcomub', - 'vpcomud', 'vpcomuq', 'vpcomuw', 'vpcomw', 'vphaddbd', 'vphaddbq', - 'vphaddbw', 'vphadddq', 'vphaddubd', 'vphaddubq', 'vphaddubw', 'vphaddudq', - 'vphadduwd', 'vphadduwq', 'vphaddwd', 'vphaddwq', 'vphsubbw', 'vphsubdq', - 'vphsubwd', 'vpmacsdd', 'vpmacsdqh', 'vpmacsdql', 'vpmacssdd', 'vpmacssdqh', - 'vpmacssdql', 'vpmacsswd', 'vpmacssww', 'vpmacswd', 'vpmacsww', 'vpmadcsswd', - 'vpmadcswd', 'vpperm', 'vprotb', 'vprotd', 'vprotq', 'vprotw', 'vpshab', - 'vpshad', 'vpshaq', 'vpshaw', 'vpshlb', 'vpshld', 'vpshlq', 'vpshlw', + 'vfmaddpd', 'vfmaddps', 'vfmaddsd', 'vfmaddss', 'vfmaddsubpd', 'vfmaddsubps', 'vfmsubaddpd', + 'vfmsubaddps', 'vfmsubpd', 'vfmsubps', 'vfmsubsd', 'vfmsubss', 'vfnmaddpd', 'vfnmaddps', + 'vfnmaddsd', 'vfnmaddss', 'vfnmsubpd', 'vfnmsubps', 'vfnmsubsd', 'vfnmsubss', 'vfrczpd', + 'vfrczps', 'vfrczsd', 'vfrczss', 'vpcmov', 'vpcomb', 'vpcomd', 'vpcomq', 'vpcomub', 'vpcomud', + 'vpcomuq', 'vpcomuw', 'vpcomw', 'vphaddbd', 'vphaddbq', 'vphaddbw', 'vphadddq', 'vphaddubd', + 'vphaddubq', 'vphaddubw', 'vphaddudq', 'vphadduwd', 'vphadduwq', 'vphaddwd', 'vphaddwq', + 'vphsubbw', 'vphsubdq', 'vphsubwd', 'vpmacsdd', 'vpmacsdqh', 'vpmacsdql', 'vpmacssdd', + 'vpmacssdqh', 'vpmacssdql', 'vpmacsswd', 'vpmacssww', 'vpmacswd', 'vpmacsww', 'vpmadcsswd', + 'vpmadcswd', 'vpperm', 'vprotb', 'vprotd', 'vprotq', 'vprotw', 'vpshab', 'vpshad', 'vpshaq', + 'vpshaw', 'vpshlb', 'vpshld', 'vpshlq', 'vpshlw', -- Intel AVX2 instructions. 
- 'vmpsadbw', 'vpabsb', 'vpabsw', 'vpabsd', 'vpacksswb', 'vpackssdw', - 'vpackusdw', 'vpackuswb', 'vpaddb', 'vpaddw', 'vpaddd', 'vpaddq', - 'vpaddsb', 'vpaddsw', 'vpaddusb', 'vpaddusw', 'vpalignr', 'vpand', - 'vpandn', 'vpavgb', 'vpavgw', 'vpblendvb', 'vpblendw', 'vpcmpeqb', - 'vpcmpeqw', 'vpcmpeqd', 'vpcmpeqq', 'vpcmpgtb', 'vpcmpgtw', 'vpcmpgtd', - 'vpcmpgtq', 'vphaddw', 'vphaddd', 'vphaddsw', 'vphsubw', 'vphsubd', - 'vphsubsw', 'vpmaddubsw', 'vpmaddwd', 'vpmaxsb', 'vpmaxsw', 'vpmaxsd', - 'vpmaxub', 'vpmaxuw', 'vpmaxud', 'vpminsb', 'vpminsw', 'vpminsd', 'vpminub', - 'vpminuw', 'vpminud', 'vpmovmskb', 'vpmovsxbw', 'vpmovsxbd', 'vpmovsxbq', - 'vpmovsxwd', 'vpmovsxwq', 'vpmovsxdq', 'vpmovzxbw', 'vpmovzxbd', 'vpmovzxbq', - 'vpmovzxwd', 'vpmovzxwq', 'vpmovzxdq', 'vpmuldq', 'vpmulhrsw', 'vpmulhuw', - 'vpmulhw', 'vpmullw', 'vpmulld', 'vpmuludq', 'vpor', 'vpsadbw', 'vpshufb', - 'vpshufd', 'vpshufhw', 'vpshuflw', 'vpsignb', 'vpsignw', 'vpsignd', - 'vpslldq', 'vpsllw', 'vpslld', 'vpsllq', 'vpsraw', 'vpsrad', 'vpsrldq', - 'vpsrlw', 'vpsrld', 'vpsrlq', 'vpsubb', 'vpsubw', 'vpsubd', 'vpsubq', - 'vpsubsb', 'vpsubsw', 'vpsubusb', 'vpsubusw', 'vpunpckhbw', 'vpunpckhwd', - 'vpunpckhdq', 'vpunpckhqdq', 'vpunpcklbw', 'vpunpcklwd', 'vpunpckldq', - 'vpunpcklqdq', 'vpxor', 'vmovntdqa', 'vbroadcastss', 'vbroadcastsd', - 'vbroadcasti128', 'vpblendd', 'vpbroadcastb', 'vpbroadcastw', 'vpbroadcastd', - 'vpbroadcastq', 'vpermd', 'vpermpd', 'vpermps', 'vpermq', 'vperm2i128', - 'vextracti128', 'vinserti128', 'vpmaskmovd', 'vpmaskmovq', 'vpmaskmovd', - 'vpmaskmovq', 'vpsllvd', 'vpsllvq', 'vpsllvd', 'vpsllvq', 'vpsravd', - 'vpsrlvd', 'vpsrlvq', 'vpsrlvd', 'vpsrlvq', 'vgatherdpd', 'vgatherqpd', - 'vgatherdpd', 'vgatherqpd', 'vgatherdps', 'vgatherqps', 'vgatherdps', - 'vgatherqps', 'vpgatherdd', 'vpgatherqd', 'vpgatherdd', 'vpgatherqd', + 'vmpsadbw', 'vpabsb', 'vpabsw', 'vpabsd', 'vpacksswb', 'vpackssdw', 'vpackusdw', 'vpackuswb', + 'vpaddb', 'vpaddw', 'vpaddd', 'vpaddq', 'vpaddsb', 'vpaddsw', 
'vpaddusb', 'vpaddusw', 'vpalignr', + 'vpand', 'vpandn', 'vpavgb', 'vpavgw', 'vpblendvb', 'vpblendw', 'vpcmpeqb', 'vpcmpeqw', + 'vpcmpeqd', 'vpcmpeqq', 'vpcmpgtb', 'vpcmpgtw', 'vpcmpgtd', 'vpcmpgtq', 'vphaddw', 'vphaddd', + 'vphaddsw', 'vphsubw', 'vphsubd', 'vphsubsw', 'vpmaddubsw', 'vpmaddwd', 'vpmaxsb', 'vpmaxsw', + 'vpmaxsd', 'vpmaxub', 'vpmaxuw', 'vpmaxud', 'vpminsb', 'vpminsw', 'vpminsd', 'vpminub', 'vpminuw', + 'vpminud', 'vpmovmskb', 'vpmovsxbw', 'vpmovsxbd', 'vpmovsxbq', 'vpmovsxwd', 'vpmovsxwq', + 'vpmovsxdq', 'vpmovzxbw', 'vpmovzxbd', 'vpmovzxbq', 'vpmovzxwd', 'vpmovzxwq', 'vpmovzxdq', + 'vpmuldq', 'vpmulhrsw', 'vpmulhuw', 'vpmulhw', 'vpmullw', 'vpmulld', 'vpmuludq', 'vpor', + 'vpsadbw', 'vpshufb', 'vpshufd', 'vpshufhw', 'vpshuflw', 'vpsignb', 'vpsignw', 'vpsignd', + 'vpslldq', 'vpsllw', 'vpslld', 'vpsllq', 'vpsraw', 'vpsrad', 'vpsrldq', 'vpsrlw', 'vpsrld', + 'vpsrlq', 'vpsubb', 'vpsubw', 'vpsubd', 'vpsubq', 'vpsubsb', 'vpsubsw', 'vpsubusb', 'vpsubusw', + 'vpunpckhbw', 'vpunpckhwd', 'vpunpckhdq', 'vpunpckhqdq', 'vpunpcklbw', 'vpunpcklwd', 'vpunpckldq', + 'vpunpcklqdq', 'vpxor', 'vmovntdqa', 'vbroadcastss', 'vbroadcastsd', 'vbroadcasti128', 'vpblendd', + 'vpbroadcastb', 'vpbroadcastw', 'vpbroadcastd', 'vpbroadcastq', 'vpermd', 'vpermpd', 'vpermps', + 'vpermq', 'vperm2i128', 'vextracti128', 'vinserti128', 'vpmaskmovd', 'vpmaskmovq', 'vpmaskmovd', + 'vpmaskmovq', 'vpsllvd', 'vpsllvq', 'vpsllvd', 'vpsllvq', 'vpsravd', 'vpsrlvd', 'vpsrlvq', + 'vpsrlvd', 'vpsrlvq', 'vgatherdpd', 'vgatherqpd', 'vgatherdpd', 'vgatherqpd', 'vgatherdps', + 'vgatherqps', 'vgatherdps', 'vgatherqps', 'vpgatherdd', 'vpgatherqd', 'vpgatherdd', 'vpgatherqd', 'vpgatherdq', 'vpgatherqq', 'vpgatherdq', 'vpgatherqq', -- Transactional Synchronization Extensions (TSX). 'xabort', 'xbegin', 'xend', 'xtest', - -- Intel BMI1 and BMI2 instructions, AMD TBM instructions. 
- 'andn', 'bextr', 'blci', 'blcic', 'blsi', 'blsic', 'blcfill', 'blsfill', - 'blcmsk', 'blsmsk', 'blsr', 'blcs', 'bzhi', 'mulx', 'pdep', 'pext', 'rorx', - 'sarx', 'shlx', 'shrx', 'tzcnt', 'tzmsk', 't1mskc', + -- Intel BMI1 and BMI2 instructions AMD TBM instructions. + 'andn', 'bextr', 'blci', 'blcic', 'blsi', 'blsic', 'blcfill', 'blsfill', 'blcmsk', 'blsmsk', + 'blsr', 'blcs', 'bzhi', 'mulx', 'pdep', 'pext', 'rorx', 'sarx', 'shlx', 'shrx', 'tzcnt', 'tzmsk', + 't1mskc', -- Systematic names for the hinting nop instructions. - 'hint_nop0', 'hint_nop1', 'hint_nop2', 'hint_nop3', 'hint_nop4', - 'hint_nop5', 'hint_nop6', 'hint_nop7', 'hint_nop8', 'hint_nop9', - 'hint_nop10', 'hint_nop11', 'hint_nop12', 'hint_nop13', 'hint_nop14', - 'hint_nop15', 'hint_nop16', 'hint_nop17', 'hint_nop18', 'hint_nop19', - 'hint_nop20', 'hint_nop21', 'hint_nop22', 'hint_nop23', 'hint_nop24', - 'hint_nop25', 'hint_nop26', 'hint_nop27', 'hint_nop28', 'hint_nop29', - 'hint_nop30', 'hint_nop31', 'hint_nop32', 'hint_nop33', 'hint_nop34', - 'hint_nop35', 'hint_nop36', 'hint_nop37', 'hint_nop38', 'hint_nop39', - 'hint_nop40', 'hint_nop41', 'hint_nop42', 'hint_nop43', 'hint_nop44', - 'hint_nop45', 'hint_nop46', 'hint_nop47', 'hint_nop48', 'hint_nop49', - 'hint_nop50', 'hint_nop51', 'hint_nop52', 'hint_nop53', 'hint_nop54', - 'hint_nop55', 'hint_nop56', 'hint_nop57', 'hint_nop58', 'hint_nop59', - 'hint_nop60', 'hint_nop61', 'hint_nop62', 'hint_nop63', -}) - --- Types. -local sizes = word_match{ - 'byte', 'word', 'dword', 'qword', 'tword', 'oword', 'yword', - 'a16', 'a32', 'a64', 'o16', 'o32', 'o64' -- instructions -} -local wrt_types = '..' 
* word_match{ - 'start', 'gotpc', 'gotoff', 'gottpoff', 'got', 'plt', 'sym', 'tlsie' -} -local type = token(l.TYPE, sizes + wrt_types) + 'hint_nop0', 'hint_nop1', 'hint_nop2', 'hint_nop3', 'hint_nop4', 'hint_nop5', 'hint_nop6', + 'hint_nop7', 'hint_nop8', 'hint_nop9', 'hint_nop10', 'hint_nop11', 'hint_nop12', 'hint_nop13', + 'hint_nop14', 'hint_nop15', 'hint_nop16', 'hint_nop17', 'hint_nop18', 'hint_nop19', 'hint_nop20', + 'hint_nop21', 'hint_nop22', 'hint_nop23', 'hint_nop24', 'hint_nop25', 'hint_nop26', 'hint_nop27', + 'hint_nop28', 'hint_nop29', 'hint_nop30', 'hint_nop31', 'hint_nop32', 'hint_nop33', 'hint_nop34', + 'hint_nop35', 'hint_nop36', 'hint_nop37', 'hint_nop38', 'hint_nop39', 'hint_nop40', 'hint_nop41', + 'hint_nop42', 'hint_nop43', 'hint_nop44', 'hint_nop45', 'hint_nop46', 'hint_nop47', 'hint_nop48', + 'hint_nop49', 'hint_nop50', 'hint_nop51', 'hint_nop52', 'hint_nop53', 'hint_nop54', 'hint_nop55', + 'hint_nop56', 'hint_nop57', 'hint_nop58', 'hint_nop59', 'hint_nop60', 'hint_nop61', 'hint_nop62', + 'hint_nop63' +})) +lex:add_style('instruction', lexer.styles['function']) -- Registers. -local register = token('register', word_match{ +lex:add_rule('register', token('register', word_match{ -- 32-bit registers. 
- 'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cx', 'dh', 'di', 'dl', - 'dx', 'eax', 'ebx', 'ebx', 'ecx', 'edi', 'edx', 'esi', 'esp', 'fs', 'mm0', - 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6', 'mm7', 'si', 'st0', 'st1', 'st2', - 'st3', 'st4', 'st5', 'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', - 'xmm5', 'xmm6', 'xmm7', 'ymm0', 'ymm1', 'ymm2', 'ymm3', 'ymm4', 'ymm5', - 'ymm6', 'ymm7', + 'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cx', 'dh', 'di', 'dl', 'dx', 'eax', 'ebx', + 'ebx', 'ecx', 'edi', 'edx', 'esi', 'esp', 'fs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6', + 'mm7', 'si', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5', 'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', + 'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7', 'ymm0', 'ymm1', 'ymm2', 'ymm3', 'ymm4', 'ymm5', 'ymm6', + 'ymm7', -- 64-bit registers. - 'bpl', 'dil', 'gs', 'r8', 'r8b', 'r8w', 'r9', 'r9b', 'r9w', 'r10', 'r10b', - 'r10w', 'r11', 'r11b', 'r11w', 'r12', 'r12b', 'r12w', 'r13', 'r13b', 'r13w', - 'r14', 'r14b', 'r14w', 'r15', 'r15b', 'r15w', 'rax', 'rbp', 'rbx', 'rcx', - 'rdi', 'rdx', 'rsi', 'rsp', 'sil', 'xmm8', 'xmm9', 'xmm10', 'xmm11', 'xmm12', - 'xmm13', 'xmm14', 'xmm15', 'ymm8', 'ymm9', 'ymm10', 'ymm11', 'ymm12', 'ymm13', + 'bpl', 'dil', 'gs', 'r8', 'r8b', 'r8w', 'r9', 'r9b', 'r9w', 'r10', 'r10b', 'r10w', 'r11', 'r11b', + 'r11w', 'r12', 'r12b', 'r12w', 'r13', 'r13b', 'r13w', 'r14', 'r14b', 'r14w', 'r15', 'r15b', + 'r15w', 'rax', 'rbp', 'rbx', 'rcx', 'rdi', 'rdx', 'rsi', 'rsp', 'sil', 'xmm8', 'xmm9', 'xmm10', + 'xmm11', 'xmm12', 'xmm13', 'xmm14', 'xmm15', 'ymm8', 'ymm9', 'ymm10', 'ymm11', 'ymm12', 'ymm13', 'ymm14', 'ymm15' -}) +})) +lex:add_style('register', lexer.styles.constant) -local word = (l.alpha + S('$._?')) * (l.alnum + S('$._?#@~'))^0 +-- Types. +local sizes = word_match{ + 'byte', 'word', 'dword', 'qword', 'tword', 'oword', 'yword', + -- Instructions. + 'a16', 'a32', 'a64', 'o16', 'o32', 'o64' +} +local wrt_types = '..' 
* word_match('start gotpc gotoff gottpoff got plt sym tlsie') +lex:add_rule('type', token(lexer.TYPE, sizes + wrt_types)) + +-- Constants. +local word = (lexer.alpha + S('$._?')) * (lexer.alnum + S('$._?#@~'))^0 +local constants = word_match{ + '__float128h__', '__float128l__', '__float16__', '__float32__', '__float64__', '__float8__', + '__float80e__', '__float80m__', '__Infinity__', '__NaN__', '__QNaN__', '__SNaN__' +} +lex:add_rule('constant', token(lexer.CONSTANT, constants + '$' * P('$')^-1 * -word)) -- Labels. -local label = token(l.LABEL, word * ':') +lex:add_rule('label', token(lexer.LABEL, word * ':')) -- Identifiers. -local identifier = token(l.IDENTIFIER, word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) --- Constants. -local constants = word_match{ - '__float8__', '__float16__', '__float32__', '__float64__', '__float80m__', - '__float80e__', '__float128l__', '__float128h__', '__Infinity__', '__QNaN__', - '__NaN__', '__SNaN__' -} -local constant = token(l.CONSTANT, constants + '$' * P('$')^-1 * -identifier) +-- Strings. +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) --- Operators. -local operator = token(l.OPERATOR, S('+-/*%<>!=^&|~:,()[]')) +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(';'))) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'instruction', instruction}, - {'register', register}, - {'type', type}, - {'constant', constant}, - {'label', label}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'preproc', preproc}, - {'operator', operator}, -} +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('hqb')^-1)) -M._tokenstyles = { - instruction = l.STYLE_FUNCTION, - register = l.STYLE_CONSTANT, +-- Preprocessor. 
+local pp_word = word_match{ + 'arg', 'assign', 'clear', 'define', 'defstr', 'deftok', 'depend', 'elif', 'elifctx', 'elifdef', + 'elifempty', 'elifenv', 'elifid', 'elifidn', 'elifidni', 'elifmacro', 'elifn', 'elifnctx', + 'elifndef', 'elifnempty', 'elifnenv', 'elifnid', 'elifnidn', 'elifnidni', 'elifnmacro', + 'elifnnum', 'elifnstr', 'elifntoken', 'elifnum', 'elifstr', 'eliftoken', 'else', 'endif', + 'endmacro', 'endrep', 'endwhile', 'error', 'exitmacro', 'exitrep', 'exitwhile', 'fatal', 'final', + 'idefine', 'idefstr', 'ideftok', 'if', 'ifctx', 'ifdef', 'ifempty', 'ifenv', 'ifid', 'ifidn', + 'ifidni', 'ifmacro', 'ifn', 'ifnctx', 'ifndef', 'ifnempty', 'ifnenv', 'ifnid', 'ifnidn', + 'ifnidni', 'ifnmacro', 'ifnnum', 'ifnstr', 'ifntoken', 'ifnum', 'ifstr', 'iftoken', 'imacro', + 'include', 'ixdefine', 'line', 'local', 'macro', 'pathsearch', 'pop', 'push', 'rep', 'repl', + 'rmacro', 'rotate', 'stacksize', 'strcat', 'strlen', 'substr', 'undef', 'unmacro', 'use', + 'warning', 'while', 'xdefine' } +local pp_symbol = '??' + S('!$+?') + '%' * -lexer.space + lexer.digit^1 +lex:add_rule('preproc', token(lexer.PREPROCESSOR, '%' * (pp_word + pp_symbol))) -M._foldsymbols = { - _patterns = {'%l+', '//'}, - [l.PREPROCESSOR] = { - ['if'] = 1, endif = -1, macro = 1, endmacro = -1, rep = 1, endrep = -1, - ['while'] = 1, endwhile = -1, - }, - [l.KEYWORD] = {struc = 1, endstruc = -1}, - [l.COMMENT] = {['//'] = l.fold_line_comments('//')} -} +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|~:,()[]'))) + +-- Fold points. 
+lex:add_fold_point(lexer.PREPROCESSOR, '%if', '%endif') +lex:add_fold_point(lexer.PREPROCESSOR, '%macro', '%endmacro') +lex:add_fold_point(lexer.PREPROCESSOR, '%rep', '%endrep') +lex:add_fold_point(lexer.PREPROCESSOR, '%while', '%endwhile') +lex:add_fold_point(lexer.KEYWORD, 'struc', 'endstruc') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines(';')) -return M +return lex diff --git a/lua/lexers/asp.lua b/lua/lexers/asp.lua @@ -1,42 +1,31 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- ASP LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'asp'} - --- Embedded in HTML. -local html = l.load('html') +local html = lexer.load('html') +local lex = lexer.new('asp', {inherit = html}) -- proxy for HTML -- Embedded VB. -local vb = l.load('vb') +local vb = lexer.load('vb') local vb_start_rule = token('asp_tag', '<%' * P('=')^-1) local vb_end_rule = token('asp_tag', '%>') -l.embed_lexer(html, vb, vb_start_rule, vb_end_rule) +lex:embed(vb, vb_start_rule, vb_end_rule) +lex:add_style('asp_tag', lexer.styles.embedded) -- Embedded VBScript. 
-local vbs = l.load('vbscript') -local script_element = word_match({'script'}, nil, html.case_insensitive_tags) +local vbs = lexer.load('vb', 'vbscript') +local script_element = word_match('script', true) local vbs_start_rule = #(P('<') * script_element * (P(function(input, index) if input:find('^%s+language%s*=%s*(["\'])vbscript%1', index) or - input:find('^%s+type%s*=%s*(["\'])text/vbscript%1', index) then - return index - end + input:find('^%s+type%s*=%s*(["\'])text/vbscript%1', index) then return index end end) + '>')) * html.embed_start_tag -- <script language="vbscript"> -local vbs_end_rule = #('</' * script_element * l.space^0 * '>') * - html.embed_end_tag -- </script> -l.embed_lexer(html, vbs, vbs_start_rule, vbs_end_rule) - -M._tokenstyles = { - asp_tag = l.STYLE_EMBEDDED -} +local vbs_end_rule = #('</' * script_element * lexer.space^0 * '>') * html.embed_end_tag -- </script> +lex:embed(vbs, vbs_start_rule, vbs_end_rule) -local _foldsymbols = html._foldsymbols -_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '<%%' -_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '%%>' -_foldsymbols.asp_tag = {['<%'] = 1, ['%>'] = -1} -M._foldsymbols = _foldsymbols +-- Fold points. +lex:add_fold_point('asp_tag', '<%', '%>') -return M +return lex diff --git a/lua/lexers/autoit.lua b/lua/lexers/autoit.lua @@ -1,168 +1,129 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- AutoIt LPeg lexer. -- Contributed by Jeff Stone. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'autoit'} +local lex = lexer.new('autoit') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. 
-local line_comment = ';' * l.nonnewline_esc^0 -local block_comment1 = '#comments-start' * (l.any - '#comments-end')^0 * - P('#comments-end')^-1 -local block_comment2 = '#cs' * (l.any - '#ce')^0 * P('#ce')^-1 -local comment = token(l.COMMENT, line_comment + block_comment1 + block_comment2) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local kw = token(l.KEYWORD, word_match({ - 'False', 'True', 'And', 'Or', 'Not', 'ContinueCase', 'ContinueLoop', - 'Default', 'Dim', 'Global', 'Local', 'Const', 'Do', 'Until', 'Enum', 'Exit', - 'ExitLoop', 'For', 'To', 'Step', 'Next', 'In', 'Func', 'Return', 'EndFunc', - 'If', 'Then', 'ElseIf', 'Else', 'EndIf', 'Null', 'ReDim', 'Select', 'Case', - 'EndSelect', 'Static', 'Switch', 'EndSwitch', 'Volatile', 'While', 'WEnd', - 'With', 'EndWith' -}, nil, true)) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match({ + 'False', 'True', 'And', 'Or', 'Not', 'ContinueCase', 'ContinueLoop', 'Default', 'Dim', 'Global', + 'Local', 'Const', 'Do', 'Until', 'Enum', 'Exit', 'ExitLoop', 'For', 'To', 'Step', 'Next', 'In', + 'Func', 'Return', 'EndFunc', 'If', 'Then', 'ElseIf', 'Else', 'EndIf', 'Null', 'ReDim', 'Select', + 'Case', 'EndSelect', 'Static', 'Switch', 'EndSwitch', 'Volatile', 'While', 'WEnd', 'With', + 'EndWith' +}, true))) -- Functions. 
-local func = token(l.FUNCTION, word_match({ - 'Abs', 'ACos', 'AdlibRegister', 'AdlibUnRegister', 'Asc', 'AscW', 'ASin', - 'Assign', 'ATan', 'AutoItSetOption', 'AutoItWinGetTitle', 'AutoItWinSetTitle', - 'Beep', 'Binary', 'BinaryLen', 'BinaryMid', 'BinaryToString', 'BitAND', - 'BitNOT', 'BitOR', 'BitRotate', 'BitShift', 'BitXOR', 'BlockInput', 'Break', - 'Call', 'CDTray', 'Ceiling', 'Chr', 'ChrW', 'ClipGet', 'ClipPut', - 'ConsoleRead', 'ConsoleWrite', 'ConsoleWriteError', 'ControlClick', - 'ControlCommand', 'ControlDisable', 'ControlEnable', 'ControlFocus', - 'ControlGetFocus', 'ControlGetHandle', 'ControlGetPos', 'ControlGetText', - 'ControlHide', 'ControlListView', 'ControlMove', 'ControlSend', - 'ControlSetText', 'ControlShow', 'ControlTreeView', 'Cos', 'Dec', 'DirCopy', - 'DirCreate', 'DirGetSize', 'DirMove', 'DirRemove', 'DllCall', - 'DllCallAddress', 'DllCallbackFree', 'DllCallbackGetPtr', - 'DllCallbackRegister', 'DllClose', 'DllOpen', 'DllStructCreate', - 'DllStructGetData', 'DllStructGetPtr', 'DllStructGetSize', 'DllStructSetData', - 'DriveGetDrive', 'DriveGetFileSystem', 'DriveGetLabel', 'DriveGetSerial', - 'DriveGetType', 'DriveMapAdd', 'DriveMapDel', 'DriveMapGet', 'DriveSetLabel', - 'DriveSpaceFree', 'DriveSpaceTotal', 'DriveStatus', 'EnvGet', 'EnvSet', - 'EnvUpdate', 'Eval', 'Execute', 'Exp', 'FileChangeDir', 'FileClose', - 'FileCopy', 'FileCreateNTFSLink', 'FileCreateShortcut', 'FileDelete', - 'FileExists', 'FileFindFirstFile', 'FileFindNextFile', 'FileFlush', - 'FileGetAttrib', 'FileGetEncoding', 'FileGetLongName', 'FileGetPos', - 'FileGetShortcut', 'FileGetShortName', 'FileGetSize', 'FileGetTime', - 'FileGetVersion', 'FileInstall', 'FileMove', 'FileOpen', 'FileOpenDialog', - 'FileRead', 'FileReadLine', 'FileReadToArray', 'FileRecycle', - 'FileRecycleEmpty', 'FileSaveDialog', 'FileSelectFolder', 'FileSetAttrib', - 'FileSetEnd', 'FileSetPos', 'FileSetTime', 'FileWrite', 'FileWriteLine', - 'Floor', 'FtpSetProxy', 'FuncName', 'GUICreate', 
'GUICtrlCreateAvi', - 'GUICtrlCreateButton', 'GUICtrlCreateCheckbox', 'GUICtrlCreateCombo', - 'GUICtrlCreateContextMenu', 'GUICtrlCreateDate', 'GUICtrlCreateDummy', - 'GUICtrlCreateEdit', 'GUICtrlCreateGraphic', 'GUICtrlCreateGroup', - 'GUICtrlCreateIcon', 'GUICtrlCreateInput', 'GUICtrlCreateLabel', - 'GUICtrlCreateList', 'GUICtrlCreateListView', 'GUICtrlCreateListViewItem', - 'GUICtrlCreateMenu', 'GUICtrlCreateMenuItem', 'GUICtrlCreateMonthCal', - 'GUICtrlCreateObj', 'GUICtrlCreatePic', 'GUICtrlCreateProgress', - 'GUICtrlCreateRadio', 'GUICtrlCreateSlider', 'GUICtrlCreateTab', +lex:add_rule('function', token(lexer.FUNCTION, word_match({ + 'Abs', 'ACos', 'AdlibRegister', 'AdlibUnRegister', 'Asc', 'AscW', 'ASin', 'Assign', 'ATan', + 'AutoItSetOption', 'AutoItWinGetTitle', 'AutoItWinSetTitle', 'Beep', 'Binary', 'BinaryLen', + 'BinaryMid', 'BinaryToString', 'BitAND', 'BitNOT', 'BitOR', 'BitRotate', 'BitShift', 'BitXOR', + 'BlockInput', 'Break', 'Call', 'CDTray', 'Ceiling', 'Chr', 'ChrW', 'ClipGet', 'ClipPut', + 'ConsoleRead', 'ConsoleWrite', 'ConsoleWriteError', 'ControlClick', 'ControlCommand', + 'ControlDisable', 'ControlEnable', 'ControlFocus', 'ControlGetFocus', 'ControlGetHandle', + 'ControlGetPos', 'ControlGetText', 'ControlHide', 'ControlListView', 'ControlMove', 'ControlSend', + 'ControlSetText', 'ControlShow', 'ControlTreeView', 'Cos', 'Dec', 'DirCopy', 'DirCreate', + 'DirGetSize', 'DirMove', 'DirRemove', 'DllCall', 'DllCallAddress', 'DllCallbackFree', + 'DllCallbackGetPtr', 'DllCallbackRegister', 'DllClose', 'DllOpen', 'DllStructCreate', + 'DllStructGetData', 'DllStructGetPtr', 'DllStructGetSize', 'DllStructSetData', 'DriveGetDrive', + 'DriveGetFileSystem', 'DriveGetLabel', 'DriveGetSerial', 'DriveGetType', 'DriveMapAdd', + 'DriveMapDel', 'DriveMapGet', 'DriveSetLabel', 'DriveSpaceFree', 'DriveSpaceTotal', 'DriveStatus', + 'EnvGet', 'EnvSet', 'EnvUpdate', 'Eval', 'Execute', 'Exp', 'FileChangeDir', 'FileClose', + 'FileCopy', 'FileCreateNTFSLink', 
'FileCreateShortcut', 'FileDelete', 'FileExists', + 'FileFindFirstFile', 'FileFindNextFile', 'FileFlush', 'FileGetAttrib', 'FileGetEncoding', + 'FileGetLongName', 'FileGetPos', 'FileGetShortcut', 'FileGetShortName', 'FileGetSize', + 'FileGetTime', 'FileGetVersion', 'FileInstall', 'FileMove', 'FileOpen', 'FileOpenDialog', + 'FileRead', 'FileReadLine', 'FileReadToArray', 'FileRecycle', 'FileRecycleEmpty', + 'FileSaveDialog', 'FileSelectFolder', 'FileSetAttrib', 'FileSetEnd', 'FileSetPos', 'FileSetTime', + 'FileWrite', 'FileWriteLine', 'Floor', 'FtpSetProxy', 'FuncName', 'GUICreate', 'GUICtrlCreateAvi', + 'GUICtrlCreateButton', 'GUICtrlCreateCheckbox', 'GUICtrlCreateCombo', 'GUICtrlCreateContextMenu', + 'GUICtrlCreateDate', 'GUICtrlCreateDummy', 'GUICtrlCreateEdit', 'GUICtrlCreateGraphic', + 'GUICtrlCreateGroup', 'GUICtrlCreateIcon', 'GUICtrlCreateInput', 'GUICtrlCreateLabel', + 'GUICtrlCreateList', 'GUICtrlCreateListView', 'GUICtrlCreateListViewItem', 'GUICtrlCreateMenu', + 'GUICtrlCreateMenuItem', 'GUICtrlCreateMonthCal', 'GUICtrlCreateObj', 'GUICtrlCreatePic', + 'GUICtrlCreateProgress', 'GUICtrlCreateRadio', 'GUICtrlCreateSlider', 'GUICtrlCreateTab', 'GUICtrlCreateTabItem', 'GUICtrlCreateTreeView', 'GUICtrlCreateTreeViewItem', - 'GUICtrlCreateUpdown', 'GUICtrlDelete', 'GUICtrlGetHandle', 'GUICtrlGetState', - 'GUICtrlRead', 'GUICtrlRecvMsg', 'GUICtrlRegisterListViewSort', - 'GUICtrlSendMsg', 'GUICtrlSendToDummy', 'GUICtrlSetBkColor', - 'GUICtrlSetColor', 'GUICtrlSetCursor', 'GUICtrlSetData', - 'GUICtrlSetDefBkColor', 'GUICtrlSetDefColor', 'GUICtrlSetFont', - 'GUICtrlSetGraphic', 'GUICtrlSetImage', 'GUICtrlSetLimit', - 'GUICtrlSetOnEvent', 'GUICtrlSetPos', 'GUICtrlSetResizing', 'GUICtrlSetState', - 'GUICtrlSetStyle', 'GUICtrlSetTip', 'GUIDelete', 'GUIGetCursorInfo', - 'GUIGetMsg', 'GUIGetStyle', 'GUIRegisterMsg', 'GUISetAccelerators', - 'GUISetBkColor', 'GUISetCoord', 'GUISetCursor', 'GUISetFont', 'GUISetHelp', - 'GUISetIcon', 'GUISetOnEvent', 'GUISetState', 
'GUISetStyle', 'GUIStartGroup', - 'GUISwitch', 'Hex', 'HotKeySet', 'HttpSetProxy', 'HttpSetUserAgent', 'HWnd', - 'InetClose', 'InetGet', 'InetGetInfo', 'InetGetSize', 'InetRead', 'IniDelete', - 'IniRead', 'IniReadSection', 'IniReadSectionNames', 'IniRenameSection', - 'IniWrite', 'IniWriteSection', 'InputBox', 'Int', 'IsAdmin', 'IsArray', - 'IsBinary', 'IsBool', 'IsDeclared', 'IsDllStruct', 'IsFloat', 'IsFunc', - 'IsHWnd', 'IsInt', 'IsKeyword', 'IsNumber', 'IsObj', 'IsPtr', 'IsString', - 'Log', 'MemGetStats', 'Mod', 'MouseClick', 'MouseClickDrag', 'MouseDown', - 'MouseGetCursor', 'MouseGetPos', 'MouseMove', 'MouseUp', 'MouseWheel', - 'MsgBox', 'Number', 'ObjCreate', 'ObjCreateInterface', 'ObjEvent', 'ObjGet', - 'ObjName', 'OnAutoItExitRegister', 'OnAutoItExitUnRegister', 'Ping', - 'PixelChecksum', 'PixelGetColor', 'PixelSearch', 'ProcessClose', - 'ProcessExists', 'ProcessGetStats', 'ProcessList', 'ProcessSetPriority', - 'ProcessWait', 'ProcessWaitClose', 'ProgressOff', 'ProgressOn', 'ProgressSet', - 'Ptr', 'Random', 'RegDelete', 'RegEnumKey', 'RegEnumVal', 'RegRead', - 'RegWrite', 'Round', 'Run', 'RunAs', 'RunAsWait', 'RunWait', 'Send', - 'SendKeepActive', 'SetError', 'SetExtended', 'ShellExecute', - 'ShellExecuteWait', 'Shutdown', 'Sin', 'Sleep', 'SoundPlay', - 'SoundSetWaveVolume', 'SplashImageOn', 'SplashOff', 'SplashTextOn', 'Sqrt', - 'SRandom', 'StatusbarGetText', 'StderrRead', 'StdinWrite', 'StdioClose', - 'StdoutRead', 'String', 'StringAddCR', 'StringCompare', 'StringFormat', - 'StringFromASCIIArray', 'StringInStr', 'StringIsAlNum', 'StringIsAlpha', - 'StringIsASCII', 'StringIsDigit', 'StringIsFloat', 'StringIsInt', - 'StringIsLower', 'StringIsSpace', 'StringIsUpper', 'StringIsXDigit', - 'StringLeft', 'StringLen', 'StringLower', 'StringMid', 'StringRegExp', - 'StringRegExpReplace', 'StringReplace', 'StringReverse', 'StringRight', - 'StringSplit', 'StringStripCR', 'StringStripWS', 'StringToASCIIArray', - 'StringToBinary', 'StringTrimLeft', 'StringTrimRight', 
'StringUpper', 'Tan', - 'TCPAccept', 'TCPCloseSocket', 'TCPConnect', 'TCPListen', 'TCPNameToIP', - 'TCPRecv', 'TCPSend', 'TCPShutdown, UDPShutdown', 'TCPStartup, UDPStartup', - 'TimerDiff', 'TimerInit', 'ToolTip', 'TrayCreateItem', 'TrayCreateMenu', - 'TrayGetMsg', 'TrayItemDelete', 'TrayItemGetHandle', 'TrayItemGetState', - 'TrayItemGetText', 'TrayItemSetOnEvent', 'TrayItemSetState', - 'TrayItemSetText', 'TraySetClick', 'TraySetIcon', 'TraySetOnEvent', - 'TraySetPauseIcon', 'TraySetState', 'TraySetToolTip', 'TrayTip', 'UBound', - 'UDPBind', 'UDPCloseSocket', 'UDPOpen', 'UDPRecv', 'UDPSend', 'VarGetType', - 'WinActivate', 'WinActive', 'WinClose', 'WinExists', 'WinFlash', - 'WinGetCaretPos', 'WinGetClassList', 'WinGetClientSize', 'WinGetHandle', - 'WinGetPos', 'WinGetProcess', 'WinGetState', 'WinGetText', 'WinGetTitle', - 'WinKill', 'WinList', 'WinMenuSelectItem', 'WinMinimizeAll', - 'WinMinimizeAllUndo', 'WinMove', 'WinSetOnTop', 'WinSetState', 'WinSetTitle', + 'GUICtrlCreateUpdown', 'GUICtrlDelete', 'GUICtrlGetHandle', 'GUICtrlGetState', 'GUICtrlRead', + 'GUICtrlRecvMsg', 'GUICtrlRegisterListViewSort', 'GUICtrlSendMsg', 'GUICtrlSendToDummy', + 'GUICtrlSetBkColor', 'GUICtrlSetColor', 'GUICtrlSetCursor', 'GUICtrlSetData', + 'GUICtrlSetDefBkColor', 'GUICtrlSetDefColor', 'GUICtrlSetFont', 'GUICtrlSetGraphic', + 'GUICtrlSetImage', 'GUICtrlSetLimit', 'GUICtrlSetOnEvent', 'GUICtrlSetPos', 'GUICtrlSetResizing', + 'GUICtrlSetState', 'GUICtrlSetStyle', 'GUICtrlSetTip', 'GUIDelete', 'GUIGetCursorInfo', + 'GUIGetMsg', 'GUIGetStyle', 'GUIRegisterMsg', 'GUISetAccelerators', 'GUISetBkColor', + 'GUISetCoord', 'GUISetCursor', 'GUISetFont', 'GUISetHelp', 'GUISetIcon', 'GUISetOnEvent', + 'GUISetState', 'GUISetStyle', 'GUIStartGroup', 'GUISwitch', 'Hex', 'HotKeySet', 'HttpSetProxy', + 'HttpSetUserAgent', 'HWnd', 'InetClose', 'InetGet', 'InetGetInfo', 'InetGetSize', 'InetRead', + 'IniDelete', 'IniRead', 'IniReadSection', 'IniReadSectionNames', 'IniRenameSection', 'IniWrite', + 
'IniWriteSection', 'InputBox', 'Int', 'IsAdmin', 'IsArray', 'IsBinary', 'IsBool', 'IsDeclared', + 'IsDllStruct', 'IsFloat', 'IsFunc', 'IsHWnd', 'IsInt', 'IsKeyword', 'IsNumber', 'IsObj', 'IsPtr', + 'IsString', 'Log', 'MemGetStats', 'Mod', 'MouseClick', 'MouseClickDrag', 'MouseDown', + 'MouseGetCursor', 'MouseGetPos', 'MouseMove', 'MouseUp', 'MouseWheel', 'MsgBox', 'Number', + 'ObjCreate', 'ObjCreateInterface', 'ObjEvent', 'ObjGet', 'ObjName', 'OnAutoItExitRegister', + 'OnAutoItExitUnRegister', 'Ping', 'PixelChecksum', 'PixelGetColor', 'PixelSearch', 'ProcessClose', + 'ProcessExists', 'ProcessGetStats', 'ProcessList', 'ProcessSetPriority', 'ProcessWait', + 'ProcessWaitClose', 'ProgressOff', 'ProgressOn', 'ProgressSet', 'Ptr', 'Random', 'RegDelete', + 'RegEnumKey', 'RegEnumVal', 'RegRead', 'RegWrite', 'Round', 'Run', 'RunAs', 'RunAsWait', + 'RunWait', 'Send', 'SendKeepActive', 'SetError', 'SetExtended', 'ShellExecute', + 'ShellExecuteWait', 'Shutdown', 'Sin', 'Sleep', 'SoundPlay', 'SoundSetWaveVolume', + 'SplashImageOn', 'SplashOff', 'SplashTextOn', 'Sqrt', 'SRandom', 'StatusbarGetText', 'StderrRead', + 'StdinWrite', 'StdioClose', 'StdoutRead', 'String', 'StringAddCR', 'StringCompare', + 'StringFormat', 'StringFromASCIIArray', 'StringInStr', 'StringIsAlNum', 'StringIsAlpha', + 'StringIsASCII', 'StringIsDigit', 'StringIsFloat', 'StringIsInt', 'StringIsLower', + 'StringIsSpace', 'StringIsUpper', 'StringIsXDigit', 'StringLeft', 'StringLen', 'StringLower', + 'StringMid', 'StringRegExp', 'StringRegExpReplace', 'StringReplace', 'StringReverse', + 'StringRight', 'StringSplit', 'StringStripCR', 'StringStripWS', 'StringToASCIIArray', + 'StringToBinary', 'StringTrimLeft', 'StringTrimRight', 'StringUpper', 'Tan', 'TCPAccept', + 'TCPCloseSocket', 'TCPConnect', 'TCPListen', 'TCPNameToIP', 'TCPRecv', 'TCPSend', 'TCPShutdown', + 'TCPStartup', 'TimerDiff', 'TimerInit', 'ToolTip', 'TrayCreateItem', 'TrayCreateMenu', + 'TrayGetMsg', 'TrayItemDelete', 'TrayItemGetHandle', 
'TrayItemGetState', 'TrayItemGetText', + 'TrayItemSetOnEvent', 'TrayItemSetState', 'TrayItemSetText', 'TraySetClick', 'TraySetIcon', + 'TraySetOnEvent', 'TraySetPauseIcon', 'TraySetState', 'TraySetToolTip', 'TrayTip', 'UBound', + 'UDPBind', 'UDPCloseSocket', 'UDPOpen', 'UDPRecv', 'UDPSend', 'UDPShutdown', 'UDPStartup', + 'VarGetType', 'WinActivate', 'WinActive', 'WinClose', 'WinExists', 'WinFlash', 'WinGetCaretPos', + 'WinGetClassList', 'WinGetClientSize', 'WinGetHandle', 'WinGetPos', 'WinGetProcess', + 'WinGetState', 'WinGetText', 'WinGetTitle', 'WinKill', 'WinList', 'WinMenuSelectItem', + 'WinMinimizeAll', 'WinMinimizeAllUndo', 'WinMove', 'WinSetOnTop', 'WinSetState', 'WinSetTitle', 'WinSetTrans', 'WinWait', 'WinWaitActive', 'WinWaitClose', 'WinWaitNotActive' -}, nil, true)) +}, true))) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = lexer.to_eol(';') +local block_comment = lexer.range('#comments-start', '#comments-end') + lexer.range('#cs', '#ce') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Preprocessor. -local preproc = token(l.PREPROCESSOR, '#' * word_match({ +lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' * word_match({ 'include-once', 'include', 'pragma', 'forceref', 'RequireAdmin', 'NoTrayIcon', 'OnAutoItStartRegister' -}, '-', true)) +}, true))) -- Strings. -local dq_str = l.delimited_range('"', true, true) -local sq_str = l.delimited_range("'", true, true) -local inc = l.delimited_range('<>', true, true, true) -local str = token(l.STRING, dq_str + sq_str + inc) +local dq_str = lexer.range('"', true, false) +local sq_str = lexer.range("'", true, false) +local inc = lexer.range('<', '>', true, false, true) +lex:add_rule('string', token(lexer.STRING, dq_str + sq_str + inc)) -- Macros. 
-local macro = token('macro', '@' * (l.alnum + '_')^1) +lex:add_rule('macro', token('macro', '@' * (lexer.alnum + '_')^1)) +lex:add_style('macro', lexer.styles.preprocessor) -- Variables. -local var = token(l.VARIABLE, '$' * (l.alnum + '_')^1) - --- Identifiers. -local ident = token(l.IDENTIFIER, (l.alnum + '_')^1) +lex:add_rule('variable', token(lexer.VARIABLE, '$' * (lexer.alnum + '_')^1)) -- Numbers. -local nbr = token(l.NUMBER, l.float + l.integer) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. -local oper = token(l.OPERATOR, S('+-^*/&<>=?:()[]')) - -M._rules = { - {'whitespace', ws}, - {'comment', comment}, - {'keyword', kw}, - {'function', func}, - {'preproc', preproc}, - {'string', str}, - {'macro', macro}, - {'variable', var}, - {'number', nbr}, - {'identifier', ident}, - {'operator', oper} -} - -M._tokenstyles = { - macro = l.STYLE_PREPROCESSOR -} +lex:add_rule('operator', token(lexer.OPERATOR, S('+-^*/&<>=?:()[]'))) -return M +return lex diff --git a/lua/lexers/awk.lua b/lua/lexers/awk.lua @@ -1,12 +1,12 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- AWK LPeg lexer. -- Modified by Wolfgang Seeberg 2012, 2013. 
-local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'awk'} +local lex = lexer.new('awk') local LEFTBRACKET = '[' local RIGHTBRACKET = ']' @@ -20,14 +20,13 @@ local DQUOTE = '"' local DELIMITER_MATCHES = {['('] = ')', ['['] = ']'} local COMPANION = {['('] = '[', ['['] = '('} local CC = { - alnum = 1, alpha = 1, blank = 1, cntrl = 1, digit = 1, graph = 1, lower = 1, - print = 1, punct = 1, space = 1, upper = 1, xdigit = 1 + alnum = 1, alpha = 1, blank = 1, cntrl = 1, digit = 1, graph = 1, lower = 1, print = 1, punct = 1, + space = 1, upper = 1, xdigit = 1 } local LastRegexEnd = 0 local BackslashAtCommentEnd = 0 local KW_BEFORE_RX = { - case = 1, ['do'] = 1, ['else'] = 1, exit = 1, print = 1, printf = 1, - ['return'] = 1 + case = 1, ['do'] = 1, ['else'] = 1, exit = 1, print = 1, printf = 1, ['return'] = 1 } local function findKeyword(input, e) @@ -146,9 +145,7 @@ local function scanGawkRegex(input, index) return false end -- Is only called immediately after scanGawkRegex(). -local function scanRegex() - return ScanRegexResult -end +local function scanRegex() return ScanRegexResult end local function scanString(input, index) local i = index @@ -160,7 +157,7 @@ local function scanString(input, index) return i + 1 elseif input:sub(i, i) == BACKSLASH then i = i + 1 - -- l.delimited_range() doesn't handle CRLF. + -- lexer.range() doesn't handle CRLF. if input:sub(i, i + 1) == CRLF then i = i + 1 end end i = i + 1 @@ -168,8 +165,7 @@ local function scanString(input, index) return false end --- purpose: prevent isRegex() from entering a comment line that ends with a --- backslash. +-- purpose: prevent isRegex() from entering a comment line that ends with a backslash. 
local function scanComment(input, index) local _, i = input:find('[^\r\n]*', index) if input:sub(i, i) == BACKSLASH then BackslashAtCommentEnd = i end @@ -220,115 +216,69 @@ local function scanFieldDelimiters(input, index) end -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments. -local comment = token(l.COMMENT, '#' * P(scanComment)) +lex:add_rule('comment', token(lexer.COMMENT, '#' * P(scanComment))) -- Strings. -local string = token(l.STRING, DQUOTE * P(scanString)) - --- Regular expressions. --- Slash delimited regular expressions are preceded by most operators or --- the keywords 'print' and 'case', possibly on a preceding line. They --- can contain unescaped slashes and brackets in brackets. Some escape --- sequences like '\S', '\s' have special meanings with Gawk. Tokens that --- contain them are displayed differently. -local regex = token(l.REGEX, SLASH * P(scanRegex)) -local gawkRegex = token('gawkRegex', SLASH * P(scanGawkRegex)) +lex:add_rule('string', token(lexer.STRING, DQUOTE * P(scanString))) --- no leading sign because it might be binary. -local float = ((l.digit ^ 1 * ('.' * l.digit ^ 0) ^ -1) + - ('.' * l.digit ^ 1)) * (S('eE') * S('+-') ^ -1 * l.digit ^ 1) ^ -1 --- Numbers. -local number = token(l.NUMBER, float) -local gawkNumber = token('gawkNumber', l.hex_num + l.oct_num) - --- Operators. -local operator = token(l.OPERATOR, S('!%&()*+,-/:;<=>?[\\]^{|}~')) -local gawkOperator = token('gawkOperator', P("|&") + "@" + "**=" + "**") +-- No leading sign because it might be binary. +local float = ((lexer.digit^1 * ('.' * lexer.digit^0)^-1) + ('.' * lexer.digit^1)) * + (S('eE') * S('+-')^-1 * lexer.digit^1)^-1 -- Fields. E.g. $1, $a, $(x), $a(x), $a[x], $"1", $$a, etc. 
-local field = token('field', P('$') * S('$+-') ^ 0 * - (float + (l.word ^ 0 * '(' * P(scanFieldDelimiters)) + - (l.word ^ 1 * ('[' * P(scanFieldDelimiters)) ^ -1) + - ('"' * P(scanString)) + ('/' * P(eatRegex) * '/'))) - --- Functions. -local func = token(l.FUNCTION, l.word * #P('(')) - --- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) - --- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'BEGIN', 'END', 'atan2', 'break', 'close', 'continue', 'cos', 'delete', 'do', - 'else', 'exit', 'exp', 'fflush', 'for', 'function', 'getline', 'gsub', 'if', - 'in', 'index', 'int', 'length', 'log', 'match', 'next', 'nextfile', 'print', - 'printf', 'rand', 'return', 'sin', 'split', 'sprintf', 'sqrt', 'srand', 'sub', - 'substr', 'system', 'tolower', 'toupper', 'while' -}) - -local gawkKeyword = token('gawkKeyword', word_match{ - 'BEGINFILE', 'ENDFILE', 'adump', 'and', 'asort', 'asorti', 'bindtextdomain', - 'case', 'compl', 'dcgettext', 'dcngettext', 'default', 'extension', 'func', - 'gensub', 'include', 'isarray', 'load', 'lshift', 'mktime', 'or', 'patsplit', - 'rshift', 'stopme', 'strftime', 'strtonum', 'switch', 'systime', 'xor' -}) +lex:add_rule('field', token('field', '$' * S('$+-')^0 * + (float + lexer.word^0 * '(' * P(scanFieldDelimiters) + lexer.word^1 * + ('[' * P(scanFieldDelimiters))^-1 + '"' * P(scanString) + '/' * P(eatRegex) * '/'))) +lex:add_style('field', lexer.styles.label) -local builtInVariable = token('builtInVariable', word_match{ - 'ARGC', 'ARGV', 'CONVFMT', 'ENVIRON', 'FILENAME', 'FNR', 'FS', 'NF', 'NR', - 'OFMT', 'OFS', 'ORS', 'RLENGTH', 'RS', 'RSTART', 'SUBSEP' -}) - -local gawkBuiltInVariable = token('gawkBuiltInVariable', word_match { - 'ARGIND', 'BINMODE', 'ERRNO', 'FIELDWIDTHS', 'FPAT', 'FUNCTAB', 'IGNORECASE', - 'LINT', 'PREC', 'PROCINFO', 'ROUNDMODE', 'RT', 'SYMTAB', 'TEXTDOMAIN' -}) - --- Within each group order matters, but the groups themselves (except the --- last) can be in any order. 
-M._rules = { - {'whitespace', ws}, - - {'comment', comment}, - - {'string', string}, +-- Regular expressions. +-- Slash delimited regular expressions are preceded by most operators or the keywords 'print' +-- and 'case', possibly on a preceding line. They can contain unescaped slashes and brackets +-- in brackets. Some escape sequences like '\S', '\s' have special meanings with Gawk. Tokens +-- that contain them are displayed differently. +lex:add_rule('gawkRegex', token('gawkRegex', SLASH * P(scanGawkRegex))) +lex:add_style('gawkRegex', lexer.styles.preprocessor .. {underlined = true}) +lex:add_rule('regex', token(lexer.REGEX, SLASH * P(scanRegex))) - {'field', field}, +-- Operators. +lex:add_rule('gawkOperator', token('gawkOperator', P("|&") + "@" + "**=" + "**")) +lex:add_style('gawkOperator', lexer.styles.operator .. {underlined = true}) +lex:add_rule('operator', token(lexer.OPERATOR, S('!%&()*+,-/:;<=>?[\\]^{|}~'))) - {'gawkRegex', gawkRegex}, - {'regex', regex}, - {'gawkOperator', gawkOperator}, - {'operator', operator}, +-- Numbers. +lex:add_rule('gawkNumber', token('gawkNumber', lexer.hex_num + lexer.oct_num)) +lex:add_style('gawkNumber', lexer.styles.number .. {underlined = true}) +lex:add_rule('number', token(lexer.NUMBER, float)) - {'gawkNumber', gawkNumber}, - {'number', number}, +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'BEGIN', 'END', 'atan2', 'break', 'close', 'continue', 'cos', 'delete', 'do', 'else', 'exit', + 'exp', 'fflush', 'for', 'function', 'getline', 'gsub', 'if', 'in', 'index', 'int', 'length', + 'log', 'match', 'next', 'nextfile', 'print', 'printf', 'rand', 'return', 'sin', 'split', + 'sprintf', 'sqrt', 'srand', 'sub', 'substr', 'system', 'tolower', 'toupper', 'while' +})) + +lex:add_rule('builtInVariable', token('builtInVariable', word_match( + 'ARGC ARGV CONVFMT ENVIRON FILENAME FNR FS NF NR OFMT OFS ORS RLENGTH RS RSTART SUBSEP'))) +lex:add_style('builtInVariable', lexer.styles.constant) + +lex:add_rule('gawkBuiltInVariable', token('gawkBuiltInVariable', word_match{ + 'ARGIND', 'BINMODE', 'ERRNO', 'FIELDWIDTHS', 'FPAT', 'FUNCTAB', 'IGNORECASE', 'LINT', 'PREC', + 'PROCINFO', 'ROUNDMODE', 'RT', 'SYMTAB', 'TEXTDOMAIN' +})) +lex:add_style('gawkBuiltInVariable', lexer.styles.constant .. {underlined = true}) - {'keyword', keyword}, - {'builtInVariable', builtInVariable}, - {'gawkKeyword', gawkKeyword}, - {'gawkBuiltInVariable', gawkBuiltInVariable}, - {'function', func}, - {'identifier', identifier}, -} +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, lexer.word * #P('('))) -M._tokenstyles = { - builtInVariable = l.STYLE_CONSTANT, - default = l.STYLE_ERROR, - field = l.STYLE_LABEL, - gawkBuiltInVariable = l.STYLE_CONSTANT..',underlined', - gawkKeyword = l.STYLE_KEYWORD..',underlined', - gawkNumber = l.STYLE_NUMBER..',underlined', - gawkOperator = l.STYLE_OPERATOR..',underlined', - gawkRegex = l.STYLE_PREPROCESSOR..',underlined', - regex = l.STYLE_PREPROCESSOR -} +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -M._foldsymbols = { - _patterns = {'[{}]', '#'}, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = {['#'] = l.fold_line_comments('#')} -} +-- Fold points. 
+lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#')) -return M +return lex diff --git a/lua/lexers/bash.lua b/lua/lexers/bash.lua @@ -1,82 +1,58 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Shell LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'bash'} +local lex = lexer.new('bash') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) --- Comments. -local comment = token(l.COMMENT, '#' * l.nonnewline^0) +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'if', 'then', 'elif', 'else', 'fi', 'case', 'in', 'esac', 'while', 'for', 'do', 'done', + 'continue', 'local', 'return', 'select', + -- Operators. + '-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t', '-u', '-w', '-x', + '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o', '-z', '-n', '-eq', '-ne', '-lt', '-le', + '-gt', '-ge' +})) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -local sq_str = l.delimited_range("'", false, true) -local dq_str = l.delimited_range('"') -local ex_str = l.delimited_range('`') +local sq_str = lexer.range("'", false, false) +local dq_str = lexer.range('"') +local ex_str = lexer.range('`') local heredoc = '<<' * P(function(input, index) - local s, e, minus, _, delimiter = - input:find('(-?)(["\']?)([%a_][%w_]*)%2[\n\r\f;]+', index) - if s == index and delimiter then - -- If the starting delimiter of a here-doc begins with "-", then - -- spaces are allowed to come before the closing delimiter. 
- local close_pattern - if minus == '-' then - close_pattern = '[\n\r\f%s]+'..delimiter..'\n' - else - close_pattern = '[\n\r\f]+'..delimiter..'\n' - end - local _, e = input:find(close_pattern, e) - return e and e + 1 or #input + 1 - end + local _, e, _, delimiter = input:find('^%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index) + if not delimiter then return end + _, e = input:find('[\n\r\f]+' .. delimiter, e) + return e and e + 1 or #input + 1 end) -local string = token(l.STRING, sq_str + dq_str + ex_str + heredoc) - --- Numbers. -local number = token(l.NUMBER, l.float + l.integer) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ex_str + heredoc)) --- Keywords. -local keyword = token(l.KEYWORD, word_match({ - 'if', 'then', 'elif', 'else', 'fi', 'case', 'in', 'esac', 'while', 'for', - 'do', 'done', 'continue', 'local', 'return', 'select', - -- Operators. - '-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t', - '-u', '-w', '-x', '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o', - '-z', '-n', '-eq', '-ne', '-lt', '-le', '-gt', '-ge' -}, '-')) +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) --- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Variables. -local variable = token(l.VARIABLE, - '$' * (S('!#?*@$') + l.digit^1 + l.word + - l.delimited_range('{}', true, true, true))) +lex:add_rule('variable', token(lexer.VARIABLE, '$' * + (S('!#?*@$') + lexer.digit^1 + lexer.word + lexer.range('{', '}', true, false, true)))) -- Operators. 
-local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}')) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'variable', variable}, - {'operator', operator}, -} +lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))) -M._foldsymbols = { - _patterns = {'[a-z]+', '[{}]', '#'}, - [l.KEYWORD] = { - ['if'] = 1, fi = -1, case = 1, esac = -1, ['do'] = 1, done = -1 - }, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = {['#'] = l.fold_line_comments('#')} -} +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'if', 'fi') +lex:add_fold_point(lexer.KEYWORD, 'case', 'esac') +lex:add_fold_point(lexer.KEYWORD, 'do', 'done') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#')) -return M +return lex diff --git a/lua/lexers/batch.lua b/lua/lexers/batch.lua @@ -1,71 +1,53 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Batch LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'batch'} +local lex = lexer.new('batch', {case_insensitive_fold_points = true}) -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local rem = (P('REM') + 'rem') * l.space -local comment = token(l.COMMENT, (rem + '::') * l.nonnewline^0) - --- Strings. -local string = token(l.STRING, l.delimited_range('"', true)) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match({ - 'cd', 'chdir', 'md', 'mkdir', 'cls', 'for', 'if', 'echo', 'echo.', 'move', - 'copy', 'ren', 'del', 'set', 'call', 'exit', 'setlocal', 'shift', - 'endlocal', 'pause', 'defined', 'exist', 'errorlevel', 'else', 'in', 'do', - 'NUL', 'AUX', 'PRN', 'not', 'goto', 'pushd', 'popd' -}, nil, true)) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match({ + 'cd', 'chdir', 'md', 'mkdir', 'cls', 'for', 'if', 'echo', 'echo.', 'move', 'copy', 'ren', 'del', + 'set', 'call', 'exit', 'setlocal', 'shift', 'endlocal', 'pause', 'defined', 'exist', 'errorlevel', + 'else', 'in', 'do', 'NUL', 'AUX', 'PRN', 'not', 'goto', 'pushd', 'popd' +}, true))) -- Functions. -local func = token(l.FUNCTION, word_match({ - 'APPEND', 'ATTRIB', 'CHKDSK', 'CHOICE', 'DEBUG', 'DEFRAG', 'DELTREE', - 'DISKCOMP', 'DISKCOPY', 'DOSKEY', 'DRVSPACE', 'EMM386', 'EXPAND', 'FASTOPEN', - 'FC', 'FDISK', 'FIND', 'FORMAT', 'GRAPHICS', 'KEYB', 'LABEL', 'LOADFIX', - 'MEM', 'MODE', 'MORE', 'MOVE', 'MSCDEX', 'NLSFUNC', 'POWER', 'PRINT', 'RD', - 'REPLACE', 'RESTORE', 'SETVER', 'SHARE', 'SORT', 'SUBST', 'SYS', 'TREE', - 'UNDELETE', 'UNFORMAT', 'VSAFE', 'XCOPY' -}, nil, true)) +lex:add_rule('function', token(lexer.FUNCTION, word_match({ + 'APPEND', 'ATTRIB', 'CHKDSK', 'CHOICE', 'DEBUG', 'DEFRAG', 'DELTREE', 'DISKCOMP', 'DISKCOPY', + 'DOSKEY', 'DRVSPACE', 'EMM386', 'EXPAND', 'FASTOPEN', 'FC', 'FDISK', 'FIND', 'FORMAT', 'GRAPHICS', + 'KEYB', 'LABEL', 'LOADFIX', 'MEM', 'MODE', 'MORE', 'MOVE', 'MSCDEX', 'NLSFUNC', 'POWER', 'PRINT', + 'RD', 'REPLACE', 'RESTORE', 'SETVER', 'SHARE', 'SORT', 'SUBST', 'SYS', 'TREE', 'UNDELETE', + 'UNFORMAT', 'VSAFE', 'XCOPY' +}, true))) + +-- Comments. +local rem = (P('REM') + 'rem') * #lexer.space +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(rem + '::'))) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Variables. 
-local variable = token(l.VARIABLE, - '%' * (l.digit + '%' * l.alpha) + - l.delimited_range('%', true, true)) +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true))) --- Operators. -local operator = token(l.OPERATOR, S('+|&!<>=')) +-- Variables. +local arg = '%' * lexer.digit + '%~' * lexer.alnum^1 +local variable = lexer.range('%', true, false) +lex:add_rule('variable', token(lexer.VARIABLE, arg + variable)) -- Labels. -local label = token(l.LABEL, ':' * l.word) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'function', func}, - {'comment', comment}, - {'identifier', identifier}, - {'string', string}, - {'variable', variable}, - {'label', label}, - {'operator', operator}, -} +lex:add_rule('label', token(lexer.LABEL, ':' * lexer.word)) -M._LEXBYLINE = true +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+|&!<>='))) -M._foldsymbols = { - _patterns = {'[A-Za-z]+'}, - [l.KEYWORD] = {setlocal = 1, endlocal = -1, SETLOCAL = 1, ENDLOCAL = -1} -} +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'setlocal', 'endlocal') -return M +return lex diff --git a/lua/lexers/bibtex.lua b/lua/lexers/bibtex.lua @@ -1,58 +1,46 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Bibtex LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'bibtex'} +local lex = lexer.new('bibtex') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Strings. -local string = token(l.STRING, l.delimited_range('"') + - l.delimited_range('{}', false, true, true)) +local ws = token(lexer.WHITESPACE, lexer.space^1) -- Fields. 
-local field = token('field', word_match{ - 'author', 'title', 'journal', 'year', 'volume', 'number', 'pages', 'month', - 'note', 'key', 'publisher', 'editor', 'series', 'address', 'edition', - 'howpublished', 'booktitle', 'organization', 'chapter', 'school', - 'institution', 'type', 'isbn', 'issn', 'affiliation', 'issue', 'keyword', - 'url' -}) +lex:add_rule('field', token('field', word_match{ + 'author', 'title', 'journal', 'year', 'volume', 'number', 'pages', 'month', 'note', 'key', + 'publisher', 'editor', 'series', 'address', 'edition', 'howpublished', 'booktitle', + 'organization', 'chapter', 'school', 'institution', 'type', 'isbn', 'issn', 'affiliation', + 'issue', 'keyword', 'url' +})) +lex:add_style('field', lexer.styles.constant) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, S(',=')) +-- Strings. +local dq_str = lexer.range('"') +local br_str = lexer.range('{', '}', false, false, true) +lex:add_rule('string', token(lexer.STRING, dq_str + br_str)) -M._rules = { - {'whitespace', ws}, - {'field', field}, - {'identifier', identifier}, - {'string', string}, - {'operator', operator}, -} +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S(',='))) -- Embedded in Latex. -local latex = l.load('latex') +local latex = lexer.load('latex') -- Embedded Bibtex. 
-local entry = token('entry', P('@') * word_match({ - 'book', 'article', 'booklet', 'conference', 'inbook', 'incollection', - 'inproceedings', 'manual', 'mastersthesis', 'lambda', 'misc', 'phdthesis', - 'proceedings', 'techreport', 'unpublished' -}, nil, true)) -local bibtex_start_rule = entry * ws^0 * token(l.OPERATOR, P('{')) -local bibtex_end_rule = token(l.OPERATOR, P('}')) -l.embed_lexer(latex, M, bibtex_start_rule, bibtex_end_rule) - -M._tokenstyles = { - field = l.STYLE_CONSTANT, - entry = l.STYLE_PREPROCESSOR -} - -return M +local entry = token('entry', '@' * word_match({ + 'book', 'article', 'booklet', 'conference', 'inbook', 'incollection', 'inproceedings', 'manual', + 'mastersthesis', 'lambda', 'misc', 'phdthesis', 'proceedings', 'techreport', 'unpublished' +}, true)) +lex:add_style('entry', lexer.styles.preprocessor) +local bibtex_start_rule = entry * ws^0 * token(lexer.OPERATOR, '{') +local bibtex_end_rule = token(lexer.OPERATOR, '}') +latex:embed(lex, bibtex_start_rule, bibtex_end_rule) + +return lex diff --git a/lua/lexers/boo.lua b/lua/lexers/boo.lua @@ -1,81 +1,64 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Boo LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'boo'} +local lex = lexer.new('boo') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local line_comment = '#' * l.nonnewline_esc^0 -local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) - --- Strings. 
-local sq_str = l.delimited_range("'", true) -local dq_str = l.delimited_range('"', true) -local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1 -local regex_str = #('/') * l.last_char_includes('!%^&*([{-=+|:;,?<>~') * - l.delimited_range('/', true) -local string = token(l.STRING, triple_dq_str + sq_str + dq_str) + - token(l.REGEX, regex_str) - - --- Numbers. -local number = token(l.NUMBER, (l.float + l.integer) * - (S('msdhsfFlL') + 'ms')^-1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'and', 'break', 'cast', 'continue', 'elif', 'else', 'ensure', 'except', 'for', - 'given', 'goto', 'if', 'in', 'isa', 'is', 'not', 'or', 'otherwise', 'pass', - 'raise', 'ref', 'try', 'unless', 'when', 'while', +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'and', 'break', 'cast', 'continue', 'elif', 'else', 'ensure', 'except', 'for', 'given', 'goto', + 'if', 'in', 'isa', 'is', 'not', 'or', 'otherwise', 'pass', 'raise', 'ref', 'try', 'unless', + 'when', 'while', -- Definitions. - 'abstract', 'callable', 'class', 'constructor', 'def', 'destructor', 'do', - 'enum', 'event', 'final', 'get', 'interface', 'internal', 'of', 'override', - 'partial', 'private', 'protected', 'public', 'return', 'set', 'static', - 'struct', 'transient', 'virtual', 'yield', + 'abstract', 'callable', 'class', 'constructor', 'def', 'destructor', 'do', 'enum', 'event', + 'final', 'get', 'interface', 'internal', 'of', 'override', 'partial', 'private', 'protected', + 'public', 'return', 'set', 'static', 'struct', 'transient', 'virtual', 'yield', -- Namespaces. 'as', 'from', 'import', 'namespace', -- Other. 'self', 'super', 'null', 'true', 'false' -}) +})) -- Types. 
-local type = token(l.TYPE, word_match{ - 'bool', 'byte', 'char', 'date', 'decimal', 'double', 'duck', 'float', 'int', - 'long', 'object', 'operator', 'regex', 'sbyte', 'short', 'single', 'string', - 'timespan', 'uint', 'ulong', 'ushort' -}) +lex:add_rule('type', token(lexer.TYPE, word_match{ + 'bool', 'byte', 'char', 'date', 'decimal', 'double', 'duck', 'float', 'int', 'long', 'object', + 'operator', 'regex', 'sbyte', 'short', 'single', 'string', 'timespan', 'uint', 'ulong', 'ushort' +})) -- Functions. -local func = token(l.FUNCTION, word_match{ - 'array', 'assert', 'checked', 'enumerate', '__eval__', 'filter', 'getter', - 'len', 'lock', 'map', 'matrix', 'max', 'min', 'normalArrayIndexing', 'print', - 'property', 'range', 'rawArrayIndexing', 'required', '__switch__', 'typeof', - 'unchecked', 'using', 'yieldAll', 'zip' -}) +lex:add_rule('function', token(lexer.FUNCTION, word_match{ + 'array', 'assert', 'checked', 'enumerate', '__eval__', 'filter', 'getter', 'len', 'lock', 'map', + 'matrix', 'max', 'min', 'normalArrayIndexing', 'print', 'property', 'range', 'rawArrayIndexing', + 'required', '__switch__', 'typeof', 'unchecked', 'using', 'yieldAll', 'zip' +})) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`')) +-- Strings. +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +local tq_str = lexer.range('"""') +local string = token(lexer.STRING, tq_str + sq_str + dq_str) +local regex_str = #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') * lexer.range('/', true) +local regex = token(lexer.REGEX, regex_str) +lex:add_rule('string', string + regex) + +-- Comments. 
+local line_comment = lexer.to_eol('#', true) +local block_comment = lexer.range('/*', '*/') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'type', type}, - {'function', func}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number * (S('msdhsfFlL') + 'ms')^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`'))) -return M +return lex diff --git a/lua/lexers/caml.lua b/lua/lexers/caml.lua @@ -1,83 +1,63 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- OCaml LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'caml'} +local lex = lexer.new('caml') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local comment = token(l.COMMENT, l.nested_pair('(*', '*)')) - --- Strings. -local sq_str = l.delimited_range("'", true) -local dq_str = l.delimited_range('"', true) -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. -local number = token(l.NUMBER, l.float + l.integer) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match{ - 'and', 'as', 'asr', 'begin', 'class', 'closed', 'constraint', 'do', 'done', - 'downto', 'else', 'end', 'exception', 'external', 'failwith', 'false', - 'flush', 'for', 'fun', 'function', 'functor', 'if', 'in', 'include', - 'inherit', 'incr', 'land', 'let', 'load', 'los', 'lsl', 'lsr', 'lxor', - 'match', 'method', 'mod', 'module', 'mutable', 'new', 'not', 'of', 'open', - 'option', 'or', 'parser', 'private', 'ref', 'rec', 'raise', 'regexp', 'sig', - 'struct', 'stdout', 'stdin', 'stderr', 'then', 'to', 'true', 'try', 'type', - 'val', 'virtual', 'when', 'while', 'with' -}) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'and', 'as', 'asr', 'begin', 'class', 'closed', 'constraint', 'do', 'done', 'downto', 'else', + 'end', 'exception', 'external', 'failwith', 'false', 'flush', 'for', 'fun', 'function', 'functor', + 'if', 'in', 'include', 'incr', 'inherit', 'land', 'let', 'load', 'los', 'lsl', 'lsr', 'lxor', + 'match', 'method', 'mod', 'module', 'mutable', 'new', 'not', 'of', 'open', 'option', 'or', + 'parser', 'private', 'raise', 'rec', 'ref', 'regexp', 'sig', 'stderr', 'stdin', 'stdout', + 'struct', 'then', 'to', 'true', 'try', 'type', 'val', 'virtual', 'when', 'while', 'with' +})) -- Types. -local type = token(l.TYPE, word_match{ - 'int', 'float', 'bool', 'char', 'string', 'unit' -}) +lex:add_rule('type', token(lexer.TYPE, word_match('bool char float int string unit'))) -- Functions. 
-local func = token(l.FUNCTION, word_match{ - 'raise', 'invalid_arg', 'failwith', 'compare', 'min', 'max', 'succ', 'pred', - 'mod', 'abs', 'max_int', 'min_int', 'sqrt', 'exp', 'log', 'log10', 'cos', - 'sin', 'tan', 'acos', 'asin', 'atan', 'atan2', 'cosh', 'sinh', 'tanh', 'ceil', - 'floor', 'abs_float', 'mod_float', 'frexp', 'ldexp', 'modf', 'float', - 'float_of_int', 'truncate', 'int_of_float', 'infinity', 'nan', 'max_float', - 'min_float', 'epsilon_float', 'classify_float', 'int_of_char', 'char_of_int', - 'ignore', 'string_of_bool', 'bool_of_string', 'string_of_int', - 'int_of_string', 'string_of_float', 'float_of_string', 'fst', 'snd', 'stdin', - 'stdout', 'stderr', 'print_char', 'print_string', 'print_int', 'print_float', - 'print_endline', 'print_newline', 'prerr_char', 'prerr_string', 'prerr_int', - 'prerr_float', 'prerr_endline', 'prerr_newline', 'read_line', 'read_int', - 'read_float', 'open_out', 'open_out_bin', 'open_out_gen', 'flush', - 'flush_all', 'output_char', 'output_string', 'output', 'output_byte', - 'output_binary_int', 'output_value', 'seek_out', 'pos_out', - 'out_channel_length', 'close_out', 'close_out_noerr', 'set_binary_mode_out', - 'open_in', 'open_in_bin', 'open_in_gen', 'input_char', 'input_line', 'input', - 'really_input', 'input_byte', 'input_binary_int', 'input_value', 'seek_in', - 'pos_in', 'in_channel_length', 'close_in', 'close_in_noerr', - 'set_binary_mode_in', 'incr', 'decr', 'string_of_format', 'format_of_string', - 'exit', 'at_exit' -}) +lex:add_rule('function', token(lexer.FUNCTION, word_match{ + 'abs', 'abs_float', 'acos', 'asin', 'atan', 'atan2', 'at_exit', 'bool_of_string', 'ceil', + 'char_of_int', 'classify_float', 'close_in', 'close_in_noerr', 'close_out', 'close_out_noerr', + 'compare', 'cos', 'cosh', 'decr', 'epsilon_float', 'exit', 'exp', 'failwith', 'float', + 'float_of_int', 'float_of_string', 'floor', 'flush', 'flush_all', 'format_of_string', 'frexp', + 'fst', 'ignore', 'in_channel_length', 'incr', 'infinity', 
'input', 'input_binary_int', + 'input_byte', 'input_char', 'input_line', 'input_value', 'int_of_char', 'int_of_float', + 'int_of_string', 'invalid_arg', 'ldexp', 'log', 'log10', 'max', 'max_float', 'max_int', 'min', + 'min_float', 'min_int', 'mod', 'modf', 'mod_float', 'nan', 'open_in', 'open_in_bin', + 'open_in_gen', 'open_out', 'open_out_bin', 'open_out_gen', 'out_channel_length', 'output', + 'output_binary_int', 'output_byte', 'output_char', 'output_string', 'output_value', 'pos_in', + 'pos_out', 'pred', 'prerr_char', 'prerr_endline', 'prerr_float', 'prerr_int', 'prerr_newline', + 'prerr_string', 'print_char', 'print_endline', 'print_float', 'print_int', 'print_newline', + 'print_string', 'raise', 'read_float', 'read_int', 'read_line', 'really_input', 'seek_in', + 'seek_out', 'set_binary_mode_in', 'set_binary_mode_out', 'sin', 'sinh', 'snd', 'sqrt', 'stderr', + 'stdin', 'stdout', 'string_of_bool', 'string_of_float', 'string_of_format', 'string_of_int', + 'succ', 'tan', 'tanh', 'truncate' +})) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, S('=<>+-*/.,:;~!#%^&|?[](){}')) +-- Strings. +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.range('(*', '*)', false, false, true))) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'type', type}, - {'function', func}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) + +-- Operators. 
+lex:add_rule('operator', token(lexer.OPERATOR, S('=<>+-*/.,:;~!#%^&|?[](){}'))) -return M +return lex diff --git a/lua/lexers/chuck.lua b/lua/lexers/chuck.lua @@ -1,92 +1,68 @@ --- Copyright 2010-2017 Martin Morawetz. See LICENSE. +-- Copyright 2010-2022 Martin Morawetz. See LICENSE. -- ChucK LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'chuck'} +local lex = lexer.new('chuck') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local line_comment = '//' * l.nonnewline_esc^0 -local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) - --- Strings. -local sq_str = P('L')^-1 * l.delimited_range("'", true) -local dq_str = P('L')^-1 * l.delimited_range('"', true) -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. -local number = token(l.NUMBER, l.float + l.integer) - --- Constants. -local constant = token(l.CONSTANT, word_match{ - -- special values - 'false', 'maybe', 'me', 'null', 'NULL', 'pi', 'true' -}) - --- Special special value. -local now = token('now', P('now')) - --- Times. -local time = token('time', word_match{ - 'samp', 'ms', 'second', 'minute', 'hour', 'day', 'week' -}) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ -- Control structures. - 'break', 'continue', 'else', 'for', 'if', 'repeat', 'return', 'switch', - 'until', 'while', + 'break', 'continue', 'else', 'for', 'if', 'repeat', 'return', 'switch', 'until', 'while', -- Other chuck keywords. 'function', 'fun', 'spork', 'const', 'new' -}) +})) --- Classes. -local class = token(l.CLASS, word_match{ - -- Class keywords. 
- 'class', 'extends', 'implements', 'interface', 'private', 'protected', - 'public', 'pure', 'super', 'static', 'this' -}) +-- Constants. +lex:add_rule('constant', token(lexer.CONSTANT, word_match{ + -- Special values. + 'false', 'maybe', 'me', 'null', 'NULL', 'pi', 'true' +})) -- Types. -local types = token(l.TYPE, word_match{ - 'float', 'int', 'time', 'dur', 'void', 'same' -}) +lex:add_rule('type', token(lexer.TYPE, word_match('float int time dur void same'))) + +-- Classes. +lex:add_rule('class', token(lexer.CLASS, word_match{ + -- Class keywords. + 'class', 'extends', 'implements', 'interface', 'private', 'protected', 'public', 'pure', 'static', + 'super', 'this' +})) -- Global ugens. -local ugen = token('ugen', word_match{'dac', 'adc', 'blackhole'}) +lex:add_rule('ugen', token('ugen', word_match('dac adc blackhole'))) +lex:add_style('ugen', lexer.styles.constant) + +-- Times. +lex:add_rule('time', token('time', word_match('samp ms second minute hour day week'))) +lex:add_style('time', lexer.styles.number) + +-- Special special value. +lex:add_rule('now', token('now', 'now')) +lex:add_style('now', lexer.styles.constant .. {bold = true}) + +-- Strings. +local sq_str = P('L')^-1 * lexer.range("'", true) +local dq_str = P('L')^-1 * lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. 
-local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}@')) - -M._rules = { - {'whitespace', ws}, - {'string', string}, - {'keyword', keyword}, - {'constant', constant}, - {'type', types}, - {'class', class}, - {'ugen', ugen}, - {'time', time}, - {'now', now}, - {'identifier', identifier}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} - -M._tokenstyles = { - ugen = l.STYLE_CONSTANT, - time = l.STYLE_NUMBER, - now = l.STYLE_CONSTANT..',bold' -} - -return M +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}@'))) + +return lex diff --git a/lua/lexers/clojure.lua b/lua/lexers/clojure.lua @@ -1,193 +1,147 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2018-2022 Mitchell. See LICENSE. -- Clojure LPeg lexer. +-- Contributed by Christos Chatzifountas. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'clojure'} +local lex = lexer.new('clojure') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local line_comment = ';' * l.nonnewline^0 -local block_comment = '#_(' * (l.any - ')')^0 * P(')') -local comment = token(l.COMMENT, line_comment + block_comment) - --- Strings. -local string = token(l.STRING, l.delimited_range('"')) - --- Numbers. -local number = token(l.NUMBER, P('-')^-1 * l.digit^1 * (S('./') * l.digit^1)^-1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match({ - 'fn','try','catch','finaly','defonce', - 'and', 'case', 'cond', 'def', 'defn', 'defmacro', - 'do', 'else', 'when', 'when-let', 'if-let', 'if', 'let', 'loop', - 'or', 'recur', 'quote', -}, '-*!')) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'fn', 'try', 'catch', 'finaly', 'defonce', 'and', 'case', 'cond', 'def', 'defn', 'defmacro', 'do', + 'else', 'when', 'when-let', 'if-let', 'if', 'let', 'loop', 'or', 'recur', 'quote' +})) -- Functions. -local func = token(l.FUNCTION, word_match({ - '*', '+', '-', '->ArrayChunk', '->Eduction', '->Vec', - '->VecNode', '->VecSeq', '/', '<', '<=', '=', '==', '>', '>=', - 'StackTraceElement->vec', 'Throwable->map', 'accessor', 'aclone', - 'add-classpath', 'add-watch', 'agent', 'agent-error', 'agent-errors', 'aget', - 'alength', 'alias', 'all-ns', 'alter', 'alter-meta!', 'alter-var-root', - 'ancestors', 'any?', 'apply', 'array-map', 'aset', 'aset-boolean', - 'aset-byte', 'aset-char', 'aset-double', 'aset-float', 'aset-int', - 'aset-long', 'aset-short', 'assoc', 'assoc!', 'assoc-in', 'associative?', - 'atom', 'await', 'await-for', 'bases', 'bean', 'bigdec', 'bigint', - 'biginteger', 'bit-and', 'bit-and-not', 'bit-clear', 'bit-flip', 'bit-not', - 'bit-or', 'bit-set', 'bit-shift-left', 'bit-shift-right', 'bit-test', - 'bit-xor', 'boolean', 'boolean-array', 'boolean?', 'booleans', 'bound-fn*', - 'bound?', 'bounded-count', 'butlast', 'byte', 'byte-array', 'bytes', 'bytes?', - 'cast', 'cat', 'char', 'char-array', 'char?', 'chars', 'class', 'class?', - 'clear-agent-errors', 'clojure-version', 'coll?', 'commute', 'comp', - 'comparator', 'compare', 'compare-and-set!', 'compile', 'complement', - 'completing', 'concat', 'conj', 'conj!', 'cons', 'constantly', - 'construct-proxy', 'contains?', 'count', 'counted?', 'create-ns', - 'create-struct', 'cycle', 'dec', 'decimal?', 'dedupe', 'delay?', - 'deliver', 'denominator', 'deref', 'derive', 'descendants', 'disj', 'disj!', - 'dissoc', 
'dissoc!', 'distinct', 'distinct?', 'doall', 'dorun', 'double', - 'double-array', 'double?', 'doubles', 'drop', 'drop-last', 'drop-while', - 'eduction', 'empty', 'empty?', 'ensure', 'ensure-reduced', 'enumeration-seq', - 'error-handler', 'error-mode', 'eval', 'even?', 'every-pred', 'every?', - 'ex-data', 'ex-info', 'extend', 'extenders', 'extends?', 'false?', 'ffirst', - 'file-seq', 'filter', 'filterv', 'find', 'find-keyword', 'find-ns', - 'find-var', 'first', 'flatten', 'float', 'float-array', 'float?', 'floats', - 'flush', 'fn?', 'fnext', 'fnil', 'force', 'format', 'frequencies', - 'future-call', 'future-cancel', 'future-cancelled?', 'future-done?', - 'future?', 'gensym', 'get', 'get-in', 'get-method', 'get-proxy-class', - 'get-thread-bindings', 'get-validator', 'group-by', 'halt-when', 'hash', - 'hash-map', 'hash-ordered-coll', 'hash-set', 'hash-unordered-coll', 'ident?', - 'identical?', 'identity', 'ifn?', 'in-ns', 'inc', 'inc', 'indexed?', - 'init-proxy', 'inst-ms', 'inst?', 'instance?', 'int', 'int-array', 'int?', - 'integer?', 'interleave', 'intern', 'interpose', 'into', 'into-array', 'ints', - 'isa?', 'iterate', 'iterator-seq', 'juxt', 'keep', 'keep-indexed', 'key', - 'keys', 'keyword', 'keyword?', 'last', 'line-seq', 'list', 'list*', 'list?', - 'load', 'load-file', 'load-reader', 'load-string', 'loaded-libs', 'long', - 'long-array', 'longs', 'macroexpand', 'macroexpand-1', 'make-array', - 'make-hierarchy', 'map', 'map-entry?', 'map-indexed', 'map?', 'mapcat', - 'mapv', 'max', 'max-key', 'memoize', 'merge', 'merge-with', 'meta', 'methods', - 'min', 'min-key', 'mix-collection-hash', 'mod', 'name', 'namespace', - 'namespace-munge', 'nat-int?', 'neg-int?', 'neg?', 'newline', 'next', - 'nfirst', 'nil?', 'nnext', 'not', 'not-any?', 'not-empty', 'not-every?', - 'not=', 'ns-aliases', 'ns-imports', 'ns-interns', 'ns-map', 'ns-name', - 'ns-publics', 'ns-refers', 'ns-resolve', 'ns-unalias', 'ns-unmap', 'nth', - 'nthnext', 'nthrest', 'num', 'number?', 'numerator', 
'object-array', 'odd?', - 'parents', 'partial', 'partition', 'partition-all', 'partition-by', 'pcalls', - 'peek', 'persistent!', 'pmap', 'pop', 'pop!', 'pop-thread-bindings', - 'pos-int?', 'pos?', 'pr-str', 'prefer-method', 'prefers', 'print', - 'print-str', 'printf', 'println', 'println-str', 'prn', 'prn-str', 'promise', - 'proxy-mappings', 'push-thread-bindings', 'qualified-ident?', - 'qualified-keyword?', 'qualified-symbol?', 'quot', 'rand', 'rand-int', - 'rand-nth', 'random-sample', 'range', 'ratio?', 'rational?', 'rationalize', - 're-find', 're-groups', 're-matcher', 're-matches', 're-pattern', 're-seq', - 'read', 'read-line', 'read-string', 'reader-conditional', - 'reader-conditional?', 'realized?', 'record?', 'reduce', 'reduce-kv', - 'reduced', 'reduced?', 'reductions', 'ref', 'ref-history-count', - 'ref-max-history', 'ref-min-history', 'ref-set', 'refer', - 'release-pending-sends', 'rem', 'remove', 'remove-all-methods', - 'remove-method', 'remove-ns', 'remove-watch', 'repeat', 'repeatedly', - 'replace', 'replicate', 'require', 'reset!', 'reset-meta!', 'reset-vals!', - 'resolve', 'rest', 'restart-agent', 'resultset-seq', 'reverse', 'reversible?', - 'rseq', 'rsubseq', 'run!', 'satisfies?', 'second', 'select-keys', 'send', - 'send-off', 'send-via', 'seq', 'seq?', 'seqable?', 'seque', 'sequence', - 'sequential?', 'set', 'set-agent-send-executor!', - 'set-agent-send-off-executor!', 'set-error-handler!', 'set-error-mode!', - 'set-validator!', 'set?', 'short', 'short-array', 'shorts', 'shuffle', - 'shutdown-agents', 'simple-ident?', 'simple-keyword?', 'simple-symbol?', - 'slurp', 'some', 'some-fn', 'some?', 'sort', 'sort-by', 'sorted-map', - 'sorted-map-by', 'sorted-set', 'sorted-set-by', 'sorted?', 'special-symbol?', - 'spit', 'split-at', 'split-with', 'str', 'string?', 'struct', 'struct-map', - 'subs', 'subseq', 'subvec', 'supers', 'swap!', 'swap-vals!', 'symbol', - 'symbol?', 'tagged-literal', 'tagged-literal?', 'take', 'take-last', - 'take-nth', 'take-while', 
'test', 'the-ns', 'thread-bound?', 'to-array', - 'to-array-2d', 'trampoline', 'transduce', 'transient', 'tree-seq', 'true?', - 'type', 'unchecked-add', 'unchecked-add-int', 'unchecked-byte', - 'unchecked-char', 'unchecked-dec', 'unchecked-dec-int', - 'unchecked-divide-int', 'unchecked-double', 'unchecked-float', - 'unchecked-inc', 'unchecked-inc-int', 'unchecked-int', 'unchecked-long', - 'unchecked-multiply', 'unchecked-multiply-int', 'unchecked-negate', - 'unchecked-negate-int', 'unchecked-remainder-int', 'unchecked-short', - 'unchecked-subtract', 'unchecked-subtract-int', 'underive', 'unreduced', - 'unsigned-bit-shift-right', 'update', 'update-in', 'update-proxy', 'uri?', - 'use', 'uuid?', 'val', 'vals', 'var-get', 'var-set', 'var?', 'vary-meta', - 'vec', 'vector', 'vector-of', 'vector?', 'volatile!', 'volatile?', 'vreset!', - 'with-bindings*', 'with-meta', 'with-redefs-fn', 'xml-seq', 'zero?', 'zipmap', - 'diff-similar', 'equality-partition', 'diff', 'inspect', 'inspect-table', - 'inspect-tree', '', 'validated', 'browse-url', 'as-file', 'as-url', - 'make-input-stream', 'make-output-stream', 'make-reader', 'make-writer', - 'as-relative-path', 'copy', 'delete-file', 'file', 'input-stream', - 'make-parents', 'output-stream', 'reader', 'resource', 'writer', - 'add-local-javadoc', 'add-remote-javadoc', 'javadoc', 'sh', 'demunge', - 'load-script', 'main', 'repl', 'repl-caught', 'repl-exception', 'repl-prompt', - 'repl-read', 'root-cause', 'skip-if-eol', 'skip-whitespace', - 'stack-element-str', 'cl-format', 'fresh-line', 'get-pretty-writer', 'pprint', - 'pprint-indent', 'pprint-newline', 'pprint-tab', 'print-table', - 'set-pprint-dispatch', 'write', 'write-out', 'resolve-class', 'do-reflect', - 'typename', '->AsmReflector', '->Constructor', '->Field', '->JavaReflector', - '->Method', 'map->Constructor', 'map->Field', 'map->Method', 'reflect', - 'type-reflect', 'apropos', 'dir-fn', 'find-doc', 'pst', 'set-break-handler!', - 'source-fn', 'thread-stopper', 'difference', 
'index', 'intersection', 'join', - 'map-invert', 'project', 'rename', 'rename-keys', 'select', 'subset?', - 'superset?', 'union', 'e', 'print-cause-trace', 'print-stack-trace', - 'print-throwable', 'print-trace-element', 'blank?', 'capitalize', - 'ends-with?', 'escape', 'includes?', 'index-of', 'last-index-of', - 'lower-case', 're-quote-replacement', 'replace-first', 'split', 'split-lines', - 'starts-with?', 'trim', 'trim-newline', 'triml', 'trimr', 'upper-case', - 'apply-template', 'assert-any', 'assert-predicate', 'compose-fixtures', - 'do-report', 'file-position', 'function?', 'get-possibly-unbound-var', - 'inc-report-counter', 'join-fixtures', 'run-all-tests', 'run-tests', - 'successful?', 'test-all-vars', 'test-ns', 'test-vars', - 'testing-contexts-str', 'testing-vars-str', 'keywordize-keys', - 'macroexpand-all', 'postwalk', 'postwalk-demo', 'postwalk-replace', 'prewalk', - 'prewalk-demo', 'prewalk-replace', 'stringify-keys', 'walk', 'append-child', - 'branch?', 'children', 'down', 'edit', 'end?', 'insert-child', 'insert-left', - 'insert-right', 'left', 'leftmost', 'lefts', 'make-node', 'node', 'path', - 'prev', 'right', 'rightmost', 'rights', 'root', 'seq-zip', 'up', 'vector-zip', - 'xml-zip', 'zipper' -}, '-/<>!?=#\'')) - --- Identifiers. 
-local word = (l.alpha + S('-!?*$=-')) * (l.alnum + S('.-!?*$+-'))^0 -local identifier = token(l.IDENTIFIER, word) +lex:add_rule('function', token(lexer.FUNCTION, word_match{ + '*', '+', '-', '->ArrayChunk', '->Eduction', '->Vec', '->VecNode', '->VecSeq', '/', '<', '<=', + '=', '==', '>', '>=', 'StackTraceElement->vec', 'Throwable->map', 'accessor', 'aclone', + 'add-classpath', 'add-watch', 'agent', 'agent-error', 'agent-errors', 'aget', 'alength', 'alias', + 'all-ns', 'alter', 'alter-meta!', 'alter-var-root', 'ancestors', 'any?', 'apply', 'array-map', + 'aset', 'aset-boolean', 'aset-byte', 'aset-char', 'aset-double', 'aset-float', 'aset-int', + 'aset-long', 'aset-short', 'assoc', 'assoc!', 'assoc-in', 'associative?', 'atom', 'await', + 'await-for', 'bases', 'bean', 'bigdec', 'bigint', 'biginteger', 'bit-and', 'bit-and-not', + 'bit-clear', 'bit-flip', 'bit-not', 'bit-or', 'bit-set', 'bit-shift-left', 'bit-shift-right', + 'bit-test', 'bit-xor', 'boolean', 'boolean-array', 'boolean?', 'booleans', 'bound-fn*', 'bound?', + 'bounded-count', 'butlast', 'byte', 'byte-array', 'bytes', 'bytes?', 'cast', 'cat', 'char', + 'char-array', 'char?', 'chars', 'class', 'class?', 'clear-agent-errors', 'clojure-version', + 'coll?', 'commute', 'comp', 'comparator', 'compare', 'compare-and-set!', 'compile', 'complement', + 'completing', 'concat', 'conj', 'conj!', 'cons', 'constantly', 'construct-proxy', 'contains?', + 'count', 'counted?', 'create-ns', 'create-struct', 'cycle', 'dec', 'decimal?', 'dedupe', 'delay?', + 'deliver', 'denominator', 'deref', 'derive', 'descendants', 'disj', 'disj!', 'dissoc', 'dissoc!', + 'distinct', 'distinct?', 'doall', 'dorun', 'double', 'double-array', 'double?', 'doubles', 'drop', + 'drop-last', 'drop-while', 'eduction', 'empty', 'empty?', 'ensure', 'ensure-reduced', + 'enumeration-seq', 'error-handler', 'error-mode', 'eval', 'even?', 'every-pred', 'every?', + 'ex-data', 'ex-info', 'extend', 'extenders', 'extends?', 'false?', 'ffirst', 'file-seq', 
'filter', + 'filterv', 'find', 'find-keyword', 'find-ns', 'find-var', 'first', 'flatten', 'float', + 'float-array', 'float?', 'floats', 'flush', 'fn?', 'fnext', 'fnil', 'force', 'format', + 'frequencies', 'future-call', 'future-cancel', 'future-cancelled?', 'future-done?', 'future?', + 'gensym', 'get', 'get-in', 'get-method', 'get-proxy-class', 'get-thread-bindings', + 'get-validator', 'group-by', 'halt-when', 'hash', 'hash-map', 'hash-ordered-coll', 'hash-set', + 'hash-unordered-coll', 'ident?', 'identical?', 'identity', 'ifn?', 'in-ns', 'inc', 'inc', + 'indexed?', 'init-proxy', 'inst-ms', 'inst?', 'instance?', 'int', 'int-array', 'int?', 'integer?', + 'interleave', 'intern', 'interpose', 'into', 'into-array', 'ints', 'isa?', 'iterate', + 'iterator-seq', 'juxt', 'keep', 'keep-indexed', 'key', 'keys', 'keyword', 'keyword?', 'last', + 'line-seq', 'list', 'list*', 'list?', 'load', 'load-file', 'load-reader', 'load-string', + 'loaded-libs', 'long', 'long-array', 'longs', 'macroexpand', 'macroexpand-1', 'make-array', + 'make-hierarchy', 'map', 'map-entry?', 'map-indexed', 'map?', 'mapcat', 'mapv', 'max', 'max-key', + 'memoize', 'merge', 'merge-with', 'meta', 'methods', 'min', 'min-key', 'mix-collection-hash', + 'mod', 'name', 'namespace', 'namespace-munge', 'nat-int?', 'neg-int?', 'neg?', 'newline', 'next', + 'nfirst', 'nil?', 'nnext', 'not', 'not-any?', 'not-empty', 'not-every?', 'not=', 'ns-aliases', + 'ns-imports', 'ns-interns', 'ns-map', 'ns-name', 'ns-publics', 'ns-refers', 'ns-resolve', + 'ns-unalias', 'ns-unmap', 'nth', 'nthnext', 'nthrest', 'num', 'number?', 'numerator', + 'object-array', 'odd?', 'parents', 'partial', 'partition', 'partition-all', 'partition-by', + 'pcalls', 'peek', 'persistent!', 'pmap', 'pop', 'pop!', 'pop-thread-bindings', 'pos-int?', 'pos?', + 'pr-str', 'prefer-method', 'prefers', 'print', 'print-str', 'printf', 'println', 'println-str', + 'prn', 'prn-str', 'promise', 'proxy-mappings', 'push-thread-bindings', 'qualified-ident?', + 
'qualified-keyword?', 'qualified-symbol?', 'quot', 'rand', 'rand-int', 'rand-nth', + 'random-sample', 'range', 'ratio?', 'rational?', 'rationalize', 're-find', 're-groups', + 're-matcher', 're-matches', 're-pattern', 're-seq', 'read', 'read-line', 'read-string', + 'reader-conditional', 'reader-conditional?', 'realized?', 'record?', 'reduce', 'reduce-kv', + 'reduced', 'reduced?', 'reductions', 'ref', 'ref-history-count', 'ref-max-history', + 'ref-min-history', 'ref-set', 'refer', 'release-pending-sends', 'rem', 'remove', + 'remove-all-methods', 'remove-method', 'remove-ns', 'remove-watch', 'repeat', 'repeatedly', + 'replace', 'replicate', 'require', 'reset!', 'reset-meta!', 'reset-vals!', 'resolve', 'rest', + 'restart-agent', 'resultset-seq', 'reverse', 'reversible?', 'rseq', 'rsubseq', 'run!', + 'satisfies?', 'second', 'select-keys', 'send', 'send-off', 'send-via', 'seq', 'seq?', 'seqable?', + 'seque', 'sequence', 'sequential?', 'set', 'set-agent-send-executor!', + 'set-agent-send-off-executor!', 'set-error-handler!', 'set-error-mode!', 'set-validator!', 'set?', + 'short', 'short-array', 'shorts', 'shuffle', 'shutdown-agents', 'simple-ident?', + 'simple-keyword?', 'simple-symbol?', 'slurp', 'some', 'some-fn', 'some?', 'sort', 'sort-by', + 'sorted-map', 'sorted-map-by', 'sorted-set', 'sorted-set-by', 'sorted?', 'special-symbol?', + 'spit', 'split-at', 'split-with', 'str', 'string?', 'struct', 'struct-map', 'subs', 'subseq', + 'subvec', 'supers', 'swap!', 'swap-vals!', 'symbol', 'symbol?', 'tagged-literal', + 'tagged-literal?', 'take', 'take-last', 'take-nth', 'take-while', 'test', 'the-ns', + 'thread-bound?', 'to-array', 'to-array-2d', 'trampoline', 'transduce', 'transient', 'tree-seq', + 'true?', 'type', 'unchecked-add', 'unchecked-add-int', 'unchecked-byte', 'unchecked-char', + 'unchecked-dec', 'unchecked-dec-int', 'unchecked-divide-int', 'unchecked-double', + 'unchecked-float', 'unchecked-inc', 'unchecked-inc-int', 'unchecked-int', 'unchecked-long', + 
'unchecked-multiply', 'unchecked-multiply-int', 'unchecked-negate', 'unchecked-negate-int', + 'unchecked-remainder-int', 'unchecked-short', 'unchecked-subtract', 'unchecked-subtract-int', + 'underive', 'unreduced', 'unsigned-bit-shift-right', 'update', 'update-in', 'update-proxy', + 'uri?', 'use', 'uuid?', 'val', 'vals', 'var-get', 'var-set', 'var?', 'vary-meta', 'vec', 'vector', + 'vector-of', 'vector?', 'volatile!', 'volatile?', 'vreset!', 'with-bindings*', 'with-meta', + 'with-redefs-fn', 'xml-seq', 'zero?', 'zipmap', 'diff-similar', 'equality-partition', 'diff', + 'inspect', 'inspect-table', 'inspect-tree', 'validated', 'browse-url', 'as-file', 'as-url', + 'make-input-stream', 'make-output-stream', 'make-reader', 'make-writer', 'as-relative-path', + 'copy', 'delete-file', 'file', 'input-stream', 'make-parents', 'output-stream', 'reader', + 'resource', 'writer', 'add-local-javadoc', 'add-remote-javadoc', 'javadoc', 'sh', 'demunge', + 'load-script', 'main', 'repl', 'repl-caught', 'repl-exception', 'repl-prompt', 'repl-read', + 'root-cause', 'skip-if-eol', 'skip-whitespace', 'stack-element-str', 'cl-format', 'fresh-line', + 'get-pretty-writer', 'pprint', 'pprint-indent', 'pprint-newline', 'pprint-tab', 'print-table', + 'set-pprint-dispatch', 'write', 'write-out', 'resolve-class', 'do-reflect', 'typename', + '->AsmReflector', '->Constructor', '->Field', '->JavaReflector', '->Method', 'map->Constructor', + 'map->Field', 'map->Method', 'reflect', 'type-reflect', 'apropos', 'dir-fn', 'find-doc', 'pst', + 'set-break-handler!', 'source-fn', 'thread-stopper', 'difference', 'index', 'intersection', + 'join', 'map-invert', 'project', 'rename', 'rename-keys', 'select', 'subset?', 'superset?', + 'union', 'e', 'print-cause-trace', 'print-stack-trace', 'print-throwable', 'print-trace-element', + 'blank?', 'capitalize', 'ends-with?', 'escape', 'includes?', 'index-of', 'last-index-of', + 'lower-case', 're-quote-replacement', 'replace-first', 'split', 'split-lines', 
'starts-with?', + 'trim', 'trim-newline', 'triml', 'trimr', 'upper-case', 'apply-template', 'assert-any', + 'assert-predicate', 'compose-fixtures', 'do-report', 'file-position', 'function?', + 'get-possibly-unbound-var', 'inc-report-counter', 'join-fixtures', 'run-all-tests', 'run-tests', + 'successful?', 'test-all-vars', 'test-ns', 'test-vars', 'testing-contexts-str', + 'testing-vars-str', 'keywordize-keys', 'macroexpand-all', 'postwalk', 'postwalk-demo', + 'postwalk-replace', 'prewalk', 'prewalk-demo', 'prewalk-replace', 'stringify-keys', 'walk', + 'append-child', 'branch?', 'children', 'down', 'edit', 'end?', 'insert-child', 'insert-left', + 'insert-right', 'left', 'leftmost', 'lefts', 'make-node', 'node', 'path', 'prev', 'right', + 'rightmost', 'rights', 'root', 'seq-zip', 'up', 'vector-zip', 'xml-zip', 'zipper' +})) --- Operators. -local operator = token(l.OPERATOR, S('`@()')) - --- Clojure keywords -local clojure_keyword = token('clojure_keyword', ':' * S(':')^-1 * word * ('/' * word )^-1) -local clojure_symbol = token('clojure_symbol', "\'" * word * ('/' * word )^-1 ) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'func', func}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, - {"clojure_keyword", clojure_keyword}, - {"clojure_symbol", clojure_symbol} -} +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 * (S('./') * lexer.digit^1)^-1)) +-- Identifiers. +local word = (lexer.alpha + S('-!?*$=-')) * (lexer.alnum + S('.-!?*$+-'))^0 +lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) -M._tokenstyles = { - clojure_keyword = l.STYLE_TYPE, - clojure_symbol = l.STYLE_TYPE..',bold', -} +-- Strings. 
+lex:add_rule('string', token(lexer.STRING, lexer.range('"'))) -M._foldsymbols = { - _patterns = {'[%(%)%[%]{}]', ';'}, - [l.OPERATOR] = { - ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1 - }, - [l.COMMENT] = {['#_('] = 1, [';'] = l.fold_line_comments(';')} -} +-- Comments. +local line_comment = lexer.to_eol(';') +local block_comment = lexer.range('#_(', ')') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -return M +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('`@()'))) + +-- Clojure keywords. +lex:add_rule('clojure_keyword', token('clojure_keyword', ':' * S(':')^-1 * word * ('/' * word)^-1)) +lex:add_style('clojure_keyword', lexer.styles.type) +lex:add_rule('clojure_symbol', token('clojure_symbol', "\'" * word * ('/' * word)^-1)) +lex:add_style('clojure_symbol', lexer.styles.type .. {bold = true}) + +-- Fold points. +lex:add_fold_point(lexer.COMMENT, '#_(', ')') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines(';')) +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '[', ']') +lex:add_fold_point(lexer.OPERATOR, '{', '}') + +return lex diff --git a/lua/lexers/cmake.lua b/lua/lexers/cmake.lua @@ -1,173 +1,132 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- CMake LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'cmake'} +local lex = lexer.new('cmake') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local comment = token(l.COMMENT, '#' * l.nonnewline^0) - --- Strings. -local string = token(l.STRING, l.delimited_range('"')) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match({ - 'IF', 'ENDIF', 'FOREACH', 'ENDFOREACH', 'WHILE', 'ENDWHILE', 'ELSE', 'ELSEIF' -}, nil, true)) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match( + 'IF ENDIF FOREACH ENDFOREACH WHILE ENDWHILE ELSE ELSEIF', true))) -- Commands. -local command = token(l.FUNCTION, word_match({ - 'ADD_CUSTOM_COMMAND', 'ADD_CUSTOM_TARGET', 'ADD_DEFINITIONS', - 'ADD_DEPENDENCIES', 'ADD_EXECUTABLE', 'ADD_LIBRARY', 'ADD_SUBDIRECTORY', - 'ADD_TEST', 'AUX_SOURCE_DIRECTORY', 'BUILD_COMMAND', 'BUILD_NAME', - 'CMAKE_MINIMUM_REQUIRED', 'CONFIGURE_FILE', 'CREATE_TEST_SOURCELIST', - 'ENABLE_LANGUAGE', 'ENABLE_TESTING', 'ENDMACRO', 'EXECUTE_PROCESS', - 'EXEC_PROGRAM', 'EXPORT_LIBRARY_DEPENDENCIES', 'FILE', 'FIND_FILE', - 'FIND_LIBRARY', 'FIND_PACKAGE', 'FIND_PATH', 'FIND_PROGRAM', 'FLTK_WRAP_UI', - 'GET_CMAKE_PROPERTY', 'GET_DIRECTORY_PROPERTY', 'GET_FILENAME_COMPONENT', - 'GET_SOURCE_FILE_PROPERTY', 'GET_TARGET_PROPERTY', 'GET_TEST_PROPERTY', - 'INCLUDE', 'INCLUDE_DIRECTORIES', 'INCLUDE_EXTERNAL_MSPROJECT', - 'INCLUDE_REGULAR_EXPRESSION', 'INSTALL', 'INSTALL_FILES', 'INSTALL_PROGRAMS', - 'INSTALL_TARGETS', 'LINK_DIRECTORIES', 'LINK_LIBRARIES', 'LIST', 'LOAD_CACHE', - 'LOAD_COMMAND', 'MACRO', 'MAKE_DIRECTORY', 'MARK_AS_ADVANCED', 'MATH', - 'MESSAGE', 'OPTION', 'OUTPUT_REQUIRED_FILES', 'PROJECT', 'QT_WRAP_CPP', - 'QT_WRAP_UI', 'REMOVE', 'REMOVE_DEFINITIONS', 'SEPARATE_ARGUMENTS', 'SET', - 'SET_DIRECTORY_PROPERTIES', 'SET_SOURCE_FILES_PROPERTIES', - 'SET_TARGET_PROPERTIES', 'SET_TESTS_PROPERTIES', 'SITE_NAME', 'SOURCE_GROUP', - 'STRING', 'SUBDIRS', 'SUBDIR_DEPENDS', 'TARGET_LINK_LIBRARIES', 'TRY_COMPILE', - 'TRY_RUN', 'USE_MANGLED_MESA', 'UTILITY_SOURCE', 'VARIABLE_REQUIRES', - 'VTK_MAKE_INSTANTIATOR', 'VTK_WRAP_JAVA', 'VTK_WRAP_PYTHON', 'VTK_WRAP_TCL', - 'WRITE_FILE', -}, nil, true)) +lex:add_rule('command', token(lexer.FUNCTION, word_match({ + 'ADD_CUSTOM_COMMAND', 'ADD_CUSTOM_TARGET', 'ADD_DEFINITIONS', 'ADD_DEPENDENCIES', + 'ADD_EXECUTABLE', 
'ADD_LIBRARY', 'ADD_SUBDIRECTORY', 'ADD_TEST', 'AUX_SOURCE_DIRECTORY', + 'BUILD_COMMAND', 'BUILD_NAME', 'CMAKE_MINIMUM_REQUIRED', 'CONFIGURE_FILE', + 'CREATE_TEST_SOURCELIST', 'ENABLE_LANGUAGE', 'ENABLE_TESTING', 'ENDMACRO', 'EXEC_PROGRAM', + 'EXECUTE_PROCESS', 'EXPORT_LIBRARY_DEPENDENCIES', 'FILE', 'FIND_FILE', 'FIND_LIBRARY', + 'FIND_PACKAGE', 'FIND_PATH', 'FIND_PROGRAM', 'FLTK_WRAP_UI', 'GET_CMAKE_PROPERTY', + 'GET_DIRECTORY_PROPERTY', 'GET_FILENAME_COMPONENT', 'GET_SOURCE_FILE_PROPERTY', + 'GET_TARGET_PROPERTY', 'GET_TEST_PROPERTY', 'INCLUDE', 'INCLUDE_DIRECTORIES', + 'INCLUDE_EXTERNAL_MSPROJECT', 'INCLUDE_REGULAR_EXPRESSION', 'INSTALL', 'INSTALL_FILES', + 'INSTALL_PROGRAMS', 'INSTALL_TARGETS', 'LINK_DIRECTORIES', 'LINK_LIBRARIES', 'LIST', 'LOAD_CACHE', + 'LOAD_COMMAND', 'MACRO', 'MAKE_DIRECTORY', 'MARK_AS_ADVANCED', 'MATH', 'MESSAGE', 'OPTION', + 'OUTPUT_REQUIRED_FILES', 'PROJECT', 'QT_WRAP_CPP', 'QT_WRAP_UI', 'REMOVE', 'REMOVE_DEFINITIONS', + 'SEPARATE_ARGUMENTS', 'SET', 'SET_DIRECTORY_PROPERTIES', 'SET_SOURCE_FILES_PROPERTIES', + 'SET_TARGET_PROPERTIES', 'SET_TESTS_PROPERTIES', 'SITE_NAME', 'SOURCE_GROUP', 'STRING', + 'SUBDIR_DEPENDS', 'SUBDIRS', 'TARGET_LINK_LIBRARIES', 'TRY_COMPILE', 'TRY_RUN', + 'USE_MANGLED_MESA', 'UTILITY_SOURCE', 'VARIABLE_REQUIRES', 'VTK_MAKE_INSTANTIATOR', + 'VTK_WRAP_JAVA', 'VTK_WRAP_PYTHON', 'VTK_WRAP_TCL', 'WRITE_FILE' +}, true))) -- Constants. -local constant = token(l.CONSTANT, word_match({ - 'BOOL', 'CACHE', 'FALSE', 'N', 'NO', 'ON', 'OFF', 'NOTFOUND', 'TRUE' -}, nil, true)) +lex:add_rule('constant', + token(lexer.CONSTANT, word_match('BOOL CACHE FALSE N NO ON OFF NOTFOUND TRUE', true))) -- Variables. 
-local variable = token(l.VARIABLE, word_match{ - 'APPLE', 'BORLAND', 'CMAKE_AR', 'CMAKE_BACKWARDS_COMPATIBILITY', - 'CMAKE_BASE_NAME', 'CMAKE_BINARY_DIR', 'CMAKE_BUILD_TOOL', 'CMAKE_BUILD_TYPE', - 'CMAKE_CACHEFILE_DIR', 'CMAKE_CACHE_MAJOR_VERSION', - 'CMAKE_CACHE_MINOR_VERSION', 'CMAKE_CACHE_RELEASE_VERSION', - 'CMAKE_CFG_INTDIR', 'CMAKE_COLOR_MAKEFILE', 'CMAKE_COMMAND', - 'CMAKE_COMPILER_IS_GNUCC', 'CMAKE_COMPILER_IS_GNUCC_RUN', - 'CMAKE_COMPILER_IS_GNUCXX', 'CMAKE_COMPILER_IS_GNUCXX_RUN', +lex:add_rule('variable', token(lexer.VARIABLE, word_match{ + 'APPLE', 'ARGS', 'BORLAND', 'CMAKE_AR', 'CMAKE_BACKWARDS_COMPATIBILITY', 'CMAKE_BASE_NAME', + 'CMAKE_BINARY_DIR', 'CMAKE_BUILD_TOOL', 'CMAKE_BUILD_TYPE', 'CMAKE_CACHEFILE_DIR', + 'CMAKE_CACHE_MAJOR_VERSION', 'CMAKE_CACHE_MINOR_VERSION', 'CMAKE_CACHE_RELEASE_VERSION', + 'CMAKE_C_COMPILE_OBJECT', 'CMAKE_C_COMPILER', 'CMAKE_C_COMPILER_ARG1', 'CMAKE_C_COMPILER_ENV_VAR', + 'CMAKE_C_COMPILER_FULLPATH', 'CMAKE_C_COMPILER_LOADED', 'CMAKE_C_COMPILER_WORKS', + 'CMAKE_C_CREATE_SHARED_LIBRARY', 'CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS', + 'CMAKE_C_CREATE_SHARED_MODULE', 'CMAKE_C_CREATE_STATIC_LIBRARY', 'CMAKE_CFG_INTDIR', + 'CMAKE_C_FLAGS', 'CMAKE_C_FLAGS_DEBUG', 'CMAKE_C_FLAGS_DEBUG_INIT', 'CMAKE_C_FLAGS_INIT', + 'CMAKE_C_FLAGS_MINSIZEREL', 'CMAKE_C_FLAGS_MINSIZEREL_INIT', 'CMAKE_C_FLAGS_RELEASE', + 'CMAKE_C_FLAGS_RELEASE_INIT', 'CMAKE_C_FLAGS_RELWITHDEBINFO', 'CMAKE_C_FLAGS_RELWITHDEBINFO_INIT', + 'CMAKE_C_IGNORE_EXTENSIONS', 'CMAKE_C_INFORMATION_LOADED', 'CMAKE_C_LINKER_PREFERENCE', + 'CMAKE_C_LINK_EXECUTABLE', 'CMAKE_C_LINK_FLAGS', 'CMAKE_COLOR_MAKEFILE', 'CMAKE_COMMAND', + 'CMAKE_COMPILER_IS_GNUCC', 'CMAKE_COMPILER_IS_GNUCC_RUN', 'CMAKE_COMPILER_IS_GNUCXX', + 'CMAKE_COMPILER_IS_GNUCXX_RUN', 'CMAKE_C_OUTPUT_EXTENSION', 'CMAKE_C_SOURCE_FILE_EXTENSIONS', 'CMAKE_CTEST_COMMAND', 'CMAKE_CURRENT_BINARY_DIR', 'CMAKE_CURRENT_SOURCE_DIR', - 'CMAKE_CXX_COMPILER', 'CMAKE_CXX_COMPILER_ARG1', 'CMAKE_CXX_COMPILER_ENV_VAR', - 
'CMAKE_CXX_COMPILER_FULLPATH', 'CMAKE_CXX_COMPILER_LOADED', - 'CMAKE_CXX_COMPILER_WORKS', 'CMAKE_CXX_COMPILE_OBJECT', - 'CMAKE_CXX_CREATE_SHARED_LIBRARY', - 'CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS', - 'CMAKE_CXX_CREATE_SHARED_MODULE', 'CMAKE_CXX_CREATE_STATIC_LIBRARY', - 'CMAKE_CXX_FLAGS', 'CMAKE_CXX_FLAGS_DEBUG', 'CMAKE_CXX_FLAGS_DEBUG_INIT', - 'CMAKE_CXX_FLAGS_INIT', 'CMAKE_CXX_FLAGS_MINSIZEREL', - 'CMAKE_CXX_FLAGS_MINSIZEREL_INIT', 'CMAKE_CXX_FLAGS_RELEASE', - 'CMAKE_CXX_FLAGS_RELEASE_INIT', 'CMAKE_CXX_FLAGS_RELWITHDEBINFO', - 'CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT', 'CMAKE_CXX_IGNORE_EXTENSIONS', - 'CMAKE_CXX_INFORMATION_LOADED', 'CMAKE_CXX_LINKER_PREFERENCE', - 'CMAKE_CXX_LINK_EXECUTABLE', 'CMAKE_CXX_LINK_FLAGS', - 'CMAKE_CXX_OUTPUT_EXTENSION', 'CMAKE_CXX_SOURCE_FILE_EXTENSIONS', - 'CMAKE_C_COMPILER', 'CMAKE_C_COMPILER_ARG1', 'CMAKE_C_COMPILER_ENV_VAR', - 'CMAKE_C_COMPILER_FULLPATH', 'CMAKE_C_COMPILER_LOADED', - 'CMAKE_C_COMPILER_WORKS', 'CMAKE_C_COMPILE_OBJECT', - 'CMAKE_C_CREATE_SHARED_LIBRARY', - 'CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS', - 'CMAKE_C_CREATE_SHARED_MODULE', 'CMAKE_C_CREATE_STATIC_LIBRARY', - 'CMAKE_C_FLAGS', 'CMAKE_C_FLAGS_DEBUG', 'CMAKE_C_FLAGS_DEBUG_INIT', - 'CMAKE_C_FLAGS_INIT', 'CMAKE_C_FLAGS_MINSIZEREL', - 'CMAKE_C_FLAGS_MINSIZEREL_INIT', 'CMAKE_C_FLAGS_RELEASE', - 'CMAKE_C_FLAGS_RELEASE_INIT', 'CMAKE_C_FLAGS_RELWITHDEBINFO', - 'CMAKE_C_FLAGS_RELWITHDEBINFO_INIT', 'CMAKE_C_IGNORE_EXTENSIONS', - 'CMAKE_C_INFORMATION_LOADED', 'CMAKE_C_LINKER_PREFERENCE', - 'CMAKE_C_LINK_EXECUTABLE', 'CMAKE_C_LINK_FLAGS', 'CMAKE_C_OUTPUT_EXTENSION', - 'CMAKE_C_SOURCE_FILE_EXTENSIONS', 'CMAKE_DL_LIBS', 'CMAKE_EDIT_COMMAND', - 'CMAKE_EXECUTABLE_SUFFIX', 'CMAKE_EXE_LINKER_FLAGS', - 'CMAKE_EXE_LINKER_FLAGS_DEBUG', 'CMAKE_EXE_LINKER_FLAGS_MINSIZEREL', - 'CMAKE_EXE_LINKER_FLAGS_RELEASE', 'CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO', - 'CMAKE_FILES_DIRECTORY', 'CMAKE_FIND_APPBUNDLE', 'CMAKE_FIND_FRAMEWORK', - 'CMAKE_FIND_LIBRARY_PREFIXES', 
'CMAKE_FIND_LIBRARY_SUFFIXES', - 'CMAKE_GENERATOR', 'CMAKE_HOME_DIRECTORY', 'CMAKE_INCLUDE_FLAG_C', - 'CMAKE_INCLUDE_FLAG_CXX', 'CMAKE_INCLUDE_FLAG_C_SEP', 'CMAKE_INIT_VALUE', - 'CMAKE_INSTALL_PREFIX', 'CMAKE_LIBRARY_PATH_FLAG', 'CMAKE_LINK_LIBRARY_FLAG', - 'CMAKE_LINK_LIBRARY_SUFFIX', 'CMAKE_MAJOR_VERSION', 'CMAKE_MAKE_PROGRAM', - 'CMAKE_MINOR_VERSION', 'CMAKE_MODULE_EXISTS', 'CMAKE_MODULE_LINKER_FLAGS', - 'CMAKE_MODULE_LINKER_FLAGS_DEBUG', 'CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL', - 'CMAKE_MODULE_LINKER_FLAGS_RELEASE', - 'CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO', - 'CMAKE_MacOSX_Content_COMPILE_OBJECT', 'CMAKE_NUMBER_OF_LOCAL_GENERATORS', - 'CMAKE_OSX_ARCHITECTURES', 'CMAKE_OSX_SYSROOT', 'CMAKE_PARENT_LIST_FILE', + 'CMAKE_CXX_COMPILE_OBJECT', 'CMAKE_CXX_COMPILER', 'CMAKE_CXX_COMPILER_ARG1', + 'CMAKE_CXX_COMPILER_ENV_VAR', 'CMAKE_CXX_COMPILER_FULLPATH', 'CMAKE_CXX_COMPILER_LOADED', + 'CMAKE_CXX_COMPILER_WORKS', 'CMAKE_CXX_CREATE_SHARED_LIBRARY', + 'CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS', 'CMAKE_CXX_CREATE_SHARED_MODULE', + 'CMAKE_CXX_CREATE_STATIC_LIBRARY', 'CMAKE_CXX_FLAGS', 'CMAKE_CXX_FLAGS_DEBUG', + 'CMAKE_CXX_FLAGS_DEBUG_INIT', 'CMAKE_CXX_FLAGS_INIT', 'CMAKE_CXX_FLAGS_MINSIZEREL', + 'CMAKE_CXX_FLAGS_MINSIZEREL_INIT', 'CMAKE_CXX_FLAGS_RELEASE', 'CMAKE_CXX_FLAGS_RELEASE_INIT', + 'CMAKE_CXX_FLAGS_RELWITHDEBINFO', 'CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT', + 'CMAKE_CXX_IGNORE_EXTENSIONS', 'CMAKE_CXX_INFORMATION_LOADED', 'CMAKE_CXX_LINKER_PREFERENCE', + 'CMAKE_CXX_LINK_EXECUTABLE', 'CMAKE_CXX_LINK_FLAGS', 'CMAKE_CXX_OUTPUT_EXTENSION', + 'CMAKE_CXX_SOURCE_FILE_EXTENSIONS', 'CMAKE_DL_LIBS', 'CMAKE_EDIT_COMMAND', + 'CMAKE_EXECUTABLE_SUFFIX', 'CMAKE_EXE_LINKER_FLAGS', 'CMAKE_EXE_LINKER_FLAGS_DEBUG', + 'CMAKE_EXE_LINKER_FLAGS_MINSIZEREL', 'CMAKE_EXE_LINKER_FLAGS_RELEASE', + 'CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO', 'CMAKE_FILES_DIRECTORY', 'CMAKE_FIND_APPBUNDLE', + 'CMAKE_FIND_FRAMEWORK', 'CMAKE_FIND_LIBRARY_PREFIXES', 'CMAKE_FIND_LIBRARY_SUFFIXES', + 
'CMAKE_GENERATOR', 'CMAKE_HOME_DIRECTORY', 'CMAKE_INCLUDE_FLAG_C', 'CMAKE_INCLUDE_FLAG_C_SEP', + 'CMAKE_INCLUDE_FLAG_CXX', 'CMAKE_INIT_VALUE', 'CMAKE_INSTALL_PREFIX', 'CMAKE_LIBRARY_PATH_FLAG', + 'CMAKE_LINK_LIBRARY_FLAG', 'CMAKE_LINK_LIBRARY_SUFFIX', 'CMAKE_MacOSX_Content_COMPILE_OBJECT', + 'CMAKE_MAJOR_VERSION', 'CMAKE_MAKE_PROGRAM', 'CMAKE_MINOR_VERSION', 'CMAKE_MODULE_EXISTS', + 'CMAKE_MODULE_LINKER_FLAGS', 'CMAKE_MODULE_LINKER_FLAGS_DEBUG', + 'CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL', 'CMAKE_MODULE_LINKER_FLAGS_RELEASE', + 'CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO', 'CMAKE_NUMBER_OF_LOCAL_GENERATORS', + 'CMAKE_OSX_ARCHITECTURES', '_CMAKE_OSX_MACHINE', 'CMAKE_OSX_SYSROOT', 'CMAKE_PARENT_LIST_FILE', 'CMAKE_PATCH_VERSION', 'CMAKE_PLATFORM_HAS_INSTALLNAME', - 'CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES', 'CMAKE_PLATFORM_ROOT_BIN', - 'CMAKE_PROJECT_NAME', 'CMAKE_RANLIB', 'CMAKE_ROOT', - 'CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS', - 'CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS', - 'CMAKE_SHARED_LIBRARY_CXX_FLAGS', 'CMAKE_SHARED_LIBRARY_C_FLAGS', - 'CMAKE_SHARED_LIBRARY_LINK_C_FLAGS', 'CMAKE_SHARED_LIBRARY_PREFIX', - 'CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG', - 'CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG_SEP', - 'CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG', 'CMAKE_SHARED_LIBRARY_SONAME_C_FLAG', - 'CMAKE_SHARED_LIBRARY_SUFFIX', 'CMAKE_SHARED_LINKER_FLAGS', - 'CMAKE_SHARED_LINKER_FLAGS_DEBUG', 'CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL', - 'CMAKE_SHARED_LINKER_FLAGS_RELEASE', - 'CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO', - 'CMAKE_SHARED_MODULE_CREATE_CXX_FLAGS', 'CMAKE_SHARED_MODULE_CREATE_C_FLAGS', - 'CMAKE_SHARED_MODULE_PREFIX', 'CMAKE_SHARED_MODULE_SUFFIX', - 'CMAKE_SIZEOF_VOID_P', 'CMAKE_SKIP_RPATH', 'CMAKE_SOURCE_DIR', + 'CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES', 'CMAKE_PLATFORM_ROOT_BIN', 'CMAKE_PROJECT_NAME', + 'CMAKE_RANLIB', 'CMAKE_ROOT', 'CMAKE_SHARED_LIBRARY_C_FLAGS', + 'CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS', 'CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS', + 'CMAKE_SHARED_LIBRARY_CXX_FLAGS', 
'CMAKE_SHARED_LIBRARY_LINK_C_FLAGS', + 'CMAKE_SHARED_LIBRARY_PREFIX', 'CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG', + 'CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG_SEP', 'CMAKE_SHARED_LIBRARY_SONAME_C_FLAG', + 'CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG', 'CMAKE_SHARED_LIBRARY_SUFFIX', + 'CMAKE_SHARED_LINKER_FLAGS', 'CMAKE_SHARED_LINKER_FLAGS_DEBUG', + 'CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL', 'CMAKE_SHARED_LINKER_FLAGS_RELEASE', + 'CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO', 'CMAKE_SHARED_MODULE_CREATE_C_FLAGS', + 'CMAKE_SHARED_MODULE_CREATE_CXX_FLAGS', 'CMAKE_SHARED_MODULE_PREFIX', + 'CMAKE_SHARED_MODULE_SUFFIX', 'CMAKE_SIZEOF_VOID_P', 'CMAKE_SKIP_RPATH', 'CMAKE_SOURCE_DIR', 'CMAKE_STATIC_LIBRARY_PREFIX', 'CMAKE_STATIC_LIBRARY_SUFFIX', 'CMAKE_SYSTEM', - 'CMAKE_SYSTEM_AND_CXX_COMPILER_INFO_FILE', - 'CMAKE_SYSTEM_AND_C_COMPILER_INFO_FILE', 'CMAKE_SYSTEM_APPBUNDLE_PATH', - 'CMAKE_SYSTEM_FRAMEWORK_PATH', 'CMAKE_SYSTEM_INCLUDE_PATH', - 'CMAKE_SYSTEM_INFO_FILE', 'CMAKE_SYSTEM_LIBRARY_PATH', 'CMAKE_SYSTEM_LOADED', - 'CMAKE_SYSTEM_NAME', 'CMAKE_SYSTEM_PROCESSOR', 'CMAKE_SYSTEM_PROGRAM_PATH', - 'CMAKE_SYSTEM_SPECIFIC_INFORMATION_LOADED', 'CMAKE_SYSTEM_VERSION', - 'CMAKE_UNAME', 'CMAKE_USE_RELATIVE_PATHS', 'CMAKE_VERBOSE_MAKEFILE', 'CYGWIN', - 'EXECUTABLE_OUTPUT_PATH', 'FORCE', 'HAVE_CMAKE_SIZEOF_VOID_P', - 'LIBRARY_OUTPUT_PATH', 'MACOSX_BUNDLE', 'MINGW', 'MSVC60', 'MSVC70', 'MSVC71', - 'MSVC80', 'MSVC', 'MSVC_IDE', 'PROJECT_BINARY_DIR', 'PROJECT_NAME', - 'PROJECT_SOURCE_DIR', 'PROJECT_BINARY_DIR', 'PROJECT_SOURCE_DIR', - 'RUN_CONFIGURE', 'UNIX', 'WIN32', '_CMAKE_OSX_MACHINE', - -- More variables. 
- 'LOCATION', 'TARGET', 'POST_BUILD', 'PRE_BUILD', 'ARGS' -} + P('$') * l.delimited_range('{}', false, true)) + 'CMAKE_SYSTEM_AND_C_COMPILER_INFO_FILE', 'CMAKE_SYSTEM_AND_CXX_COMPILER_INFO_FILE', + 'CMAKE_SYSTEM_APPBUNDLE_PATH', 'CMAKE_SYSTEM_FRAMEWORK_PATH', 'CMAKE_SYSTEM_INCLUDE_PATH', + 'CMAKE_SYSTEM_INFO_FILE', 'CMAKE_SYSTEM_LIBRARY_PATH', 'CMAKE_SYSTEM_LOADED', 'CMAKE_SYSTEM_NAME', + 'CMAKE_SYSTEM_PROCESSOR', 'CMAKE_SYSTEM_PROGRAM_PATH', 'CMAKE_SYSTEM_SPECIFIC_INFORMATION_LOADED', + 'CMAKE_SYSTEM_VERSION', 'CMAKE_UNAME', 'CMAKE_USE_RELATIVE_PATHS', 'CMAKE_VERBOSE_MAKEFILE', + 'CYGWIN', 'EXECUTABLE_OUTPUT_PATH', 'FORCE', 'HAVE_CMAKE_SIZEOF_VOID_P', 'LIBRARY_OUTPUT_PATH', + 'LOCATION', 'MACOSX_BUNDLE', 'MINGW', 'MSVC', 'MSVC60', 'MSVC70', 'MSVC71', 'MSVC80', 'MSVC_IDE', + 'POST_BUILD', 'PRE_BUILD', 'PROJECT_BINARY_DIR', 'PROJECT_NAME', 'PROJECT_SOURCE_DIR', + 'RUN_CONFIGURE', 'TARGET', 'UNIX', 'WIN32' +} + P('$') * lexer.range('{', '}'))) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, word_match{ + 'AND', 'COMMAND', 'DEFINED', 'DOC', 'EQUAL', 'EXISTS', 'GREATER', 'INTERNAL', 'LESS', 'MATCHES', + 'NAME', 'NAMES', 'NAME_WE', 'NOT', 'OR', 'PATH', 'PATHS', 'PROGRAM', 'STREQUAL', 'STRGREATER', + 'STRINGS', 'STRLESS' +} + S('=(){}'))) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, word_match({ - 'AND', 'COMMAND', 'DEFINED', 'DOC', 'EQUAL', 'EXISTS', 'GREATER', 'INTERNAL', - 'LESS', 'MATCHES', 'NAME', 'NAMES', 'NAME_WE', 'NOT', 'OR', 'PATH', 'PATHS', - 'PROGRAM', 'STREQUAL', 'STRGREATER', 'STRINGS', 'STRLESS' -}) + S('=(){}')) +-- Strings. 
+lex:add_rule('string', token(lexer.STRING, lexer.range('"'))) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'command', command}, - {'constant', constant}, - {'variable', variable}, - {'operator', operator}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, -} +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -M._foldsymbols = { - _patterns = {'[A-Z]+', '[%(%){}]', '#'}, - [l.KEYWORD] = { - IF = 1, ENDIF = -1, FOREACH = 1, ENDFOREACH = -1, WHILE = 1, ENDWHILE = -1 - }, - [l.FUNCTION] = {MACRO = 1, ENDMACRO = -1}, - [l.OPERATOR] = {['('] = 1, [')'] = -1, ['{'] = 1, ['}'] = -1}, - [l.COMMENT] = {['#'] = l.fold_line_comments('#')} -} +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'IF', 'ENDIF') +lex:add_fold_point(lexer.KEYWORD, 'FOREACH', 'ENDFOREACH') +lex:add_fold_point(lexer.KEYWORD, 'WHILE', 'ENDWHILE') +lex:add_fold_point(lexer.FUNCTION, 'MACRO', 'ENDMACRO') +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#')) -return M +return lex diff --git a/lua/lexers/coffeescript.lua b/lua/lexers/coffeescript.lua @@ -1,62 +1,49 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- CoffeeScript LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'coffeescript'} +local lex = lexer.new('coffeescript', {fold_by_indentation = true}) -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local block_comment = '###' * (l.any - '###')^0 * P('###')^-1 -local line_comment = '#' * l.nonnewline_esc^0 -local comment = token(l.COMMENT, block_comment + line_comment) - --- Strings. 
-local sq_str = l.delimited_range("'") -local dq_str = l.delimited_range('"') -local regex_str = #P('/') * l.last_char_includes('+-*%<>!=^&|?~:;,([{') * - l.delimited_range('/', true) * S('igm')^0 -local string = token(l.STRING, sq_str + dq_str) + token(l.REGEX, regex_str) - --- Numbers. -local number = token(l.NUMBER, l.float + l.integer) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'all', 'and', 'bind', 'break', 'by', 'case', 'catch', 'class', 'const', - 'continue', 'default', 'delete', 'do', 'each', 'else', 'enum', 'export', - 'extends', 'false', 'for', 'finally', 'function', 'if', 'import', 'in', - 'instanceof', 'is', 'isnt', 'let', 'loop', 'native', 'new', 'no', 'not', 'of', - 'off', 'on', 'or', 'return', 'super', 'switch', 'then', 'this', 'throw', - 'true', 'try', 'typeof', 'unless', 'until', 'var', 'void', 'with', 'when', - 'while', 'yes' -}) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'all', 'and', 'bind', 'break', 'by', 'case', 'catch', 'class', 'const', 'continue', 'default', + 'delete', 'do', 'each', 'else', 'enum', 'export', 'extends', 'false', 'finally', 'for', + 'function', 'if', 'import', 'in', 'instanceof', 'is', 'isnt', 'let', 'loop', 'native', 'new', + 'no', 'not', 'of', 'off', 'on', 'or', 'return', 'super', 'switch', 'then', 'this', 'throw', + 'true', 'try', 'typeof', 'unless', 'until', 'var', 'void', 'when', 'while', 'with', 'yes' +})) -- Fields: object properties and methods. -local field = token(l.FUNCTION, '.' * (S('_$') + l.alpha) * - (S('_$') + l.alnum)^0) +lex:add_rule('field', + token(lexer.FUNCTION, '.' * (S('_$') + lexer.alpha) * (S('_$') + lexer.alnum)^0)) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. 
+local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local string = token(lexer.STRING, sq_str + dq_str) +local regex_str = + #P('/') * lexer.last_char_includes('+-*%<>!=^&|?~:;,([{') * lexer.range('/', true) * S('igm')^0 +local regex = token(lexer.REGEX, regex_str) +lex:add_rule('string', string + regex) + +-- Comments. +local block_comment = lexer.range('###') +local line_comment = lexer.to_eol('#', true) +lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. -local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}')) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'field', field}, - {'identifier', identifier}, - {'comment', comment}, - {'number', number}, - {'string', string}, - {'operator', operator}, -} - -M._FOLDBYINDENTATION = true - -return M +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}'))) + +return lex diff --git a/lua/lexers/container.lua b/lua/lexers/container.lua @@ -1,7 +1,5 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Container LPeg lexer. -- This is SciTE's plain text lexer. -local M = {_NAME = 'container'} - -return M +return require('lexer').new('container') diff --git a/lua/lexers/context.lua b/lua/lexers/context.lua @@ -1,59 +1,53 @@ --- Copyright 2006-2017 Robert Gieseke. See LICENSE. +-- Copyright 2006-2022 Robert Gieseke, Lars Otter. See LICENSE. -- ConTeXt LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'context'} +local lex = lexer.new('context') + +-- TeX and ConTeXt mkiv environment definitions. 
+local beginend = (P('begin') + 'end') +local startstop = (P('start') + 'stop') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments. -local comment = token(l.COMMENT, '%' * l.nonnewline^0) - --- Commands. -local command = token(l.KEYWORD, '\\' * (l.alpha^1 + S('#$&~_^%{}'))) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%'))) -- Sections. -local section = token('section', '\\' * word_match{ - 'part', 'chapter', 'section', 'subsection', 'subsubsection', 'title', - 'subject', 'subsubject', 'subsubsubject' -}) +local wm_section = word_match{ + 'chapter', 'part', 'section', 'subject', 'subsection', 'subsubject', 'subsubsection', + 'subsubsubject', 'subsubsubsection', 'subsubsubsubject', 'title' +} +local section = token(lexer.CLASS, '\\' * startstop^-1 * wm_section) +lex:add_rule('section', section) --- ConTeXt environments. -local environment = token('environment', '\\' * (P('start') + 'stop') * l.word) +-- TeX and ConTeXt mkiv environments. +local environment = token(lexer.STRING, '\\' * (beginend + startstop) * lexer.alpha^1) +lex:add_rule('environment', environment) --- Operators. -local operator = token(l.OPERATOR, S('$&#{}[]')) - -M._rules = { - {'whitespace', ws}, - {'comment', comment}, - {'environment', environment}, - {'section', section}, - {'keyword', command}, - {'operator', operator}, -} +-- Commands. +local command = token(lexer.KEYWORD, '\\' * + (lexer.alpha^1 * P('\\') * lexer.space^1 + lexer.alpha^1 + S('!"#$%&\',./;=[\\]_{|}~`^-'))) +lex:add_rule('command', command) -M._tokenstyles = { - environment = l.STYLE_KEYWORD, - section = l.STYLE_CLASS -} +-- Operators. 
+local operator = token(lexer.OPERATOR, S('#$_[]{}~^')) +lex:add_rule('operator', operator) -M._foldsymbols = { - _patterns = {'\\start', '\\stop', '[{}]', '%%'}, - ['environment'] = {['\\start'] = 1, ['\\stop'] = -1}, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = {['%'] = l.fold_line_comments('%')} -} +-- Fold points. +lex:add_fold_point('environment', '\\start', '\\stop') +lex:add_fold_point('environment', '\\begin', '\\end') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('%')) -- Embedded Lua. -local luatex = l.load('lua') +local luatex = lexer.load('lua') local luatex_start_rule = #P('\\startluacode') * environment local luatex_end_rule = #P('\\stopluacode') * environment -l.embed_lexer(M, luatex, luatex_start_rule, luatex_end_rule) - +lex:embed(luatex, luatex_start_rule, luatex_end_rule) -return M +return lex diff --git a/lua/lexers/cpp.lua b/lua/lexers/cpp.lua @@ -1,90 +1,75 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- C++ LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'cpp'} +local lex = lexer.new('cpp') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local line_comment = '//' * l.nonnewline_esc^0 -local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) - --- Strings. -local sq_str = P('L')^-1 * l.delimited_range("'", true) -local dq_str = P('L')^-1 * l.delimited_range('"', true) -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. -local number = token(l.NUMBER, l.float + l.integer) - --- Preprocessor. 
-local preproc_word = word_match{ - 'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef', 'ifndef', 'import', - 'line', 'pragma', 'undef', 'using', 'warning' -} -local preproc = #l.starts_line('#') * - (token(l.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) + - token(l.PREPROCESSOR, '#' * S('\t ')^0 * 'include') * - (token(l.WHITESPACE, S('\t ')^1) * - token(l.STRING, l.delimited_range('<>', true, true)))^-1) +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'asm', 'auto', 'break', 'case', 'catch', 'class', 'const', 'const_cast', - 'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else', 'explicit', - 'export', 'extern', 'false', 'for', 'friend', 'goto', 'if', 'inline', - 'mutable', 'namespace', 'new', 'operator', 'private', 'protected', 'public', - 'register', 'reinterpret_cast', 'return', 'sizeof', 'static', 'static_cast', - 'switch', 'template', 'this', 'throw', 'true', 'try', 'typedef', 'typeid', - 'typename', 'using', 'virtual', 'volatile', 'while', - -- Operators - 'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not', 'not_eq', 'or', 'or_eq', - 'xor', 'xor_eq', - -- C++11 - 'alignas', 'alignof', 'constexpr', 'decltype', 'final', 'noexcept', - 'override', 'static_assert', 'thread_local' -}) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'asm', 'auto', 'break', 'case', 'catch', 'class', 'const', 'const_cast', 'continue', 'default', + 'delete', 'do', 'dynamic_cast', 'else', 'explicit', 'export', 'extern', 'false', 'for', 'friend', + 'goto', 'if', 'inline', 'mutable', 'namespace', 'new', 'operator', 'private', 'protected', + 'public', 'register', 'reinterpret_cast', 'return', 'sizeof', 'static', 'static_cast', 'switch', + 'template', 'this', 'throw', 'true', 'try', 'typedef', 'typeid', 'typename', 'using', 'virtual', + 'volatile', 'while', + -- Operators. 
+ 'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not', 'not_eq', 'or', 'or_eq', 'xor', 'xor_eq', + -- C++11. + 'alignas', 'alignof', 'constexpr', 'decltype', 'final', 'noexcept', 'override', 'static_assert', + 'thread_local' +})) -- Types. -local type = token(l.TYPE, word_match{ - 'bool', 'char', 'double', 'enum', 'float', 'int', 'long', 'short', 'signed', - 'struct', 'union', 'unsigned', 'void', 'wchar_t', - -- C++11 +lex:add_rule('type', token(lexer.TYPE, word_match{ + 'bool', 'char', 'double', 'enum', 'float', 'int', 'long', 'short', 'signed', 'struct', 'union', + 'unsigned', 'void', 'wchar_t', + -- C++11. 'char16_t', 'char32_t', 'nullptr' -}) +})) + +-- Strings. +local sq_str = P('L')^-1 * lexer.range("'", true) +local dq_str = P('L')^-1 * lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}')) +-- Comments. +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +local dec = lexer.digit^1 * ("'" * lexer.digit^1)^0 +local hex = '0' * S('xX') * lexer.xdigit^1 * ("'" * lexer.xdigit^1)^0 +local bin = '0' * S('bB') * S('01')^1 * ("'" * S('01')^1)^0 * -lexer.xdigit +local integer = S('+-')^-1 * (hex + bin + dec) +lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer)) + +-- Preprocessor. 
+local include = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') * + (ws * token(lexer.STRING, lexer.range('<', '>', true)))^-1 +local preproc = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * + word_match('define elif else endif error if ifdef ifndef import line pragma undef using warning')) +lex:add_rule('preprocessor', include + preproc) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'type', type}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'preproc', preproc}, - {'operator', operator}, -} +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}'))) -M._foldsymbols = { - _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'}, - [l.PREPROCESSOR] = { - region = 1, endregion = -1, - ['if'] = 1, ifdef = 1, ifndef = 1, endif = -1 - }, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} -} +-- Fold points. +lex:add_fold_point(lexer.PREPROCESSOR, 'if', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) -return M +return lex diff --git a/lua/lexers/crystal.lua b/lua/lexers/crystal.lua @@ -1,141 +1,102 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Copyright 2017 Michel Martens. -- Crystal LPeg lexer (based on Ruby). -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'crystal'} +local lex = lexer.new('crystal') -- Whitespace. 
-local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) --- Comments. -local line_comment = '#' * l.nonnewline_esc^0 -local comment = token(l.COMMENT, line_comment) +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'alias', 'begin', 'break', 'case', 'class', 'def', 'defined?', 'do', 'else', 'elsif', 'end', + 'ensure', 'false', 'for', 'if', 'in', 'module', 'next', 'nil', 'not', 'redo', 'rescue', 'retry', + 'return', 'self', 'super', 'then', 'true', 'undef', 'unless', 'until', 'when', 'while', 'yield', + '__FILE__', '__LINE__' +})) -local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}'} -local literal_delimitted = P(function(input, index) - local delimiter = input:sub(index, index) - if not delimiter:find('[%w\r\n\f\t ]') then -- only non alpha-numerics - local match_pos, patt - if delimiter_matches[delimiter] then - -- Handle nested delimiter/matches in strings. - local s, e = delimiter, delimiter_matches[delimiter] - patt = l.delimited_range(s..e, false, false, true) - else - patt = l.delimited_range(delimiter) - end - match_pos = lpeg.match(patt, input, index) - return match_pos or #input + 1 - end -end) +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match{ + 'abort', 'at_exit', 'caller', 'delay', 'exit', 'fork', 'future', 'get_stack_top', 'gets', 'lazy', + 'loop', 'main', 'p', 'print', 'printf', 'puts', 'raise', 'rand', 'read_line', 'require', 'sleep', + 'spawn', 'sprintf', 'system', 'with_color', + -- Macros. + 'assert_responds_to', 'debugger', 'parallel', 'pp', 'record', 'redefine_main' +}) * -S('.:|')) + +-- Identifiers. +local word_char = lexer.alnum + S('_!?') +local word = (lexer.alpha + '_') * word_char^0 +lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true))) -- Strings. 
-local cmd_str = l.delimited_range('`') -local sq_str = l.delimited_range("'") -local dq_str = l.delimited_range('"') +local cmd_str = lexer.range('`') +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') local heredoc = '<<' * P(function(input, index) - local s, e, indented, _, delimiter = - input:find('(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index) - if s == index and delimiter then - local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+') - local _, e = input:find(end_heredoc..delimiter, e) - return e and e + 1 or #input + 1 - end + local _, e, indented, _, delimiter = input:find('^(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index) + if not delimiter then return end + local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+') + _, e = input:find(end_heredoc .. delimiter, e) + return e and e + 1 or #input + 1 end) +local string = token(lexer.STRING, (sq_str + dq_str + heredoc + cmd_str) * S('f')^-1) -- TODO: regex_str fails with `obj.method /patt/` syntax. -local regex_str = #P('/') * l.last_char_includes('!%^&*([{-=+|:;,?<>~') * - l.delimited_range('/', true, false) * S('iomx')^0 -local string = token(l.STRING, (sq_str + dq_str + heredoc + cmd_str) * - S('f')^-1) + - token(l.REGEX, regex_str) - -local word_char = l.alnum + S('_!?') +local regex_str = + #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') * lexer.range('/', true) * S('iomx')^0 +local regex = token(lexer.REGEX, regex_str) +lex:add_rule('string', string + regex) -- Numbers. -local dec = l.digit^1 * ('_' * l.digit^1)^0 * S('ri')^-1 +local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 * S('ri')^-1 local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0 -local integer = S('+-')^-1 * (bin + l.hex_num + l.oct_num + dec) +local integer = S('+-')^-1 * (bin + lexer.hex_num + lexer.oct_num + dec) -- TODO: meta, control, etc. for numeric_literal. -local numeric_literal = '?' 
* (l.any - l.space) * -word_char -local number = token(l.NUMBER, l.float * S('ri')^-1 + integer + numeric_literal) - --- Keywords. -local keyword = token(l.KEYWORD, word_match({ - 'alias', 'begin', 'break', 'case', 'class', 'def', 'defined?', 'do', 'else', - 'elsif', 'end', 'ensure', 'false', 'for', 'if', 'in', 'module', 'next', 'nil', - 'not', 'redo', 'rescue', 'retry', 'return', 'self', 'super', 'then', 'true', - 'undef', 'unless', 'until', 'when', 'while', 'yield', '__FILE__', '__LINE__' -}, '?!')) - --- Functions. -local func = token(l.FUNCTION, word_match({ - 'abort', 'at_exit', 'caller', 'delay', 'exit', 'fork', 'future', - 'get_stack_top', 'gets', 'lazy', 'loop', 'main', 'p', 'print', 'printf', - 'puts', 'raise', 'rand', 'read_line', 'require', 'sleep', 'spawn', 'sprintf', - 'system', 'with_color', - -- Macros - 'assert_responds_to', 'debugger', 'parallel', 'pp', 'record', 'redefine_main' -}, '?!')) * -S('.:|') - --- Identifiers. -local word = (l.alpha + '_') * word_char^0 -local identifier = token(l.IDENTIFIER, word) +local numeric_literal = '?' * (lexer.any - lexer.space) * -word_char +lex:add_rule('number', token(lexer.NUMBER, lexer.float * S('ri')^-1 + integer + numeric_literal)) -- Variables. -local global_var = '$' * (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + l.digit + '-' * - S('0FadiIKlpvw')) +local global_var = '$' * + (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + lexer.digit + '-' * S('0FadiIKlpvw')) local class_var = '@@' * word local inst_var = '@' * word -local variable = token(l.VARIABLE, global_var + class_var + inst_var) +lex:add_rule('variable', token(lexer.VARIABLE, global_var + class_var + inst_var)) -- Symbols. -local symbol = token('symbol', ':' * P(function(input, index) +lex:add_rule('symbol', token('symbol', ':' * P(function(input, index) if input:sub(index - 2, index - 2) ~= ':' then return index end -end) * (word_char^1 + sq_str + dq_str)) +end) * (word_char^1 + sq_str + dq_str))) +lex:add_style('symbol', lexer.styles.constant) -- Operators. 
-local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~')) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'function', func}, - {'identifier', identifier}, - {'comment', comment}, - {'string', string}, - {'number', number}, - {'variable', variable}, - {'symbol', symbol}, - {'operator', operator}, -} - -M._tokenstyles = { - symbol = l.STYLE_CONSTANT -} +lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~'))) +-- Fold points. local function disambiguate(text, pos, line, s) - return line:sub(1, s - 1):match('^%s*$') and - not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0 + return line:sub(1, s - 1):match('^%s*$') and not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and + 1 or 0 end - -M._foldsymbols = { - _patterns = {'%l+', '[%(%)%[%]{}]', '#'}, - [l.KEYWORD] = { - begin = 1, class = 1, def = 1, ['do'] = 1, ['for'] = 1, ['module'] = 1, - case = 1, - ['if'] = disambiguate, ['while'] = disambiguate, - ['unless'] = disambiguate, ['until'] = disambiguate, - ['end'] = -1 - }, - [l.OPERATOR] = { - ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1 - }, - [l.COMMENT] = { - ['#'] = l.fold_line_comments('#') - } -} - -return M +lex:add_fold_point(lexer.KEYWORD, 'begin', 'end') +lex:add_fold_point(lexer.KEYWORD, 'case', 'end') +lex:add_fold_point(lexer.KEYWORD, 'class', 'end') +lex:add_fold_point(lexer.KEYWORD, 'def', 'end') +lex:add_fold_point(lexer.KEYWORD, 'do', 'end') +lex:add_fold_point(lexer.KEYWORD, 'for', 'end') +lex:add_fold_point(lexer.KEYWORD, 'module', 'end') +lex:add_fold_point(lexer.KEYWORD, 'if', disambiguate) +lex:add_fold_point(lexer.KEYWORD, 'while', disambiguate) +lex:add_fold_point(lexer.KEYWORD, 'unless', disambiguate) +lex:add_fold_point(lexer.KEYWORD, 'until', disambiguate) +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '[', ']') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#')) + 
+return lex diff --git a/lua/lexers/csharp.lua b/lua/lexers/csharp.lua @@ -1,84 +1,64 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- C# LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'csharp'} +local lex = lexer.new('csharp') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'class', 'delegate', 'enum', 'event', 'interface', 'namespace', 'struct', 'using', 'abstract', + 'const', 'explicit', 'extern', 'fixed', 'implicit', 'internal', 'lock', 'out', 'override', + 'params', 'partial', 'private', 'protected', 'public', 'ref', 'sealed', 'static', 'readonly', + 'unsafe', 'virtual', 'volatile', 'add', 'as', 'assembly', 'base', 'break', 'case', 'catch', + 'checked', 'continue', 'default', 'do', 'else', 'finally', 'for', 'foreach', 'get', 'goto', 'if', + 'in', 'is', 'new', 'remove', 'return', 'set', 'sizeof', 'stackalloc', 'super', 'switch', 'this', + 'throw', 'try', 'typeof', 'unchecked', 'value', 'var', 'void', 'while', 'yield', 'null', 'true', + 'false' +})) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match{ + 'bool', 'byte', 'char', 'decimal', 'double', 'float', 'int', 'long', 'object', 'operator', + 'sbyte', 'short', 'string', 'uint', 'ulong', 'ushort' +})) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. 
-local line_comment = '//' * l.nonnewline_esc^0 -local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Strings. -local sq_str = l.delimited_range("'", true) -local dq_str = l.delimited_range('"', true) -local ml_str = P('@')^-1 * l.delimited_range('"', false, true) -local string = token(l.STRING, sq_str + dq_str + ml_str) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +local ml_str = P('@')^-1 * lexer.range('"', false, false) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ml_str)) -- Numbers. -local number = token(l.NUMBER, (l.float + l.integer) * S('lLdDfFMm')^-1) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('lLdDfFmM')^-1)) -- Preprocessor. -local preproc_word = word_match{ - 'define', 'elif', 'else', 'endif', 'error', 'if', 'line', 'undef', 'warning', - 'region', 'endregion' -} -local preproc = token(l.PREPROCESSOR, - l.starts_line('#') * S('\t ')^0 * preproc_word * - (l.nonnewline_esc^1 + l.space * l.nonnewline_esc^0)) - --- Keywords. 
-local keyword = token(l.KEYWORD, word_match{ - 'class', 'delegate', 'enum', 'event', 'interface', 'namespace', 'struct', - 'using', 'abstract', 'const', 'explicit', 'extern', 'fixed', 'implicit', - 'internal', 'lock', 'out', 'override', 'params', 'partial', 'private', - 'protected', 'public', 'ref', 'sealed', 'static', 'readonly', 'unsafe', - 'virtual', 'volatile', 'add', 'as', 'assembly', 'base', 'break', 'case', - 'catch', 'checked', 'continue', 'default', 'do', 'else', 'finally', 'for', - 'foreach', 'get', 'goto', 'if', 'in', 'is', 'new', 'remove', 'return', 'set', - 'sizeof', 'stackalloc', 'super', 'switch', 'this', 'throw', 'try', 'typeof', - 'unchecked', 'value', 'void', 'while', 'yield', - 'null', 'true', 'false' -}) - --- Types. -local type = token(l.TYPE, word_match{ - 'bool', 'byte', 'char', 'decimal', 'double', 'float', 'int', 'long', 'object', - 'operator', 'sbyte', 'short', 'string', 'uint', 'ulong', 'ushort' -}) - --- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * + word_match('define elif else endif error if line undef warning region endregion'))) -- Operators. -local operator = token(l.OPERATOR, S('~!.,:;+-*/<>=\\^|&%?()[]{}')) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'type', type}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'preproc', preproc}, - {'operator', operator}, -} +lex:add_rule('operator', token(lexer.OPERATOR, S('~!.,:;+-*/<>=\\^|&%?()[]{}'))) -M._foldsymbols = { - _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'}, - [l.PREPROCESSOR] = { - region = 1, endregion = -1, - ['if'] = 1, ifdef = 1, ifndef = 1, endif = -1 - }, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} -} +-- Fold points. 
+lex:add_fold_point(lexer.PREPROCESSOR, 'if', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'region', 'endregion') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) -return M +return lex diff --git a/lua/lexers/css.lua b/lua/lexers/css.lua @@ -1,166 +1,168 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- CSS LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'css'} +local lex = lexer.new('css') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Properties. +lex:add_rule('property', token('property', word_match{ + -- CSS 1. 
+ 'color', 'background-color', 'background-image', 'background-repeat', 'background-attachment', + 'background-position', 'background', 'font-family', 'font-style', 'font-variant', 'font-weight', + 'font-size', 'font', 'word-spacing', 'letter-spacing', 'text-decoration', 'vertical-align', + 'text-transform', 'text-align', 'text-indent', 'line-height', 'margin-top', 'margin-right', + 'margin-bottom', 'margin-left', 'margin', 'padding-top', 'padding-right', 'padding-bottom', + 'padding-left', 'padding', 'border-top-width', 'border-right-width', 'border-bottom-width', + 'border-left-width', 'border-width', 'border-top', 'border-right', 'border-bottom', 'border-left', + 'border', 'border-color', 'border-style', 'width', 'height', 'float', 'clear', 'display', + 'white-space', 'list-style-type', 'list-style-image', 'list-style-position', 'list-style', + -- CSS 2. + 'border-top-color', 'border-right-color', 'border-bottom-color', 'border-left-color', + 'border-color', 'border-top-style', 'border-right-style', 'border-bottom-style', + 'border-left-style', 'border-style', 'top', 'right', 'bottom', 'left', 'position', 'z-index', + 'direction', 'unicode-bidi', 'min-width', 'max-width', 'min-height', 'max-height', 'overflow', + 'clip', 'visibility', 'content', 'quotes', 'counter-reset', 'counter-increment', 'marker-offset', + 'size', 'marks', 'page-break-before', 'page-break-after', 'page-break-inside', 'page', 'orphans', + 'widows', 'font-stretch', 'font-size-adjust', 'unicode-range', 'units-per-em', 'src', 'panose-1', + 'stemv', 'stemh', 'slope', 'cap-height', 'x-height', 'ascent', 'descent', 'widths', 'bbox', + 'definition-src', 'baseline', 'centerline', 'mathline', 'topline', 'text-shadow', 'caption-side', + 'table-layout', 'border-collapse', 'border-spacing', 'empty-cells', 'speak-header', 'cursor', + 'outline', 'outline-width', 'outline-style', 'outline-color', 'volume', 'speak', 'pause-before', + 'pause-after', 'pause', 'cue-before', 'cue-after', 'cue', 'play-during', 
'azimuth', 'elevation', + 'speech-rate', 'voice-family', 'pitch', 'pitch-range', 'stress', 'richness', 'speak-punctuation', + 'speak-numeral', + -- CSS 3. + 'flex', 'flex-basis', 'flex-direction', 'flex-flow', 'flex-grow', 'flex-shrink', 'flex-wrap', + 'align-content', 'align-items', 'align-self', 'justify-content', 'order', 'border-radius', + 'transition', 'transform', 'box-shadow', 'filter', 'opacity', 'resize', 'word-break', 'word-wrap', + 'box-sizing', 'animation', 'text-overflow' +})) +lex:add_style('property', lexer.styles.keyword) + +-- Values. +lex:add_rule('value', token('value', word_match{ + -- CSS 1. + 'auto', 'none', 'normal', 'italic', 'oblique', 'small-caps', 'bold', 'bolder', 'lighter', + 'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large', 'larger', 'smaller', + 'transparent', 'repeat', 'repeat-x', 'repeat-y', 'no-repeat', 'scroll', 'fixed', 'top', 'bottom', + 'left', 'center', 'right', 'justify', 'both', 'underline', 'overline', 'line-through', 'blink', + 'baseline', 'sub', 'super', 'text-top', 'middle', 'text-bottom', 'capitalize', 'uppercase', + 'lowercase', 'thin', 'medium', 'thick', 'dotted', 'dashed', 'solid', 'double', 'groove', 'ridge', + 'inset', 'outset', 'block', 'inline', 'list-item', 'pre', 'no-wrap', 'inside', 'outside', 'disc', + 'circle', 'square', 'decimal', 'lower-roman', 'upper-roman', 'lower-alpha', 'upper-alpha', 'aqua', + 'black', 'blue', 'fuchsia', 'gray', 'green', 'lime', 'maroon', 'navy', 'olive', 'purple', 'red', + 'silver', 'teal', 'white', 'yellow', + -- CSS 2. 
+ 'inherit', 'run-in', 'compact', 'marker', 'table', 'inline-table', 'table-row-group', + 'table-header-group', 'table-footer-group', 'table-row', 'table-column-group', 'table-column', + 'table-cell', 'table-caption', 'static', 'relative', 'absolute', 'fixed', 'ltr', 'rtl', 'embed', + 'bidi-override', 'visible', 'hidden', 'scroll', 'collapse', 'open-quote', 'close-quote', + 'no-open-quote', 'no-close-quote', 'decimal-leading-zero', 'lower-greek', 'lower-latin', + 'upper-latin', 'hebrew', 'armenian', 'georgian', 'cjk-ideographic', 'hiragana', 'katakana', + 'hiragana-iroha', 'katakana-iroha', 'landscape', 'portrait', 'crop', 'cross', 'always', 'avoid', + 'wider', 'narrower', 'ultra-condensed', 'extra-condensed', 'condensed', 'semi-condensed', + 'semi-expanded', 'expanded', 'extra-expanded', 'ultra-expanded', 'caption', 'icon', 'menu', + 'message-box', 'small-caption', 'status-bar', 'separate', 'show', 'hide', 'once', 'crosshair', + 'default', 'pointer', 'move', 'text', 'wait', 'help', 'e-resize', 'ne-resize', 'nw-resize', + 'n-resize', 'se-resize', 'sw-resize', 's-resize', 'w-resize', 'ActiveBorder', 'ActiveCaption', + 'AppWorkspace', 'Background', 'ButtonFace', 'ButtonHighlight', 'ButtonShadow', + 'InactiveCaptionText', 'ButtonText', 'CaptionText', 'GrayText', 'Highlight', 'HighlightText', + 'InactiveBorder', 'InactiveCaption', 'InfoBackground', 'InfoText', 'Menu', 'MenuText', + 'Scrollbar', 'ThreeDDarkShadow', 'ThreeDFace', 'ThreeDHighlight', 'ThreeDLightShadow', + 'ThreeDShadow', 'Window', 'WindowFrame', 'WindowText', 'silent', 'x-soft', 'soft', 'medium', + 'loud', 'x-loud', 'spell-out', 'mix', 'left-side', 'far-left', 'center-left', 'center-right', + 'far-right', 'right-side', 'behind', 'leftwards', 'rightwards', 'below', 'level', 'above', + 'higher', 'lower', 'x-slow', 'slow', 'medium', 'fast', 'x-fast', 'faster', 'slower', 'male', + 'female', 'child', 'x-low', 'low', 'high', 'x-high', 'code', 'digits', 'continous', + -- CSS 3. 
+ 'flex', 'row', 'column', 'ellipsis', 'inline-block' +})) +lex:add_style('value', lexer.styles.constant) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match{ + 'attr', 'blackness', 'blend', 'blenda', 'blur', 'brightness', 'calc', 'circle', 'color-mod', + 'contrast', 'counter', 'cubic-bezier', 'device-cmyk', 'drop-shadow', 'ellipse', 'gray', + 'grayscale', 'hsl', 'hsla', 'hue', 'hue-rotate', 'hwb', 'image', 'inset', 'invert', 'lightness', + 'linear-gradient', 'matrix', 'matrix3d', 'opacity', 'perspective', 'polygon', 'radial-gradient', + 'rect', 'repeating-linear-gradient', 'repeating-radial-gradient', 'rgb', 'rgba', 'rotate', + 'rotate3d', 'rotateX', 'rotateY', 'rotateZ', 'saturate', 'saturation', 'scale', 'scale3d', + 'scaleX', 'scaleY', 'scaleZ', 'sepia', 'shade', 'skewX', 'skewY', 'steps', 'tint', 'toggle', + 'translate', 'translate3d', 'translateX', 'translateY', 'translateZ', 'url', 'whiteness', 'var' +})) --- Comments. -local comment = token(l.COMMENT, '/*' * (l.any - '*/')^0 * P('*/')^-1) +-- Colors. 
+local xdigit = lexer.xdigit +lex:add_rule('color', token('color', word_match{ + 'aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', + 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', + 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', + 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', + 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', + 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', + 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', + 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', + 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', + 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', + 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', + 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', + 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', + 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', + 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', + 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', + 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', + 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', + 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', + 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'transparent', + 'turquoise', 'violet', 
'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen' +} + '#' * xdigit * xdigit * xdigit * (xdigit * xdigit * xdigit)^-1)) +lex:add_style('color', lexer.styles.number) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha * (lexer.alnum + S('_-'))^0)) + +-- Pseudo classes and pseudo elements. +lex:add_rule('pseudoclass', ':' * token('pseudoclass', word_match{ + 'active', 'checked', 'disabled', 'empty', 'enabled', 'first-child', 'first-of-type', 'focus', + 'hover', 'in-range', 'invalid', 'lang', 'last-child', 'last-of-type', 'link', 'not', 'nth-child', + 'nth-last-child', 'nth-last-of-type', 'nth-of-type', 'only-of-type', 'only-child', 'optional', + 'out-of-range', 'read-only', 'read-write', 'required', 'root', 'target', 'valid', 'visited' +})) +lex:add_style('pseudoclass', lexer.styles.constant) +lex:add_rule('pseudoelement', '::' * + token('pseudoelement', word_match('after before first-letter first-line selection'))) +lex:add_style('pseudoelement', lexer.styles.constant) -- Strings. -local sq_str = l.delimited_range("'") -local dq_str = l.delimited_range('"') -local string = token(l.STRING, sq_str + dq_str) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) --- Numbers. -local number = token(l.NUMBER, l.digit^1) - --- Keywords. 
-local css1_property = word_match({ - 'color', 'background-color', 'background-image', 'background-repeat', - 'background-attachment', 'background-position', 'background', 'font-family', - 'font-style', 'font-variant', 'font-weight', 'font-size', 'font', - 'word-spacing', 'letter-spacing', 'text-decoration', 'vertical-align', - 'text-transform', 'text-align', 'text-indent', 'line-height', 'margin-top', - 'margin-right', 'margin-bottom', 'margin-left', 'margin', 'padding-top', - 'padding-right', 'padding-bottom', 'padding-left', 'padding', - 'border-top-width', 'border-right-width', 'border-bottom-width', - 'border-left-width', 'border-width', 'border-top', 'border-right', - 'border-bottom', 'border-left', 'border', 'border-color', 'border-style', - 'width', 'height', 'float', 'clear', 'display', 'white-space', - 'list-style-type', 'list-style-image', 'list-style-position', 'list-style' -}, '-') -local css1_value = word_match({ - 'auto', 'none', 'normal', 'italic', 'oblique', 'small-caps', 'bold', 'bolder', - 'lighter', 'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', - 'xx-large', 'larger', 'smaller', 'transparent', 'repeat', 'repeat-x', - 'repeat-y', 'no-repeat', 'scroll', 'fixed', 'top', 'bottom', 'left', 'center', - 'right', 'justify', 'both', 'underline', 'overline', 'line-through', 'blink', - 'baseline', 'sub', 'super', 'text-top', 'middle', 'text-bottom', 'capitalize', - 'uppercase', 'lowercase', 'thin', 'medium', 'thick', 'dotted', 'dashed', - 'solid', 'double', 'groove', 'ridge', 'inset', 'outset', 'block', 'inline', - 'list-item', 'pre', 'no-wrap', 'inside', 'outside', 'disc', 'circle', - 'square', 'decimal', 'lower-roman', 'upper-roman', 'lower-alpha', - 'upper-alpha', 'aqua', 'black', 'blue', 'fuchsia', 'gray', 'green', 'lime', - 'maroon', 'navy', 'olive', 'purple', 'red', 'silver', 'teal', 'white', - 'yellow' -}, '-') -local css2_property = word_match({ - 'border-top-color', 'border-right-color', 'border-bottom-color', - 
'border-left-color', 'border-color', 'border-top-style', 'border-right-style', - 'border-bottom-style', 'border-left-style', 'border-style', 'top', 'right', - 'bottom', 'left', 'position', 'z-index', 'direction', 'unicode-bidi', - 'min-width', 'max-width', 'min-height', 'max-height', 'overflow', 'clip', - 'visibility', 'content', 'quotes', 'counter-reset', 'counter-increment', - 'marker-offset', 'size', 'marks', 'page-break-before', 'page-break-after', - 'page-break-inside', 'page', 'orphans', 'widows', 'font-stretch', - 'font-size-adjust', 'unicode-range', 'units-per-em', 'src', 'panose-1', - 'stemv', 'stemh', 'slope', 'cap-height', 'x-height', 'ascent', 'descent', - 'widths', 'bbox', 'definition-src', 'baseline', 'centerline', 'mathline', - 'topline', 'text-shadow', 'caption-side', 'table-layout', 'border-collapse', - 'border-spacing', 'empty-cells', 'speak-header', 'cursor', 'outline', - 'outline-width', 'outline-style', 'outline-color', 'volume', 'speak', - 'pause-before', 'pause-after', 'pause', 'cue-before', 'cue-after', 'cue', - 'play-during', 'azimuth', 'elevation', 'speech-rate', 'voice-family', 'pitch', - 'pitch-range', 'stress', 'richness', 'speak-punctuation', 'speak-numeral' -}, '-') -local css2_value = word_match({ - 'inherit', 'run-in', 'compact', 'marker', 'table', 'inline-table', - 'table-row-group', 'table-header-group', 'table-footer-group', 'table-row', - 'table-column-group', 'table-column', 'table-cell', 'table-caption', 'static', - 'relative', 'absolute', 'fixed', 'ltr', 'rtl', 'embed', 'bidi-override', - 'visible', 'hidden', 'scroll', 'collapse', 'open-quote', 'close-quote', - 'no-open-quote', 'no-close-quote', 'decimal-leading-zero', 'lower-greek', - 'lower-latin', 'upper-latin', 'hebrew', 'armenian', 'georgian', - 'cjk-ideographic', 'hiragana', 'katakana', 'hiragana-iroha', 'katakana-iroha', - 'landscape', 'portrait', 'crop', 'cross', 'always', 'avoid', 'wider', - 'narrower', 'ultra-condensed', 'extra-condensed', 'condensed', - 
'semi-condensed', 'semi-expanded', 'expanded', 'extra-expanded', - 'ultra-expanded', 'caption', 'icon', 'menu', 'message-box', 'small-caption', - 'status-bar', 'separate', 'show', 'hide', 'once', 'crosshair', 'default', - 'pointer', 'move', 'text', 'wait', 'help', 'e-resize', 'ne-resize', - 'nw-resize', 'n-resize', 'se-resize', 'sw-resize', 's-resize', 'w-resize', - 'ActiveBorder', 'ActiveCaption', 'AppWorkspace', 'Background', 'ButtonFace', - 'ButtonHighlight', 'ButtonShadow', 'InactiveCaptionText', 'ButtonText', - 'CaptionText', 'GrayText', 'Highlight', 'HighlightText', 'InactiveBorder', - 'InactiveCaption', 'InfoBackground', 'InfoText', 'Menu', 'MenuText', - 'Scrollbar', 'ThreeDDarkShadow', 'ThreeDFace', 'ThreeDHighlight', - 'ThreeDLightShadow', 'ThreeDShadow', 'Window', 'WindowFrame', 'WindowText', - 'silent', 'x-soft', 'soft', 'medium', 'loud', 'x-loud', 'spell-out', 'mix', - 'left-side', 'far-left', 'center-left', 'center-right', 'far-right', - 'right-side', 'behind', 'leftwards', 'rightwards', 'below', 'level', 'above', - 'higher', 'lower', 'x-slow', 'slow', 'medium', 'fast', 'x-fast', 'faster', - 'slower', 'male', 'female', 'child', 'x-low', 'low', 'high', 'x-high', 'code', - 'digits', 'continous' -}, '-') -local property = token(l.KEYWORD, css1_property + css2_property) -local value = token('value', css1_value + css2_value) -local keyword = property + value +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.range('/*', '*/'))) --- Identifiers. -local identifier = token(l.IDENTIFIER, l.alpha * (l.alnum + S('_-'))^0) +-- Numbers. +local unit = token('unit', word_match( + 'ch cm deg dpcm dpi dppx em ex grad Hz in kHz mm ms pc pt px q rad rem s turn vh vmax vmin vw')) +lex:add_style('unit', lexer.styles.number) +lex:add_rule('number', token(lexer.NUMBER, lexer.dec_num) * unit^-1) -- Operators. -local operator = token(l.OPERATOR, S('~!#*>+=|.,:;()[]{}')) +lex:add_rule('operator', token(lexer.OPERATOR, S('~!#*>+=|.,:;()[]{}'))) -- At rule. 
-local at_rule = token('at_rule', P('@') * word_match{ - 'charset', 'font-face', 'media', 'page', 'import' -}) +lex:add_rule('at_rule', token('at_rule', '@' * + word_match('charset font-face media page import namespace keyframes'))) +lex:add_style('at_rule', lexer.styles.preprocessor) --- Colors. -local xdigit = l.xdigit -local hex_color = '#' * xdigit * xdigit * xdigit * (xdigit * xdigit * xdigit)^-1 -local color_name = word_match{ - 'aqua', 'black', 'blue', 'fuchsia', 'gray', 'green', 'lime', 'maroon', 'navy', - 'olive', 'orange', 'purple', 'red', 'silver', 'teal', 'white', 'yellow' -} -local color = token('color', hex_color + color_name) - --- Pseudo. -local pseudo = token(l.CONSTANT, word_match({ - -- Pseudo elements. - 'first-line', 'first-letter', 'before', 'after', - -- Pseudo classes. - 'first-child', 'link', 'visited', 'hover', 'active', 'focus', 'lang', -}, '-')) - --- Units. -local unit = token('unit', word_match{ - 'em', 'ex', 'px', 'pt', 'pc', 'in', 'ft', 'mm', 'cm', 'kHz', 'Hz', 'deg', - 'rad', 'grad', 'ms', 's' -} + '%') - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'pseudo', pseudo}, - {'color', color}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number * unit^-1}, - {'operator', operator}, - {'at_rule', at_rule}, -} - -M._tokenstyles = { - unit = l.STYLE_LABEL, - value = l.STYLE_CONSTANT, - color = l.STYLE_NUMBER, - at_rule = l.STYLE_PREPROCESSOR -} - -M._foldsymbols = { - _patterns = {'[{}]', '/%*', '%*/'}, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = {['/*'] = 1, ['*/'] = -1} -} - -return M +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') + +return lex diff --git a/lua/lexers/cuda.lua b/lua/lexers/cuda.lua @@ -1,92 +1,69 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- CUDA LPeg lexer. 
-local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S -local table = _G.table +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'cuda'} +local lex = lexer.new('cuda', {inherit = lexer.load('cpp')}) -- Whitespace -local ws = token(l.WHITESPACE, l.space^1) +lex:modify_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - '__global__', '__host__', '__device__', '__constant__', '__shared__' -}) +local keyword = token(lexer.KEYWORD, + word_match('__global__ __host__ __device__ __constant__ __shared__')) +lex:modify_rule('keyword', keyword + lex:get_rule('keyword')) + +-- Types. +lex:modify_rule('type', token(lexer.TYPE, word_match{ + 'uint', 'int1', 'uint1', 'int2', 'uint2', 'int3', 'uint3', 'int4', 'uint4', 'float1', 'float2', + 'float3', 'float4', 'char1', 'char2', 'char3', 'char4', 'uchar1', 'uchar2', 'uchar3', 'uchar4', + 'short1', 'short2', 'short3', 'short4', 'dim1', 'dim2', 'dim3', 'dim4' +}) + lex:get_rule('type') + -- Functions. -local func = token(l.FUNCTION, word_match{ +token(lexer.FUNCTION, word_match{ -- Atom. - 'atomicAdd', 'atomicAnd', 'atomicCAS', 'atomicDec', 'atomicExch', 'atomicInc', - 'atomicMax', 'atomicMin', 'atomicOr', 'atomicSub', 'atomicXor', + 'atomicAdd', 'atomicAnd', 'atomicCAS', 'atomicDec', 'atomicExch', 'atomicInc', 'atomicMax', + 'atomicMin', 'atomicOr', 'atomicSub', 'atomicXor', -- -- Dev. 
- 'tex1D', 'tex1Dfetch', 'tex2D', '__float_as_int', '__int_as_float', - '__float2int_rn', '__float2int_rz', '__float2int_ru', '__float2int_rd', - '__float2uint_rn', '__float2uint_rz', '__float2uint_ru', '__float2uint_rd', - '__int2float_rn', '__int2float_rz', '__int2float_ru', '__int2float_rd', - '__uint2float_rn', '__uint2float_rz', '__uint2float_ru', '__uint2float_rd', - '__fadd_rz', '__fmul_rz', '__fdividef', '__mul24', '__umul24', '__mulhi', - '__umulhi', '__mul64hi', '__umul64hi', 'min', 'umin', 'fminf', 'fmin', 'max', - 'umax', 'fmaxf', 'fmax', 'abs', 'fabsf', 'fabs', 'sqrtf', 'sqrt', 'sinf', - '__sinf', 'sin', 'cosf', '__cosf', 'cos', 'sincosf', '__sincosf', 'expf', - '__expf', 'exp', 'logf', '__logf', 'log', + 'tex1D', 'tex1Dfetch', 'tex2D', '__float_as_int', '__int_as_float', '__float2int_rn', + '__float2int_rz', '__float2int_ru', '__float2int_rd', '__float2uint_rn', '__float2uint_rz', + '__float2uint_ru', '__float2uint_rd', '__int2float_rn', '__int2float_rz', '__int2float_ru', + '__int2float_rd', '__uint2float_rn', '__uint2float_rz', '__uint2float_ru', '__uint2float_rd', + '__fadd_rz', '__fmul_rz', '__fdividef', '__mul24', '__umul24', '__mulhi', '__umulhi', '__mul64hi', + '__umul64hi', 'min', 'umin', 'fminf', 'fmin', 'max', 'umax', 'fmaxf', 'fmax', 'abs', 'fabsf', + 'fabs', 'sqrtf', 'sqrt', 'sinf', '__sinf', 'sin', 'cosf', '__cosf', 'cos', 'sincosf', '__sincosf', + 'expf', '__expf', 'exp', 'logf', '__logf', 'log', -- -- Runtime. 
- 'cudaBindTexture', 'cudaBindTextureToArray', 'cudaChooseDevice', - 'cudaConfigureCall', 'cudaCreateChannelDesc', 'cudaD3D10GetDevice', - 'cudaD3D10MapResources', 'cudaD3D10RegisterResource', - 'cudaD3D10ResourceGetMappedArray', 'cudaD3D10ResourceGetMappedPitch', + 'cudaBindTexture', 'cudaBindTextureToArray', 'cudaChooseDevice', 'cudaConfigureCall', + 'cudaCreateChannelDesc', 'cudaD3D10GetDevice', 'cudaD3D10MapResources', + 'cudaD3D10RegisterResource', 'cudaD3D10ResourceGetMappedArray', 'cudaD3D10ResourceGetMappedPitch', 'cudaD3D10ResourceGetMappedPointer', 'cudaD3D10ResourceGetMappedSize', 'cudaD3D10ResourceGetSurfaceDimensions', 'cudaD3D10ResourceSetMapFlags', - 'cudaD3D10SetDirect3DDevice', 'cudaD3D10UnmapResources', - 'cudaD3D10UnregisterResource', 'cudaD3D9GetDevice', - 'cudaD3D9GetDirect3DDevice', 'cudaD3D9MapResources', - 'cudaD3D9RegisterResource', 'cudaD3D9ResourceGetMappedArray', - 'cudaD3D9ResourceGetMappedPitch', 'cudaD3D9ResourceGetMappedPointer', - 'cudaD3D9ResourceGetMappedSize', 'cudaD3D9ResourceGetSurfaceDimensions', - 'cudaD3D9ResourceSetMapFlags', 'cudaD3D9SetDirect3DDevice', - 'cudaD3D9UnmapResources', 'cudaD3D9UnregisterResource', 'cudaEventCreate', - 'cudaEventDestroy', 'cudaEventElapsedTime', 'cudaEventQuery', - 'cudaEventRecord', 'cudaEventSynchronize', 'cudaFree', 'cudaFreeArray', - 'cudaFreeHost', 'cudaGetChannelDesc', 'cudaGetDevice', 'cudaGetDeviceCount', - 'cudaGetDeviceProperties', 'cudaGetErrorString', 'cudaGetLastError', - 'cudaGetSymbolAddress', 'cudaGetSymbolSize', 'cudaGetTextureAlignmentOffset', - 'cudaGetTextureReference', 'cudaGLMapBufferObject', + 'cudaD3D10SetDirect3DDevice', 'cudaD3D10UnmapResources', 'cudaD3D10UnregisterResource', + 'cudaD3D9GetDevice', 'cudaD3D9GetDirect3DDevice', 'cudaD3D9MapResources', + 'cudaD3D9RegisterResource', 'cudaD3D9ResourceGetMappedArray', 'cudaD3D9ResourceGetMappedPitch', + 'cudaD3D9ResourceGetMappedPointer', 'cudaD3D9ResourceGetMappedSize', + 'cudaD3D9ResourceGetSurfaceDimensions', 
'cudaD3D9ResourceSetMapFlags', + 'cudaD3D9SetDirect3DDevice', 'cudaD3D9UnmapResources', 'cudaD3D9UnregisterResource', + 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventElapsedTime', 'cudaEventQuery', + 'cudaEventRecord', 'cudaEventSynchronize', 'cudaFree', 'cudaFreeArray', 'cudaFreeHost', + 'cudaGetChannelDesc', 'cudaGetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties', + 'cudaGetErrorString', 'cudaGetLastError', 'cudaGetSymbolAddress', 'cudaGetSymbolSize', + 'cudaGetTextureAlignmentOffset', 'cudaGetTextureReference', 'cudaGLMapBufferObject', 'cudaGLRegisterBufferObject', 'cudaGLSetGLDevice', 'cudaGLUnmapBufferObject', - 'cudaGLUnregisterBufferObject', 'cudaLaunch', 'cudaMalloc', 'cudaMalloc3D', - 'cudaMalloc3DArray', 'cudaMallocArray', 'cudaMallocHost', 'cudaMallocPitch', - 'cudaMemcpy', 'cudaMemcpy2D', 'cudaMemcpy2DArrayToArray', - 'cudaMemcpy2DFromArray', 'cudaMemcpy2DToArray', 'cudaMemcpy3D', - 'cudaMemcpyArrayToArray', 'cudaMemcpyFromArray', 'cudaMemcpyFromSymbol', - 'cudaMemcpyToArray', 'cudaMemcpyToSymbol', 'cudaMemset', 'cudaMemset2D', - 'cudaMemset3D', 'cudaSetDevice', 'cudaSetupArgument', 'cudaStreamCreate', - 'cudaStreamDestroy', 'cudaStreamQuery', 'cudaStreamSynchronize', - 'cudaThreadExit', 'cudaThreadSynchronize', 'cudaUnbindTexture' -}) - --- Types. 
-local type = token(l.TYPE, word_match{ - 'uint', 'int1', 'uint1', 'int2', 'uint2', 'int3', 'uint3', 'int4', 'uint4', - 'float1', 'float2', 'float3', 'float4', 'char1', 'char2', 'char3', 'char4', - 'uchar1', 'uchar2', 'uchar3', 'uchar4', 'short1', 'short2', 'short3', - 'short4', 'dim1', 'dim2', 'dim3', 'dim4' -}) + 'cudaGLUnregisterBufferObject', 'cudaLaunch', 'cudaMalloc', 'cudaMalloc3D', 'cudaMalloc3DArray', + 'cudaMallocArray', 'cudaMallocHost', 'cudaMallocPitch', 'cudaMemcpy', 'cudaMemcpy2D', + 'cudaMemcpy2DArrayToArray', 'cudaMemcpy2DFromArray', 'cudaMemcpy2DToArray', 'cudaMemcpy3D', + 'cudaMemcpyArrayToArray', 'cudaMemcpyFromArray', 'cudaMemcpyFromSymbol', 'cudaMemcpyToArray', + 'cudaMemcpyToSymbol', 'cudaMemset', 'cudaMemset2D', 'cudaMemset3D', 'cudaSetDevice', + 'cudaSetupArgument', 'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamQuery', + 'cudaStreamSynchronize', 'cudaThreadExit', 'cudaThreadSynchronize', 'cudaUnbindTexture' +}) + -- Variables. -local variable = token(l.VARIABLE, word_match{ - 'gridDim', 'blockIdx', 'blockDim', 'threadIdx' -}) - --- Extend cpp lexer to include CUDA elements. -local cpp = l.load('cpp') -local _rules = cpp._rules -_rules[1] = {'whitespace', ws} -table.insert(_rules, 2, {'cuda_keyword', keyword}) -table.insert(_rules, 3, {'cuda_function', func}) -table.insert(_rules, 4, {'cuda_type', type}) -table.insert(_rules, 5, {'cuda_variable', variable}) -M._rules = _rules -M._foldsymbols = cpp._foldsymbols +token(lexer.VARIABLE, word_match('gridDim blockIdx blockDim threadIdx'))) -return M +return lex diff --git a/lua/lexers/dart.lua b/lua/lexers/dart.lua @@ -1,77 +1,56 @@ --- Copyright 2013-2017 Brian Schott (@Hackerpilot on Github). See LICENSE. +-- Copyright 2013-2022 Mitchell. See LICENSE. -- Dart LPeg lexer. +-- Written by Brian Schott (@Hackerpilot on Github). 
-local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'dart'} +local lex = lexer.new('dart') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local line_comment = '//' * l.nonnewline_esc^0 -local nested_comment = l.nested_pair('/*', '*/') -local comment = token(l.COMMENT, line_comment + nested_comment) - --- Strings. -local sq_str = S('r')^-1 * l.delimited_range("'", true) -local dq_str = S('r')^-1 * l.delimited_range('"', true) -local sq_str_multiline = S('r')^-1 * l.delimited_range('"""') -local dq_str_multiline = S('r')^-1 * l.delimited_range("''' ") -local string = token(l.STRING, - sq_str + dq_str + sq_str_multiline + dq_str_multiline) - --- Numbers. -local number = token(l.NUMBER, (l.float + l.hex_num)) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'assert', 'break', 'case', 'catch', 'class', 'const', 'continue', 'default', - 'do', 'else', 'enum', 'extends', 'false', 'final' , 'finally', 'for', 'if', - 'in', 'is', 'new', 'null', 'rethrow', 'return', 'super', 'switch', 'this', - 'throw', 'true', 'try', 'var', 'void', 'while', 'with', -}) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'assert', 'break', 'case', 'catch', 'class', 'const', 'continue', 'default', 'do', 'else', 'enum', + 'extends', 'false', 'final', 'finally', 'for', 'if', 'in', 'is', 'new', 'null', 'rethrow', + 'return', 'super', 'switch', 'this', 'throw', 'true', 'try', 'var', 'void', 'while', 'with' +})) + +-- Built-ins. 
+lex:add_rule('builtin', token(lexer.CONSTANT, word_match{ + 'abstract', 'as', 'dynamic', 'export', 'external', 'factory', 'get', 'implements', 'import', + 'library', 'operator', 'part', 'set', 'static', 'typedef' +})) -local builtin_identifiers = token(l.CONSTANT, word_match{ - 'abstract', 'as', 'dynamic', 'export', 'external', 'factory', 'get', - 'implements', 'import', 'library', 'operator', 'part', 'set', 'static', - 'typedef' -}) +-- Strings. +local sq_str = S('r')^-1 * lexer.range("'", true) +local dq_str = S('r')^-1 * lexer.range('"', true) +local tq_str = S('r')^-1 * (lexer.range("'''") + lexer.range('"""')) +lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str)) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, S('#?=!<>+-*$/%&|^~.,;()[]{}')) +-- Comments. +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/', false, false, true) +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) --- Preprocs. -local annotation = token('annotation', '@' * l.word^1) +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'constant', builtin_identifiers}, - {'string', string}, - {'identifier', identifier}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, - {'annotation', annotation}, -} +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('#?=!<>+-*$/%&|^~.,;()[]{}'))) -M._tokenstyles = { - annotation = l.STYLE_PREPROCESSOR, -} +-- Annotations. 
+lex:add_rule('annotation', token('annotation', '@' * lexer.word^1)) +lex:add_style('annotation', lexer.styles.preprocessor) -M._foldsymbols = { - _patterns = {'[{}]', '/[*+]', '[*+]/', '//'}, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = { - ['/*'] = 1, ['*/'] = -1, ['/+'] = 1, ['+/'] = -1, - ['//'] = l.fold_line_comments('//') - } -} +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) -return M +return lex diff --git a/lua/lexers/desktop.lua b/lua/lexers/desktop.lua @@ -1,62 +1,53 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Desktop Entry LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'desktop'} +local lex = lexer.new('desktop') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) --- Comments. -local comment = token(l.COMMENT, '#' * l.nonnewline^0) +-- Keys. +lex:add_rule('key', token('key', word_match{ + 'Type', 'Version', 'Name', 'GenericName', 'NoDisplay', 'Comment', 'Icon', 'Hidden', 'OnlyShowIn', + 'NotShowIn', 'TryExec', 'Exec', 'Exec', 'Path', 'Terminal', 'MimeType', 'Categories', + 'StartupNotify', 'StartupWMClass', 'URL' +})) +lex:add_style('key', lexer.styles.keyword) --- Strings. -local string = token(l.STRING, l.delimited_range('"')) +-- Values. +lex:add_rule('value', token('value', word_match('true false'))) +lex:add_style('value', lexer.styles.constant) + +-- Identifiers. +lex:add_rule('identifier', lexer.token(lexer.IDENTIFIER, lexer.alpha * (lexer.alnum + S('_-'))^0)) -- Group headers. 
-local group_header = l.starts_line(token(l.STRING, - l.delimited_range('[]', false, true))) +local bracketed = lexer.range('[', ']') +lex:add_rule('header', lexer.starts_line(token('header', bracketed))) +lex:add_style('header', lexer.styles.label) --- Numbers. -local number = token(l.NUMBER, (l.float + l.integer)) +-- Locales. +lex:add_rule('locale', token('locale', bracketed)) +lex:add_style('locale', lexer.styles.class) --- Keywords. -local keyword = token(l.KEYWORD, word_match{'true', 'false'}) +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.range('"'))) --- Locales. -local locale = token(l.CLASS, l.delimited_range('[]', false, true)) +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) --- Keys. -local key = token(l.VARIABLE, word_match{ - 'Type', 'Version', 'Name', 'GenericName', 'NoDisplay', 'Comment', 'Icon', - 'Hidden', 'OnlyShowIn', 'NotShowIn', 'TryExec', 'Exec', 'Exec', 'Path', - 'Terminal', 'MimeType', 'Categories', 'StartupNotify', 'StartupWMClass', 'URL' -}) +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Field codes. -local code = l.token(l.CONSTANT, P('%') * S('fFuUdDnNickvm')) - --- Identifiers. -local identifier = l.token(l.IDENTIFIER, l.alpha * (l.alnum + S('_-'))^0) +lex:add_rule('code', lexer.token('code', '%' * S('fFuUdDnNickvm'))) +lex:add_style('code', lexer.styles.variable) -- Operators. -local operator = token(l.OPERATOR, S('=')) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'key', key}, - {'identifier', identifier}, - {'group_header', group_header}, - {'locale', locale}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'code', code}, - {'operator', operator}, -} - -return M +lex:add_rule('operator', token(lexer.OPERATOR, S('='))) + +return lex diff --git a/lua/lexers/diff.lua b/lua/lexers/diff.lua @@ -1,44 +1,29 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. 
See LICENSE. -- Diff LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'diff'} +local lex = lexer.new('diff', {lex_by_line = true}) -- Text, separators, and file headers. -local index = token(l.COMMENT, 'Index: ' * l.any^0 * P(-1)) -local separator = token(l.COMMENT, ('---' + P('*')^4 + P('=')^1) * l.space^0 * - -1) -local header = token('header', (P('*** ') + '--- ' + '+++ ') * l.any^1) +lex:add_rule('index', token(lexer.COMMENT, 'Index: ' * lexer.any^0 * -1)) +lex:add_rule('separator', token(lexer.COMMENT, ('---' + P('*')^4 + P('=')^1) * lexer.space^0 * -1)) +lex:add_rule('header', token('header', (P('*** ') + '--- ' + '+++ ') * lexer.any^1)) +lex:add_style('header', lexer.styles.comment) -- Location. -local location = token(l.NUMBER, ('@@' + l.digit^1 + '****') * l.any^1) +lex:add_rule('location', token(lexer.NUMBER, ('@@' + lexer.dec_num + '****') * lexer.any^1)) -- Additions, deletions, and changes. -local addition = token('addition', S('>+') * l.any^0) -local deletion = token('deletion', S('<-') * l.any^0) -local change = token('change', '! ' * l.any^0) +lex:add_rule('addition', token('addition', S('>+') * lexer.any^0)) +lex:add_style('addition', {fore = lexer.colors.green}) +lex:add_rule('deletion', token('deletion', S('<-') * lexer.any^0)) +lex:add_style('deletion', {fore = lexer.colors.red}) +lex:add_rule('change', token('change', '!' 
* lexer.any^0)) +lex:add_style('change', {fore = lexer.colors.yellow}) -M._rules = { - {'index', index}, - {'separator', separator}, - {'header', header}, - {'location', location}, - {'addition', addition}, - {'deletion', deletion}, - {'change', change}, - {'any_line', token('default', l.any^1)}, -} +lex:add_rule('any_line', token(lexer.DEFAULT, lexer.any^1)) -M._tokenstyles = { - header = l.STYLE_COMMENT, - addition = 'fore:green', - deletion = 'fore:red', - change = 'fore:yellow', -} - -M._LEXBYLINE = true - -return M +return lex diff --git a/lua/lexers/django.lua b/lua/lexers/django.lua @@ -1,77 +1,55 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Django LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'django'} +local lex = lexer.new('django') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local comment = token(l.COMMENT, '{#' * (l.any - l.newline - '#}')^0 * - P('#}')^-1) - --- Strings. -local string = token(l.STRING, l.delimited_range('"', false, true)) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match{ - 'as', 'block', 'blocktrans', 'by', 'endblock', 'endblocktrans', 'comment', - 'endcomment', 'cycle', 'date', 'debug', 'else', 'extends', 'filter', - 'endfilter', 'firstof', 'for', 'endfor', 'if', 'endif', 'ifchanged', - 'endifchanged', 'ifnotequal', 'endifnotequal', 'in', 'load', 'not', 'now', +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'as', 'block', 'blocktrans', 'by', 'endblock', 'endblocktrans', 'comment', 'endcomment', 'cycle', + 'date', 'debug', 'else', 'extends', 'filter', 'endfilter', 'firstof', 'for', 'endfor', 'if', + 'endif', 'ifchanged', 'endifchanged', 'ifnotequal', 'endifnotequal', 'in', 'load', 'not', 'now', 'or', 'parsed', 'regroup', 'ssi', 'trans', 'with', 'widthratio' -}) +})) -- Functions. -local func = token(l.FUNCTION, word_match{ - 'add', 'addslashes', 'capfirst', 'center', 'cut', 'date', 'default', - 'dictsort', 'dictsortreversed', 'divisibleby', 'escape', 'filesizeformat', - 'first', 'fix_ampersands', 'floatformat', 'get_digit', 'join', 'length', - 'length_is', 'linebreaks', 'linebreaksbr', 'linenumbers', 'ljust', 'lower', - 'make_list', 'phone2numeric', 'pluralize', 'pprint', 'random', 'removetags', - 'rjust', 'slice', 'slugify', 'stringformat', 'striptags', 'time', 'timesince', - 'title', 'truncatewords', 'unordered_list', 'upper', 'urlencode', 'urlize', - 'urlizetrunc', 'wordcount', 'wordwrap', 'yesno', -}) +lex:add_rule('function', token(lexer.FUNCTION, word_match{ + 'add', 'addslashes', 'capfirst', 'center', 'cut', 'date', 'default', 'dictsort', + 'dictsortreversed', 'divisibleby', 'escape', 'filesizeformat', 'first', 'fix_ampersands', + 'floatformat', 'get_digit', 'join', 'length', 'length_is', 'linebreaks', 'linebreaksbr', + 'linenumbers', 'ljust', 'lower', 'make_list', 'phone2numeric', 'pluralize', 'pprint', 'random', + 'removetags', 'rjust', 'slice', 'slugify', 'stringformat', 'striptags', 'time', 'timesince', + 'title', 'truncatewords', 'unordered_list', 'upper', 
'urlencode', 'urlize', 'urlizetrunc', + 'wordcount', 'wordwrap', 'yesno' +})) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) - --- Operators. -local operator = token(l.OPERATOR, S(':,.|')) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'function', func}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'operator', operator}, -} +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.range('"', false, false))) --- Embedded in HTML. -local html = l.load('html') +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S(':,.|'))) --- Embedded Django. +-- Embed Django in HTML. +local html = lexer.load('html') +local html_comment = lexer.range('<!--', '-->') +local django_comment = lexer.range('{#', '#}', true) +html:modify_rule('comment', token(lexer.COMMENT, html_comment + django_comment)) local django_start_rule = token('django_tag', '{' * S('{%')) local django_end_rule = token('django_tag', S('%}') * '}') -l.embed_lexer(html, M, django_start_rule, django_end_rule) --- Modify HTML patterns to embed Django. -html._RULES['comment'] = html._RULES['comment'] + comment - -M._tokenstyles = { - django_tag = l.STYLE_EMBEDDED -} +html:embed(lex, django_start_rule, django_end_rule) +lex:add_style('django_tag', lexer.styles.embedded) -local _foldsymbols = html._foldsymbols -_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '{[%%{]' -_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '[%%}]}' -_foldsymbols.django_tag = {['{{'] = 1, ['}}'] = -1, ['{%'] = 1, ['%}'] = -1} -M._foldsymbols = _foldsymbols +-- Fold points. +lex:add_fold_point('django_tag', '{{', '}}') +lex:add_fold_point('django_tag', '{%', '%}') -return M +return lex diff --git a/lua/lexers/dmd.lua b/lua/lexers/dmd.lua @@ -1,176 +1,142 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- D LPeg lexer. 
-- Heavily modified by Brian Schott (@Hackerpilot on Github). -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'dmd'} +local lex = lexer.new('dmd') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) + +-- Class names. +lex:add_rule('class', + token(lexer.TYPE, P('class') + 'struct') * ws^-1 * token(lexer.CLASS, lexer.word)) + +-- Versions. +local version = word_match{ + 'AArch64', 'AIX', 'all', 'Alpha', 'Alpha_HardFloat', 'Alpha_SoftFloat', 'Android', 'ARM', + 'ARM_HardFloat', 'ARM_SoftFloat', 'ARM_SoftFP', 'ARM_Thumb', 'assert', 'BigEndian', 'BSD', + 'Cygwin', 'D_Coverage', 'D_Ddoc', 'D_HardFloat', 'DigitalMars', 'D_InlineAsm_X86', + 'D_InlineAsm_X86_64', 'D_LP64', 'D_NoBoundsChecks', 'D_PIC', 'DragonFlyBSD', 'D_SIMD', + 'D_SoftFloat', 'D_Version2', 'D_X32', 'FreeBSD', 'GNU', 'Haiku', 'HPPA', 'HPPA64', 'Hurd', 'IA64', + 'LDC', 'linux', 'LittleEndian', 'MIPS32', 'MIPS64', 'MIPS_EABI', 'MIPS_HardFloat', 'MIPS_N32', + 'MIPS_N64', 'MIPS_O32', 'MIPS_O64', 'MIPS_SoftFloat', 'NetBSD', 'none', 'OpenBSD', 'OSX', 'Posix', + 'PPC', 'PPC64', 'PPC_HardFloat', 'PPC_SoftFloat', 'S390', 'S390X', 'SDC', 'SH', 'SH64', 'SkyOS', + 'Solaris', 'SPARC', 'SPARC64', 'SPARC_HardFloat', 'SPARC_SoftFloat', 'SPARC_V8Plus', 'SysV3', + 'SysV4', 'unittest', 'Win32', 'Win64', 'Windows', 'X86', 'X86_64' +} +local open_paren = token(lexer.OPERATOR, '(') +lex:add_rule('version', token(lexer.KEYWORD, 'version') * ws^-1 * open_paren * ws^-1 * + token('versions', version)) +lex:add_style('versions', lexer.styles.constant) --- Comments. 
-local line_comment = '//' * l.nonnewline_esc^0 -local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 -local nested_comment = l.nested_pair('/+', '+/') -local comment = token(l.COMMENT, line_comment + block_comment + nested_comment) +-- Scopes. +local scope = word_match('exit success failure') +lex:add_rule('scope', + token(lexer.KEYWORD, 'scope') * ws^-1 * open_paren * ws^-1 * token('scopes', scope)) +lex:add_style('scopes', lexer.styles.constant) --- Strings. -local sq_str = l.delimited_range("'", true) * S('cwd')^-1 -local dq_str = l.delimited_range('"') * S('cwd')^-1 -local lit_str = 'r' * l.delimited_range('"', false, true) * S('cwd')^-1 -local bt_str = l.delimited_range('`', false, true) * S('cwd')^-1 -local hex_str = 'x' * l.delimited_range('"') * S('cwd')^-1 -local other_hex_str = '\\x' * (l.xdigit * l.xdigit)^1 -local del_str = l.nested_pair('q"[', ']"') * S('cwd')^-1 + - l.nested_pair('q"(', ')"') * S('cwd')^-1 + - l.nested_pair('q"{', '}"') * S('cwd')^-1 + - l.nested_pair('q"<', '>"') * S('cwd')^-1 + - P('q') * l.nested_pair('{', '}') * S('cwd')^-1 -local string = token(l.STRING, del_str + sq_str + dq_str + lit_str + bt_str + - hex_str + other_hex_str) +-- Traits. +local trait = word_match{ + 'allMembers', 'classInstanceSize', 'compiles', 'derivedMembers', 'getAttributes', 'getMember', + 'getOverloads', 'getProtection', 'getUnitTests', 'getVirtualFunctions', 'getVirtualIndex', + 'getVirtualMethods', 'hasMember', 'identifier', 'isAbstractClass', 'isAbstractFunction', + 'isArithmetic', 'isAssociativeArray', 'isFinalClass', 'isFinalFunction', 'isFloating', + 'isIntegral', 'isLazy', 'isNested', 'isOut', 'isOverrideFunction', 'isPOD', 'isRef', 'isSame', + 'isScalar', 'isStaticArray', 'isStaticFunction', 'isUnsigned', 'isVirtualFunction', + 'isVirtualMethod', 'parent' +} +lex:add_rule('trait', + token(lexer.KEYWORD, '__traits') * ws^-1 * open_paren * ws^-1 * token('traits', trait)) +lex:add_style('traits', {fore = lexer.colors.yellow}) --- Numbers. 
-local dec = l.digit^1 * ('_' * l.digit^1)^0 -local hex_num = l.hex_num * ('_' * l.xdigit^1)^0 -local bin_num = '0' * S('bB') * S('01_')^1 -local oct_num = '0' * S('01234567_')^1 -local integer = S('+-')^-1 * (hex_num + oct_num + bin_num + dec) -local number = token(l.NUMBER, (l.float + integer) * S('uUlLdDfFi')^-1) +-- Function names. +lex:add_rule('function', + token(lexer.FUNCTION, lexer.word) * #(ws^-1 * ('!' * lexer.word^-1 * ws^-1)^-1 * '(')) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'abstract', 'align', 'asm', 'assert', 'auto', 'body', 'break', 'case', 'cast', - 'catch', 'const', 'continue', 'debug', 'default', 'delete', - 'deprecated', 'do', 'else', 'extern', 'export', 'false', 'final', 'finally', - 'for', 'foreach', 'foreach_reverse', 'goto', 'if', 'import', 'immutable', - 'in', 'inout', 'invariant', 'is', 'lazy', 'macro', 'mixin', 'new', 'nothrow', - 'null', 'out', 'override', 'pragma', 'private', 'protected', 'public', 'pure', - 'ref', 'return', 'scope', 'shared', 'static', 'super', 'switch', - 'synchronized', 'this', 'throw','true', 'try', 'typeid', 'typeof', 'unittest', - 'version', 'virtual', 'volatile', 'while', 'with', '__gshared', '__thread', +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'abstract', 'align', 'asm', 'assert', 'auto', 'body', 'break', 'case', 'cast', 'catch', 'const', + 'continue', 'debug', 'default', 'delete', 'deprecated', 'do', 'else', 'extern', 'export', 'false', + 'final', 'finally', 'for', 'foreach', 'foreach_reverse', 'goto', 'if', 'import', 'immutable', + 'in', 'inout', 'invariant', 'is', 'lazy', 'macro', 'mixin', 'new', 'nothrow', 'null', 'out', + 'override', 'pragma', 'private', 'protected', 'public', 'pure', 'ref', 'return', 'scope', + 'shared', 'static', 'super', 'switch', 'synchronized', 'this', 'throwtrue', 'try', 'typeid', + 'typeof', 'unittest', 'version', 'virtual', 'volatile', 'while', 'with', '__gshared', '__thread', '__traits', '__vector', '__parameters' -}) +})) -- Types. 
-local type = token(l.TYPE, word_match{ - 'alias', 'bool', 'byte', 'cdouble', 'cent', 'cfloat', 'char', 'class', - 'creal', 'dchar', 'delegate', 'double', 'enum', 'float', 'function', - 'idouble', 'ifloat', 'int', 'interface', 'ireal', 'long', 'module', 'package', - 'ptrdiff_t', 'real', 'short', 'size_t', 'struct', 'template', 'typedef', - 'ubyte', 'ucent', 'uint', 'ulong', 'union', 'ushort', 'void', 'wchar', +local type = token(lexer.TYPE, word_match{ + 'alias', 'bool', 'byte', 'cdouble', 'cent', 'cfloat', 'char', 'class', 'creal', 'dchar', + 'delegate', 'double', 'enum', 'float', 'function', 'idouble', 'ifloat', 'int', 'interface', + 'ireal', 'long', 'module', 'package', 'ptrdiff_t', 'real', 'short', 'size_t', 'struct', + 'template', 'typedef', 'ubyte', 'ucent', 'uint', 'ulong', 'union', 'ushort', 'void', 'wchar', 'string', 'wstring', 'dstring', 'hash_t', 'equals_t' }) +lex:add_rule('type', type) -- Constants. -local constant = token(l.CONSTANT, word_match{ - '__FILE__', '__LINE__', '__DATE__', '__EOF__', '__TIME__', '__TIMESTAMP__', - '__VENDOR__', '__VERSION__', '__FUNCTION__', '__PRETTY_FUNCTION__', - '__MODULE__', -}) - -local class_sequence = token(l.TYPE, P('class') + P('struct')) * ws^1 * - token(l.CLASS, l.word) +lex:add_rule('constant', token(lexer.CONSTANT, word_match{ + '__FILE__', '__LINE__', '__DATE__', '__EOF__', '__TIME__', '__TIMESTAMP__', '__VENDOR__', + '__VERSION__', '__FUNCTION__', '__PRETTY_FUNCTION__', '__MODULE__' +})) --- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +-- Properties. 
+local dot = token(lexer.OPERATOR, '.') +lex:add_rule('property', lpeg.B(lexer.alnum + ')') * dot * token(lexer.VARIABLE, word_match{ + 'alignof', 'dig', 'dup', 'epsilon', 'idup', 'im', 'init', 'infinity', 'keys', 'length', + 'mangleof', 'mant_dig', 'max', 'max_10_exp', 'max_exp', 'min', 'min_normal', 'min_10_exp', + 'min_exp', 'nan', 'offsetof', 'ptr', 're', 'rehash', 'reverse', 'sizeof', 'sort', 'stringof', + 'tupleof', 'values' +})) --- Operators. -local operator = token(l.OPERATOR, S('?=!<>+-*$/%&|^~.,;()[]{}')) +-- Strings. +local sq_str = lexer.range("'", true) * S('cwd')^-1 +local dq_str = lexer.range('"') * S('cwd')^-1 +local lit_str = 'r' * lexer.range('"', false, false) * S('cwd')^-1 +local bt_str = lexer.range('`', false, false) * S('cwd')^-1 +local hex_str = 'x' * lexer.range('"') * S('cwd')^-1 +local other_hex_str = '\\x' * (lexer.xdigit * lexer.xdigit)^1 +local str = sq_str + dq_str + lit_str + bt_str + hex_str + other_hex_str +for left, right in pairs{['['] = ']', ['('] = ')', ['{'] = '}', ['<'] = '>'} do + str = str + lexer.range('q"' .. left, right .. '"', false, false, true) * S('cwd')^-1 +end +lex:add_rule('string', token(lexer.STRING, str)) --- Properties. -local properties = (type + identifier + operator) * token(l.OPERATOR, '.') * - token(l.VARIABLE, word_match{ - 'alignof', 'dig', 'dup', 'epsilon', 'idup', 'im', 'init', 'infinity', - 'keys', 'length', 'mangleof', 'mant_dig', 'max', 'max_10_exp', 'max_exp', - 'min', 'min_normal', 'min_10_exp', 'min_exp', 'nan', 'offsetof', 'ptr', - 're', 'rehash', 'reverse', 'sizeof', 'sort', 'stringof', 'tupleof', - 'values' - }) - --- Preprocs. -local annotation = token('annotation', '@' * l.word^1) -local preproc = token(l.PREPROCESSOR, '#' * l.nonnewline^0) +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Traits. 
-local traits_list = token('traits', word_match{ - 'allMembers', 'classInstanceSize', 'compiles', 'derivedMembers', - 'getAttributes', 'getMember', 'getOverloads', 'getProtection', 'getUnitTests', - 'getVirtualFunctions', 'getVirtualIndex', 'getVirtualMethods', 'hasMember', - 'identifier', 'isAbstractClass', 'isAbstractFunction', 'isArithmetic', - 'isAssociativeArray', 'isFinalClass', 'isFinalFunction', 'isFloating', - 'isIntegral', 'isLazy', 'isNested', 'isOut', 'isOverrideFunction', 'isPOD', - 'isRef', 'isSame', 'isScalar', 'isStaticArray', 'isStaticFunction', - 'isUnsigned', 'isVirtualFunction', 'isVirtualMethod', 'parent' -}) +-- Comments. +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') +local nested_comment = lexer.range('/+', '+/', false, false, true) +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment + nested_comment)) -local scopes_list = token('scopes', word_match{'exit', 'success', 'failure'}) - --- versions -local versions_list = token('versions', word_match{ - 'AArch64', 'AIX', 'all', 'Alpha', 'Alpha_HardFloat', 'Alpha_SoftFloat', - 'Android', 'ARM', 'ARM_HardFloat', 'ARM_SoftFloat', 'ARM_SoftFP', 'ARM_Thumb', - 'assert', 'BigEndian', 'BSD', 'Cygwin', 'D_Coverage', 'D_Ddoc', 'D_HardFloat', - 'DigitalMars', 'D_InlineAsm_X86', 'D_InlineAsm_X86_64', 'D_LP64', - 'D_NoBoundsChecks', 'D_PIC', 'DragonFlyBSD', 'D_SIMD', 'D_SoftFloat', - 'D_Version2', 'D_X32', 'FreeBSD', 'GNU', 'Haiku', 'HPPA', 'HPPA64', 'Hurd', - 'IA64', 'LDC', 'linux', 'LittleEndian', 'MIPS32', 'MIPS64', 'MIPS_EABI', - 'MIPS_HardFloat', 'MIPS_N32', 'MIPS_N64', 'MIPS_O32', 'MIPS_O64', - 'MIPS_SoftFloat', 'NetBSD', 'none', 'OpenBSD', 'OSX', 'Posix', 'PPC', 'PPC64', - 'PPC_HardFloat', 'PPC_SoftFloat', 'S390', 'S390X', 'SDC', 'SH', 'SH64', - 'SkyOS', 'Solaris', 'SPARC', 'SPARC64', 'SPARC_HardFloat', 'SPARC_SoftFloat', - 'SPARC_V8Plus', 'SysV3', 'SysV4', 'unittest', 'Win32', 'Win64', 'Windows', - 'X86', 'X86_64' -}) +-- Numbers. 
+local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 +local hex_num = lexer.hex_num * ('_' * lexer.xdigit^1)^0 +local bin_num = '0' * S('bB') * S('01_')^1 * -lexer.xdigit +local oct_num = '0' * S('01234567_')^1 +local integer = S('+-')^-1 * (hex_num + oct_num + bin_num + dec) +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + integer) * S('uULdDfFi')^-1)) -local versions = token(l.KEYWORD, 'version') * l.space^0 * - token(l.OPERATOR, '(') * l.space^0 * versions_list - -local scopes = token(l.KEYWORD, 'scope') * l.space^0 * - token(l.OPERATOR, '(') * l.space^0 * scopes_list - -local traits = token(l.KEYWORD, '__traits') * l.space^0 * - token(l.OPERATOR, '(') * l.space^0 * traits_list - -local func = token(l.FUNCTION, l.word) * - #(l.space^0 * (P('!') * l.word^-1 * l.space^-1)^-1 * P('(')) - -M._rules = { - {'whitespace', ws}, - {'class', class_sequence}, - {'traits', traits}, - {'versions', versions}, - {'scopes', scopes}, - {'keyword', keyword}, - {'variable', properties}, - {'type', type}, - {'function', func}, - {'constant', constant}, - {'string', string}, - {'identifier', identifier}, - {'comment', comment}, - {'number', number}, - {'preproc', preproc}, - {'operator', operator}, - {'annotation', annotation}, -} +-- Preprocessor. +lex:add_rule('annotation', token('annotation', '@' * lexer.word^1)) +lex:add_style('annotation', lexer.styles.preprocessor) +lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, lexer.to_eol('#'))) -M._tokenstyles = { - annotation = l.STYLE_PREPROCESSOR, - traits = l.STYLE_CLASS, - versions = l.STYLE_CONSTANT, - scopes = l.STYLE_CONSTANT -} +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('?=!<>+-*$/%&|^~.,;:()[]{}'))) -M._foldsymbols = { - _patterns = {'[{}]', '/[*+]', '[*+]/', '//'}, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = { - ['/*'] = 1, ['*/'] = -1, ['/+'] = 1, ['+/'] = -1, - ['//'] = l.fold_line_comments('//') - } -} +-- Fold points. 
+lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '/+', '+/') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) -return M +return lex diff --git a/lua/lexers/dockerfile.lua b/lua/lexers/dockerfile.lua @@ -1,55 +1,40 @@ --- Copyright 2016-2017 Alejandro Baez (https://keybase.io/baez). See LICENSE. +-- Copyright 2016-2022 Alejandro Baez (https://keybase.io/baez). See LICENSE. -- Dockerfile LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'dockerfile'} +local lex = lexer.new('dockerfile', {fold_by_indentation = true}) -- Whitespace -local indent = #l.starts_line(S(' \t')) * - (token(l.WHITESPACE, ' ') + token('indent_error', '\t'))^1 -local ws = token(l.WHITESPACE, S(' \t')^1 + l.newline^1) - --- Comments. -local comment = token(l.COMMENT, '#' * l.nonnewline^0) - --- Strings. -local sq_str = l.delimited_range("'", false, true) -local dq_str = l.delimited_range('"') -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. -local number = token(l.NUMBER, l.float + l.integer) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'ADD', 'ARG', 'CMD', 'COPY', 'ENTRYPOINT', 'ENV', 'EXPOSE', 'FROM', 'LABEL', - 'MAINTAINER', 'ONBUILD', 'RUN', 'STOPSIGNAL', 'USER', 'VOLUME', 'WORKDIR' -}) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'ADD', 'ARG', 'CMD', 'COPY', 'ENTRYPOINT', 'ENV', 'EXPOSE', 'FROM', 'LABEL', 'MAINTAINER', + 'ONBUILD', 'RUN', 'STOPSIGNAL', 'USER', 'VOLUME', 'WORKDIR' +})) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Variable. 
-local variable = token(l.VARIABLE, - S('$')^1 * (S('{')^1 * l.word * S('}')^1 + l.word)) +lex:add_rule('variable', + token(lexer.VARIABLE, S('$')^1 * (P('{')^1 * lexer.word * P('}')^1 + lexer.word))) + +-- Strings. +local sq_str = lexer.range("'", false, false) +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. -local operator = token(l.OPERATOR, S('\\[],=:{}')) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'variable', variable}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} - -M._FOLDBYINDENTATION = true - -return M +lex:add_rule('operator', token(lexer.OPERATOR, S('\\[],=:{}'))) + +return lex diff --git a/lua/lexers/dot.lua b/lua/lexers/dot.lua @@ -1,71 +1,56 @@ --- Copyright 2006-2017 Brian "Sir Alaran" Schott. See LICENSE. +-- Copyright 2006-2022 Brian "Sir Alaran" Schott. See LICENSE. -- Dot LPeg lexer. -- Based off of lexer code by Mitchell. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'dot'} +local lex = lexer.new('dot') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local line_comment = '//' * l.nonnewline_esc^0 -local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) - --- Strings. -local sq_str = l.delimited_range("'") -local dq_str = l.delimited_range('"') -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. 
-local number = token(l.NUMBER, l.digit^1 + l.float) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'graph', 'node', 'edge', 'digraph', 'fontsize', 'rankdir', - 'fontname', 'shape', 'label', 'arrowhead', 'arrowtail', 'arrowsize', - 'color', 'comment', 'constraint', 'decorate', 'dir', 'headlabel', 'headport', - 'headURL', 'labelangle', 'labeldistance', 'labelfloat', 'labelfontcolor', - 'labelfontname', 'labelfontsize', 'layer', 'lhead', 'ltail', 'minlen', - 'samehead', 'sametail', 'style', 'taillabel', 'tailport', 'tailURL', 'weight', - 'subgraph' -}) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'graph', 'node', 'edge', 'digraph', 'fontsize', 'rankdir', 'fontname', 'shape', 'label', + 'arrowhead', 'arrowtail', 'arrowsize', 'color', 'comment', 'constraint', 'decorate', 'dir', + 'headlabel', 'headport', 'headURL', 'labelangle', 'labeldistance', 'labelfloat', 'labelfontcolor', + 'labelfontname', 'labelfontsize', 'layer', 'lhead', 'ltail', 'minlen', 'samehead', 'sametail', + 'style', 'taillabel', 'tailport', 'tailURL', 'weight', 'subgraph' +})) -- Types. 
-local type = token(l.TYPE, word_match{ - 'box', 'polygon', 'ellipse', 'circle', 'point', 'egg', 'triangle', - 'plaintext', 'diamond', 'trapezium', 'parallelogram', 'house', 'pentagon', - 'hexagon', 'septagon', 'octagon', 'doublecircle', 'doubleoctagon', - 'tripleoctagon', 'invtriangle', 'invtrapezium', 'invhouse', 'Mdiamond', - 'Msquare', 'Mcircle', 'rect', 'rectangle', 'none', 'note', 'tab', 'folder', - 'box3d', 'record' -}) +lex:add_rule('type', token(lexer.TYPE, word_match{ + ' box', 'polygon', 'ellipse', 'circle', 'point', 'egg', 'triangle', 'plaintext', 'diamond', + 'trapezium', 'parallelogram', 'house', 'pentagon', 'hexagon', 'septagon', 'octagon', + 'doublecircle', 'doubleoctagon', 'tripleoctagon', 'invtriangle', 'invtrapezium', 'invhouse', + 'Mdiamond', 'Msquare', 'Mcircle', 'rect', 'rectangle', 'none', 'note', 'tab', 'folder', 'box3d', + 'record' +})) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, S('->()[]{};')) +-- Strings. +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Comments. +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -M._rules = { - {'whitespace', ws}, - {'comment', comment}, - {'keyword', keyword}, - {'type', type}, - {'identifier', identifier}, - {'number', number}, - {'string', string}, - {'operator', operator}, -} +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.dec_num + lexer.float)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('->()[]{};'))) -M._foldsymbols = { - _patterns = {'[{}]', '/%*', '%*/', '//'}, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} -} +-- Fold points. 
+lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) -return M +return lex diff --git a/lua/lexers/eiffel.lua b/lua/lexers/eiffel.lua @@ -1,69 +1,58 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Eiffel LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'eiffel'} +local lex = lexer.new('eiffel') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local comment = token(l.COMMENT, '--' * l.nonnewline^0) - --- Strings. -local sq_str = l.delimited_range("'", true) -local dq_str = l.delimited_range('"', true) -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. -local number = token(l.NUMBER, l.float + l.integer) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match{ - 'alias', 'all', 'and', 'as', 'check', 'class', 'creation', 'debug', - 'deferred', 'do', 'else', 'elseif', 'end', 'ensure', 'expanded', 'export', - 'external', 'feature', 'from', 'frozen', 'if', 'implies', 'indexing', 'infix', - 'inherit', 'inspect', 'invariant', 'is', 'like', 'local', 'loop', 'not', - 'obsolete', 'old', 'once', 'or', 'prefix', 'redefine', 'rename', 'require', - 'rescue', 'retry', 'select', 'separate', 'then', 'undefine', 'until', - 'variant', 'when', 'xor', +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'alias', 'all', 'and', 'as', 'check', 'class', 'creation', 'debug', 'deferred', 'do', 'else', + 'elseif', 'end', 'ensure', 'expanded', 'export', 'external', 'feature', 'from', 'frozen', 'if', + 'implies', 'indexing', 'infix', 'inherit', 'inspect', 'invariant', 'is', 'like', 'local', 'loop', + 'not', 'obsolete', 'old', 'once', 'or', 'prefix', 'redefine', 'rename', 'require', 'rescue', + 'retry', 'select', 'separate', 'then', 'undefine', 'until', 'variant', 'when', 'xor', -- 'current', 'false', 'precursor', 'result', 'strip', 'true', 'unique', 'void' -}) +})) -- Types. -local type = token(l.TYPE, word_match{ - 'character', 'string', 'bit', 'boolean', 'integer', 'real', 'none', 'any' -}) +lex:add_rule('type', + token(lexer.TYPE, word_match('character string bit boolean integer real none any'))) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, S('=!<>+-/*%&|^~.,:;?()[]{}')) +-- Strings. +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'type', type}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} +-- Comments. 
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('--'))) -M._foldsymbols = { - _patterns = {'[a-z]+', '%-%-'}, - [l.KEYWORD] = { - check = 1, debug = 1, deferred = 1, ['do'] = 1, from = 1, ['if'] = 1, - inspect = 1, once = 1, class = function(text, pos, line, s) - return line:find('deferred%s+class') and 0 or 1 - end, ['end'] = -1 - }, - [l.COMMENT] = {['--'] = l.fold_line_comments('--')} -} +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -return M +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*%&|^~.,:;?()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'check', 'end') +lex:add_fold_point(lexer.KEYWORD, 'debug', 'end') +lex:add_fold_point(lexer.KEYWORD, 'deferred', + function(text, pos, line, s) return line:find('deferred%s+class') and 0 or 1 end) +lex:add_fold_point(lexer.KEYWORD, 'do', 'end') +lex:add_fold_point(lexer.KEYWORD, 'from', 'end') +lex:add_fold_point(lexer.KEYWORD, 'if', 'end') +lex:add_fold_point(lexer.KEYWORD, 'inspect', 'end') +lex:add_fold_point(lexer.KEYWORD, 'once', 'end') +lex:add_fold_point(lexer.KEYWORD, 'class', + function(text, pos, line, s) return line:find('deferred%s+class') and 0 or 1 end) +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('--')) + +return lex diff --git a/lua/lexers/elixir.lua b/lua/lexers/elixir.lua @@ -1,123 +1,97 @@ --- Copyright 2015-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2015-2022 Mitchell. See LICENSE. -- Contributed by Richard Philips. --- Elixer LPeg lexer. +-- Elixir LPeg lexer. -local l = require('lexer') -local token, style, color, word_match = l.token, l.style, l.color, l.word_match -local B, P, R, S = lpeg.B, lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local B, P, S = lpeg.B, lpeg.P, lpeg.S -local M = {_NAME = 'elixir'} +local lex = lexer.new('elixir', {fold_by_indentation = true}) -- Whitespace. 
-local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local comment = token(l.COMMENT, '#' * l.nonnewline_esc^0) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Sigils. +local sigil11 = '~' * S('CRSW') * lexer.range('<', '>') +local sigil12 = '~' * S('CRSW') * lexer.range('{', '}') +local sigil13 = '~' * S('CRSW') * lexer.range('[', ']') +local sigil14 = '~' * S('CRSW') * lexer.range('(', ')') +local sigil15 = '~' * S('CRSW') * lexer.range('|', false, false) +local sigil16 = '~' * S('CRSW') * lexer.range('/', false, false) +local sigil17 = '~' * S('CRSW') * lexer.range('"', false, false) +local sigil18 = '~' * S('CRSW') * lexer.range("'", false, false) +local sigil19 = '~' * S('CRSW') * lexer.range('"""') +local sigil10 = '~' * S('CRSW') * lexer.range("'''") +local sigil21 = '~' * S('crsw') * lexer.range('<', '>') +local sigil22 = '~' * S('crsw') * lexer.range('{', '}') +local sigil23 = '~' * S('crsw') * lexer.range('[', ']') +local sigil24 = '~' * S('crsw') * lexer.range('(', ')') +local sigil25 = '~' * S('crsw') * lexer.range('|') +local sigil26 = '~' * S('crsw') * lexer.range('/') +local sigil27 = '~' * S('crsw') * lexer.range('"') +local sigil28 = '~' * S('crsw') * lexer.range("'") +local sigil29 = '~' * S('crsw') * lexer.range('"""') +local sigil20 = '~' * S('crsw') * lexer.range("'''") +local sigil_token = token(lexer.REGEX, + sigil10 + sigil19 + sigil11 + sigil12 + sigil13 + sigil14 + sigil15 + sigil16 + sigil17 + sigil18 + + sigil20 + sigil29 + sigil21 + sigil22 + sigil23 + sigil24 + sigil25 + sigil26 + sigil27 + + sigil28) +local sigiladdon_token = token(lexer.EMBEDDED, lexer.alpha^0) +lex:add_rule('sigil', sigil_token * sigiladdon_token) + +-- Atoms. 
+local atom1 = B(1 - P(':')) * ':' * lexer.range('"') +local atom2 = B(1 - P(':')) * ':' * lexer.alpha * (lexer.alnum + S('_@'))^0 * S('?!')^-1 +local atom3 = B(1 - (lexer.alnum + S('_:'))) * lexer.upper * (lexer.alnum + S('_@'))^0 * S('?!')^-1 +lex:add_rule('atom', token(lexer.CONSTANT, atom1 + atom2 + atom3)) -- Strings. -local dq_str = l.delimited_range('"', false) -local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1 -local string = token(l.STRING, triple_dq_str + dq_str) - --- Numbers -local dec = l.digit * (l.digit + P("_"))^0 -local bin = '0b' * S('01')^1 -local oct = '0o' * R('07')^1 -local integer = bin + l.hex_num + oct + dec -local float = l.digit^1 * P(".") * l.digit^1 * S("eE") * - (S('+-')^-1 * l.digit^1)^-1 -local number_token = B(1 - R('az', 'AZ', '__')) * - (S('+-')^-1) * token(l.NUMBER, (float + integer)) - --- Keywords. -local keyword_token = token(l.KEYWORD, word_match{ - "is_atom", "is_binary", "is_bitstring", "is_boolean", "is_float", - "is_function", "is_integer", "is_list", "is_map", "is_number", "is_pid", - "is_port", "is_record", "is_reference", "is_tuple", "is_exception", "case", - "when", "cond", "for", "if", "unless", "try", "receive", "send", "exit", - "raise", "throw", "after", "rescue", "catch", "else", "do", "end", "quote", - "unquote", "super", "import", "require", "alias", "use", "self", "with", "fn" -}) +local dq_str = lexer.range('"') +local triple_dq_str = lexer.range('"""') +lex:add_rule('string', token(lexer.STRING, triple_dq_str + dq_str)) --- Functions -local function_token = token(l.FUNCTION, word_match{ - "defstruct", "defrecordp", "defrecord", "defprotocol", "defp", - "defoverridable", "defmodule", "defmacrop", "defmacro", "defimpl", - "defexception", "defdelegate", "defcallback", "def" -}) - --- Sigils -local sigil11 = P("~") * S("CRSW") * l.delimited_range('<>', false, true) -local sigil12 = P("~") * S("CRSW") * l.delimited_range('{}', false, true) -local sigil13 = P("~") * S("CRSW") * l.delimited_range('[]', 
false, true) -local sigil14 = P("~") * S("CRSW") * l.delimited_range('()', false, true) -local sigil15 = P("~") * S("CRSW") * l.delimited_range('|', false, true) -local sigil16 = P("~") * S("CRSW") * l.delimited_range('/', false, true) -local sigil17 = P("~") * S("CRSW") * l.delimited_range('"', false, true) -local sigil18 = P("~") * S("CRSW") * l.delimited_range("'", false, true) -local sigil19 = P("~") * S("CRSW") * '"""' * (l.any - '"""')^0 * P('"""')^-1 -local sigil10 = P("~") * S("CRSW") * "'''" * (l.any - "'''")^0 * P("'''")^-1 -local sigil21 = P("~") * S("crsw") * l.delimited_range('<>', false, false) -local sigil22 = P("~") * S("crsw") * l.delimited_range('{}', false, false) -local sigil23 = P("~") * S("crsw") * l.delimited_range('[]', false, false) -local sigil24 = P("~") * S("crsw") * l.delimited_range('()', false, false) -local sigil25 = P("~") * S("crsw") * l.delimited_range('|', false, false) -local sigil26 = P("~") * S("crsw") * l.delimited_range('/', false, false) -local sigil27 = P("~") * S("crsw") * l.delimited_range('"', false, false) -local sigil28 = P("~") * S("crsw") * l.delimited_range("'", false, false) -local sigil29 = P("~") * S("csrw") * '"""' * (l.any - '"""')^0 * P('"""')^-1 -local sigil20 = P("~") * S("csrw") * "'''" * (l.any - "'''")^0 * P("'''")^-1 -local sigil_token = token(l.REGEX, sigil10 + sigil19 + sigil11 + sigil12 + - sigil13 + sigil14 + sigil15 + sigil16 + - sigil17 + sigil18 + sigil20 + sigil29 + - sigil21 + sigil22 + sigil23 + sigil24 + - sigil25 + sigil26 + sigil27 + sigil28) -local sigiladdon_token = token(l.EMBEDDED, R('az', 'AZ')^0) +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true))) --- Attributes -local attribute_token = token(l.LABEL, B(1 - R('az', 'AZ', '__')) * P('@') * - R('az','AZ') * R('az','AZ','09','__')^0) +-- Attributes. 
+lex:add_rule('attribute', token(lexer.LABEL, B(1 - (lexer.alnum + '_')) * '@' * lexer.alpha * + (lexer.alnum + '_')^0)) --- Booleans -local boolean_token = token(l.NUMBER, - P(':')^-1 * word_match{"true", "false", "nil"}) +-- Booleans. +lex:add_rule('boolean', token(lexer.NUMBER, P(':')^-1 * word_match('true false nil'))) --- Identifiers -local identifier = token(l.IDENTIFIER, R('az', '__') * - R('az', 'AZ', '__', '09')^0 * S('?!')^-1) +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match{ + 'defstruct', 'defrecordp', 'defrecord', 'defprotocol', 'defp', 'defoverridable', 'defmodule', + 'defmacrop', 'defmacro', 'defimpl', 'defexception', 'defdelegate', 'defcallback', 'def' +})) --- Atoms -local atom1 = B(1 - P(':')) * P(':') * dq_str -local atom2 = B(1 - P(':')) * P(':') * R('az', 'AZ') * - R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1 -local atom3 = B(1 - R('az', 'AZ', '__', '09', '::')) * - R('AZ') * R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1 -local atom_token = token(l.CONSTANT, atom1 + atom2 + atom3) +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean', 'is_float', 'is_function', 'is_integer', + 'is_list', 'is_map', 'is_number', 'is_pid', 'is_port', 'is_record', 'is_reference', 'is_tuple', + 'is_exception', 'case', 'when', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'send', 'exit', + 'raise', 'throw', 'after', 'rescue', 'catch', 'else', 'do', 'end', 'quote', 'unquote', 'super', + 'import', 'require', 'alias', 'use', 'self', 'with', 'fn' +})) -- Operators -local operator1 = word_match{"and", "or", "not", "when", "xor", "in"} -local operator2 = P('!==') + '!=' + '!' + '=~' + '===' + '==' + '=' + '<<<' + - '<<' + '<=' + '<-' + '<' + '>>>' + '>>' + '>=' + '>' + '->' + - '--' + '-' + '++' + '+' + '&&&' + '&&' + '&' + '|||' + '||' + - '|>' + '|' + '..' + '.' 
+ '^^^' + '^' + '\\\\' + '::' + '*' + - '/' + '~~~' + '@' -local operator_token = token(l.OPERATOR, operator1 + operator2) +local operator1 = word_match('and or not when xor in') +local operator2 = P('!==') + '!=' + '!' + '=~' + '===' + '==' + '=' + '<<<' + '<<' + '<=' + '<-' + + '<' + '>>>' + '>>' + '>=' + '>' + '->' + '--' + '-' + '++' + '+' + '&&&' + '&&' + '&' + '|||' + + '||' + '|>' + '|' + '..' + '.' + '^^^' + '^' + '\\\\' + '::' + '*' + '/' + '~~~' + '@' +lex:add_rule('operator', token(lexer.OPERATOR, operator1 + operator2)) -M._rules = { - {'whitespace', ws}, - {'sigil', sigil_token * sigiladdon_token}, - {'atom', atom_token}, - {'string', string}, - {'comment', comment}, - {'attribute', attribute_token}, - {'boolean', boolean_token}, - {'function', function_token}, - {'keyword', keyword_token}, - {'operator', operator_token}, - {'identifier', identifier}, - {'number', number_token}, -} +-- Identifiers +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word * S('?!')^-1)) -M._FOLDBYINDENTATION = true +-- Numbers +local dec = lexer.digit * (lexer.digit + '_')^0 +local bin = '0b' * S('01')^1 +local oct = '0o' * lpeg.R('07')^1 +local integer = bin + lexer.hex_num + oct + dec +local float = lexer.digit^1 * '.' * lexer.digit^1 * S('eE') * (S('+-')^-1 * lexer.digit^1)^-1 +lex:add_rule('number', + B(1 - (lexer.alpha + '_')) * S('+-')^-1 * token(lexer.NUMBER, float + integer)) -return M +return lex diff --git a/lua/lexers/elm.lua b/lua/lexers/elm.lua @@ -1,64 +1,43 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2020-2022 Mitchell. See LICENSE. -- Elm LPeg lexer --- Modified by Alex Suraci. -- Adapted from Haskell LPeg lexer by Karl Schultheisz. 
-local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'elm'} +local lex = lexer.new('elm', {fold_by_indentation = true}) -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) --- Comments. -local line_comment = '--' * l.nonnewline_esc^0 -local block_comment = '{-' * (l.any - '-}')^0 * P('-}')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match( + 'if then else case of let in module import as exposing type alias port'))) + +-- Types & type constructors. +local word = (lexer.alnum + S("._'#"))^0 +local op = lexer.punct - S('()[]{}') +lex:add_rule('type', token(lexer.TYPE, lexer.upper * word + ':' * (op^1 - ':'))) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * word)) -- Strings. -local string = token(l.STRING, l.delimited_range('"')) +lex:add_rule('string', token(lexer.STRING, lexer.range('"'))) -- Chars. -local char = token(l.STRING, l.delimited_range("'", true)) - --- Numbers. -local number = token(l.NUMBER, l.float + l.integer) +lex:add_rule('character', token(lexer.STRING, lexer.range("'", true))) --- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'if', 'then', 'else', - 'case', 'of', - 'let', 'in', - 'module', 'import', 'as', 'exposing', - 'type', 'alias', - 'port', -}) +-- Comments. +local line_comment = lexer.to_eol('--', true) +local block_comment = lexer.range('{-', '-}', false, false, true) +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) --- Identifiers. -local word = (l.alnum + S("._'#"))^0 -local identifier = token(l.IDENTIFIER, (l.alpha + '_') * word) +-- Numbers. 
+lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. -local op = l.punct - S('()[]{}') -local operator = token(l.OPERATOR, op) - --- Types & type constructors. -local constructor = token(l.TYPE, (l.upper * word) + (P(":") * (op^1 - P(":")))) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'type', constructor}, - {'identifier', identifier}, - {'string', string}, - {'char', char}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} - -M._FOLDBYINDENTATION = true +lex:add_rule('operator', token(lexer.OPERATOR, op)) -return M +return lex diff --git a/lua/lexers/erlang.lua b/lua/lexers/erlang.lua @@ -1,81 +1,90 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Erlang LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'erlang'} +local lex = lexer.new('erlang') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) --- Comments. -local comment = token(l.COMMENT, '%' * l.nonnewline^0) +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', 'let', 'of', 'query', 'receive', + 'try', 'when', + -- Operators. + 'div', 'rem', 'or', 'xor', 'bor', 'bxor', 'bsl', 'bsr', 'and', 'band', 'not', 'bnot', 'badarg', + 'nocookie', 'orelse', 'andalso', 'false', 'true' +})) --- Strings. -local string = token(l.STRING, l.delimited_range('"')) +-- Functions. 
+lex:add_rule('function', token(lexer.FUNCTION, word_match{ + 'abs', 'alive', 'apply', 'atom_to_list', 'binary_to_list', 'binary_to_term', 'concat_binary', + 'date', 'disconnect_node', 'element', 'erase', 'exit', 'float', 'float_to_list', 'get', + 'get_keys', 'group_leader', 'halt', 'hd', 'integer_to_list', 'is_alive', 'is_record', 'length', + 'link', 'list_to_atom', 'list_to_binary', 'list_to_float', 'list_to_integer', 'list_to_pid', + 'list_to_tuple', 'load_module', 'make_ref', 'monitor_node', 'node', 'nodes', 'now', 'open_port', + 'pid_to_list', 'process_flag', 'process_info', 'process', 'put', 'register', 'registered', + 'round', 'self', 'setelement', 'size', 'spawn', 'spawn_link', 'split_binary', 'statistics', + 'term_to_binary', 'throw', 'time', 'tl', 'trunc', 'tuple_to_list', 'unlink', 'unregister', + 'whereis', + -- Others. + 'any', 'atom', 'binary', 'bitstring', 'byte', 'constant', 'function', 'integer', 'list', 'map', + 'mfa', 'non_neg_integer', 'number', 'pid', 'ports', 'port_close', 'port_info', 'pos_integer', + 'reference', 'record', + -- Erlang. + 'check_process_code', 'delete_module', 'get_cookie', 'hash', 'math', 'module_loaded', 'preloaded', + 'processes', 'purge_module', 'set_cookie', 'set_node', + -- Math. + 'acos', 'asin', 'atan', 'atan2', 'cos', 'cosh', 'exp', 'log', 'log10', 'min', 'max', 'pi', 'pow', + 'power', 'sin', 'sinh', 'sqrt', 'tan', 'tanh' +})) --- Numbers. -local const_char = '$' * (('\\' * l.ascii) + l.any) -local number = token(l.NUMBER, const_char + l.float + l.integer) +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.lower * ('_' + lexer.alnum)^0)) --- Atoms. -local atom_pat = (l.lower * (l.alnum + '_')^0) + l.delimited_range("'") -local atom = token(l.LABEL, atom_pat) +-- Variables. +lex:add_rule('variable', token(lexer.VARIABLE, P('_')^0 * lexer.upper * ('_' + lexer.alnum)^0)) --- Functions. -local func = token(l.FUNCTION, atom_pat * #l.delimited_range("()", false, false, true)) +-- Directives. 
+lex:add_rule('directive', token('directive', '-' * word_match{ + 'author', 'behaviour', 'behavior', 'compile', 'copyright', 'define', 'doc', 'else', 'endif', + 'export', 'file', 'ifdef', 'ifndef', 'import', 'include', 'include_lib', 'module', 'record', + 'spec', 'type', 'undef' +})) +lex:add_style('directive', lexer.styles.preprocessor) --- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', 'let', 'of', - 'query', 'receive', 'when' -}) +-- Strings. +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + '$' * lexer.any * lexer.alnum^0)) --- Identifiers. -local identifier = token(l.IDENTIFIER, ((l.upper + '_') * (l.alnum + '_')^0)) +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%'))) --- Operators. -local named_operator = word_match{ - 'div', 'rem', 'or', 'xor', 'bor', 'bxor', 'bsl', 'bsr', 'and', 'band', 'not', - 'bnot' -} -local operator = token(l.OPERATOR, S('-<>.;=/|#+*:,?!()[]{}') + named_operator) +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) --- Directives. 
-local directive = token('directive', '-' * word_match{ - 'author', 'compile', 'copyright', 'define', 'doc', 'else', 'endif', 'export', - 'file', 'ifdef', 'ifndef', 'import', 'include_lib', 'include', 'module', - 'record', 'undef' -}) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'function', func}, - {'operator', operator}, - {'atom', atom}, - {'identifier', identifier}, - {'directive', directive}, - {'string', string}, - {'comment', comment}, - {'number', number} -} - -M._tokenstyles = { - directive = l.STYLE_PREPROCESSOR -} - -M._foldsymbols = { - _patterns = {'[a-z]+', '[%(%)%[%]{}]', '%%'}, - [l.KEYWORD] = { - case = 1, fun = 1, ['if'] = 1, query = 1, receive = 1, ['end'] = -1 - }, - [l.OPERATOR] = { - ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1 - }, - [l.COMMENT] = {['%'] = l.fold_line_comments('%')} -} - -return M +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('-<>.;=/|+*:,!()[]{}'))) + +-- Preprocessor. +lex:add_rule('preprocessor', token(lexer.TYPE, '?' * lexer.word)) + +-- Records. +lex:add_rule('type', token(lexer.TYPE, '#' * lexer.word)) + +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'case', 'end') +lex:add_fold_point(lexer.KEYWORD, 'fun', 'end') +lex:add_fold_point(lexer.KEYWORD, 'if', 'end') +lex:add_fold_point(lexer.KEYWORD, 'query', 'end') +lex:add_fold_point(lexer.KEYWORD, 'receive', 'end') +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '[', ']') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('%')) + +return lex diff --git a/lua/lexers/fantom.lua b/lua/lexers/fantom.lua @@ -1,32 +1,27 @@ +-- Copyright 2018-2022 Simeon Maryasin (MarSoft). See LICENSE. -- Fantom LPeg lexer. --- Based on Java LPeg lexer by Mitchell mitchell.att.foicica.com and Vim's Fantom syntax. --- By MarSoft. +-- Based on Java LPeg lexer by Mitchell and Vim's Fantom syntax. 
-local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'fantom'} +local lex = lexer.new('fantom') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^2) +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) --- Comments. -local line_comment = '//' * l.nonnewline_esc^0 -local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 -local doc_comment = '**' * l.nonnewline_esc^0 -local comment = token(l.COMMENT, line_comment + block_comment + doc_comment) - --- Strings. -local sq_str = l.delimited_range("'", true) -local dq_str = l.delimited_range('"', true) -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. -local number = token(l.NUMBER, (l.float + l.integer) * S('LlFfDd')^-1) +-- Classes. +local type = token(lexer.TYPE, lexer.word) +lex:add_rule('class_sequence', + token(lexer.KEYWORD, 'class') * ws * type * ( -- at most one inheritance spec + ws * token(lexer.OPERATOR, ':') * ws * type * + ( -- at least 0 (i.e. any number) of additional classes + ws^-1 * token(lexer.OPERATOR, ',') * ws^-1 * type)^0)^-1) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match{ +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ 'using', 'native', -- external 'goto', 'void', 'serializable', 'volatile', -- error 'if', 'else', 'switch', -- conditional @@ -35,7 +30,9 @@ local keyword = token(l.KEYWORD, word_match{ 'null', -- constant 'this', 'super', -- typedef 'new', 'is', 'isnot', 'as', -- operator - 'plus', 'minus', 'mult', 'div', 'mod', 'get', 'set', 'slice', 'lshift', 'rshift', 'and', 'or', 'xor', 'inverse', 'negate', 'increment', 'decrement', 'equals', 'compare', -- long operator + 'plus', 'minus', 'mult', 'div', 'mod', 'get', 'set', 'slice', 'lshift', 'rshift', 'and', 'or', + 'xor', 'inverse', 'negate', -- + 'increment', 'decrement', 'equals', 'compare', -- long operator 'return', -- stmt 'static', 'const', 'final', -- storage class 'virtual', 'override', 'once', -- slot @@ -44,62 +41,44 @@ local keyword = token(l.KEYWORD, word_match{ 'assert', -- assert 'class', 'enum', 'mixin', -- typedef 'break', 'continue', -- branch - 'default', 'case', -- labels - 'public', 'internal', 'protected', 'private', 'abstract', -- scope decl -}) + 'default', 'case', -- labels + 'public', 'internal', 'protected', 'private', 'abstract' -- scope decl +})) -- Types. -local type = token(l.TYPE, word_match{ - 'Void', 'Bool', 'Int', 'Float', 'Decimal', - 'Str', 'Duration', 'Uri', 'Type', 'Range', - 'List', 'Map', 'Obj', - 'Err', 'Env', -}) +lex:add_rule('type', token(lexer.TYPE, word_match( + 'Void Bool Int Float Decimal Str Duration Uri Type Range List Map Obj Err Env'))) --- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +-- Functions. +-- lex:add_rule('function', token(lexer.FUNCTION, lexer.word) * #P('(')) --- Operators. -local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}#')) +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Annotations. -local facet = token('facet', '@' * l.word) +-- Strings. 
+local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +local bq_str = lexer.range('`', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str)) --- Functions. -local func = token(l.FUNCTION, l.word) * #P('(') +-- Comments. +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) --- Classes. -local class_sequence = token(l.KEYWORD, P('class')) * ws^1 * - token(l.TYPE, l.word) * ( -- at most one inheritance spec - ws^1 * token(l.OPERATOR, P(':')) * ws^1 * - token(l.TYPE, l.word) * - ( -- at least 0 (i.e. any number) of additional classes - ws^0 * token(l.OPERATOR, P(',')) * ws^0 * token(l.TYPE, l.word) - )^0 - )^-1 +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlFfDd')^-1)) -M._rules = { - {'whitespace', ws}, - {'class', class_sequence}, - {'keyword', keyword}, - {'type', type}, - {'function', func}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'facet', facet}, - {'operator', operator}, -} +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}#'))) -M._tokenstyles = { - facet = l.STYLE_PREPROCESSOR -} +-- Annotations. +lex:add_rule('facet', token('facet', '@' * lexer.word)) +lex:add_style('facet', lexer.styles.preprocessor) -M._foldsymbols = { - _patterns = {'[{}]', '/%*', '%*/', '//'}, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} -} +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) -return M +return lex diff --git a/lua/lexers/faust.lua b/lua/lexers/faust.lua @@ -1,58 +1,44 @@ --- Copyright 2015-2017 David B. Lamkins <david@lamkins.net>. See LICENSE. +-- Copyright 2015-2022 David B. 
Lamkins <david@lamkins.net>. See LICENSE. -- Faust LPeg lexer, see http://faust.grame.fr/ -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'faust'} +local lex = lexer.new('faust') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) --- Comments. -local line_comment = '//' * l.nonnewline^0 -local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'declare', 'import', 'mdoctags', 'dependencies', 'distributed', 'inputs', 'outputs', 'par', 'seq', + 'sum', 'prod', 'xor', 'with', 'environment', 'library', 'component', 'ffunction', 'fvariable', + 'fconstant', 'int', 'float', 'case', 'waveform', 'h:', 'v:', 't:' +})) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -local string = token(l.STRING, l.delimited_range('"', true)) +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true))) + +-- Comments. +local line_comment = lexer.to_eol('//') +local block_comment = lexer.range('/*', '*/') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -local int = R('09')^1 +local int = lexer.digit^1 local rad = P('.') local exp = (P('e') * S('+-')^-1 * int)^-1 local flt = int * (rad * int)^-1 * exp + int^-1 * rad * int * exp -local number = token(l.NUMBER, flt + int) +lex:add_rule('number', token(lexer.NUMBER, flt + int)) --- Keywords. 
-local keyword = token(l.KEYWORD, word_match{ - 'declare', 'import', 'mdoctags', 'dependencies', 'distributed', 'inputs', - 'outputs', 'par', 'seq', 'sum', 'prod', 'xor', 'with', 'environment', - 'library', 'component', 'ffunction', 'fvariable', 'fconstant', 'int', 'float', - 'case', 'waveform', 'h:', 'v:', 't:' -}) - --- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +-- Pragmas. +lex:add_rule('pragma', token(lexer.PREPROCESSOR, lexer.range('<mdoc>', '</mdoc>'))) -- Operators. -local punct = S('+-/*%<>~!=^&|?~:;,.()[]{}@#$`\\\'') -local operator = token(l.OPERATOR, punct) +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>~!=^&|?~:;,.()[]{}@#$`\\\''))) --- Pragmas. -local mdoc = P('<mdoc>') * (l.any - P('</mdoc>'))^0 * P('</mdoc>') -local pragma = token(l.PREPROCESSOR, mdoc) - -M._rules = { - {'whitespace', ws}, - {'comment', comment}, - {'pragma', pragma}, - {'keyword', keyword}, - {'number', number}, - {'operator', operator}, - {'identifier', identifier}, - {'string', string}, -} - -return M +return lex diff --git a/lua/lexers/fennel.lua b/lua/lexers/fennel.lua @@ -1,88 +1,43 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. --- Lua LPeg lexer. --- Original written by Peter Odding, 2007/04/04. +-- Copyright 2006-2022 Mitchell. See LICENSE. +-- Fennel LPeg lexer. +-- Contributed by Momohime Honda. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'fennel'} +local lex = lexer.new('fennel', {inherit = lexer.load('lua')}) -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local line_comment = ';' * l.nonnewline^0 -local comment = token(l.COMMENT, line_comment) - --- Strings. -local dq_str = l.delimited_range('"') -local string = token(l.STRING, dq_str) - --- Numbers. 
-local lua_integer = P('-')^-1 * (l.hex_num + l.dec_num) -local number = token(l.NUMBER, l.float + lua_integer) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match({ - '%', '*', '+', '-', '->', '->>', '-?>', '-?>>', '.', '..', '/', '//', ':', '<', '<=', '=', '>', '>=', '^', '~=', 'λ', - 'and', 'comment', 'do', 'doc', 'doto', 'each', 'eval-compiler', 'fn', 'for', 'global', 'hashfn', 'if', 'include', 'lambda', - 'length', 'let', 'local', 'lua', 'macro', 'macros', 'match', 'not', 'not=', 'or', 'partial', 'quote', 'require-macros', - 'set', 'set-forcibly!', 'tset', 'values', 'var', 'when', 'while' -}, "%*+-./:<=>?~^λ!")) - --- Libraries. -local library = token('library', word_match({ - -- Coroutine. - 'coroutine', 'coroutine.create', 'coroutine.resume', 'coroutine.running', - 'coroutine.status', 'coroutine.wrap', 'coroutine.yield', - -- Module. - 'package', 'package.cpath', 'package.loaded', 'package.loadlib', - 'package.path', 'package.preload', - -- String. - 'string', 'string.byte', 'string.char', 'string.dump', 'string.find', - 'string.format', 'string.gmatch', 'string.gsub', 'string.len', 'string.lower', - 'string.match', 'string.rep', 'string.reverse', 'string.sub', 'string.upper', - -- Table. - 'table', 'table.concat', 'table.insert', 'table.remove', 'table.sort', - -- Math. - 'math', 'math.abs', 'math.acos', 'math.asin', 'math.atan', 'math.ceil', - 'math.cos', 'math.deg', 'math.exp', 'math.floor', 'math.fmod', 'math.huge', - 'math.log', 'math.max', 'math.min', 'math.modf', 'math.pi', 'math.rad', - 'math.random', 'math.randomseed', 'math.sin', 'math.sqrt', 'math.tan', - -- IO. - 'io', 'io.close', 'io.flush', 'io.input', 'io.lines', 'io.open', 'io.output', - 'io.popen', 'io.read', 'io.stderr', 'io.stdin', 'io.stdout', 'io.tmpfile', - 'io.type', 'io.write', - -- OS. 
- 'os', 'os.clock', 'os.date', 'os.difftime', 'os.execute', 'os.exit', - 'os.getenv', 'os.remove', 'os.rename', 'os.setlocale', 'os.time', - 'os.tmpname', - -- Debug. - 'debug', 'debug.debug', 'debug.gethook', 'debug.getinfo', 'debug.getlocal', - 'debug.getmetatable', 'debug.getregistry', 'debug.getupvalue', - 'debug.sethook', 'debug.setlocal', 'debug.setmetatable', 'debug.setupvalue', - 'debug.traceback', -}, '.')) - -local initial = l.alpha + S"|$%&#*+-./:<=>?~^_λ!" -local subsequent = initial + l.digit +lex:modify_rule('keyword', token(lexer.KEYWORD, word_match{ + '#', '%', '*', '+', '-', '->>', '->', '-?>>', '-?>', '..', '.', '//', '/', ':', '<=', '<', '=', + '>=', '>', '?.', '^', '~=', 'λ', 'accumulate', 'and', 'band', 'bnot', 'bor', 'bxor', 'collect', + 'comment', 'do', 'doto', 'each', 'eval-compiler', 'fn', 'for', 'global', 'hashfn', 'icollect', + 'if', 'import-macros', 'include', 'lambda', 'length', 'let', 'local', 'lshift', 'lua', 'macro', + 'macrodebug', 'macros', 'match', 'not', 'not=', 'or', 'partial', 'pick-args', 'pick-values', + 'quote', 'require-macros', 'rshift', 'set', 'set-forcibly!', 'tset', 'values', 'var', 'when', + 'while', 'with-open' +})) -- Identifiers. -local identifier = token(l.IDENTIFIER, initial * subsequent^0) +local initial = lexer.alpha + S('|$%&#*+-/<=>?~^_λ!') +local subsequent = initial + lexer.digit +lex:modify_rule('identifier', token(lexer.IDENTIFIER, initial * subsequent^0 * P('#')^-1)) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'library', library}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number} -} +-- Strings. +local dq_str = lexer.range('"') +local kw_str = lpeg.B(1 - subsequent) * ':' * subsequent^1 +lex:modify_rule('string', token(lexer.STRING, dq_str + kw_str)) + +-- Comments. +lex:modify_rule('comment', token(lexer.COMMENT, lexer.to_eol(';'))) -M._tokenstyles = { - library = l.STYLE_TYPE, -} +-- Ignore these rules. 
+lex:modify_rule('longstring', P(false)) +lex:modify_rule('label', P(false)) +lex:modify_rule('operator', P(false)) -return M +return lex diff --git a/lua/lexers/fish.lua b/lua/lexers/fish.lua @@ -1,76 +1,57 @@ --- Copyright 2015-2017 Jason Schindler. See LICENSE. +-- Copyright 2015-2022 Jason Schindler. See LICENSE. -- Fish (http://fishshell.com/) script LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'fish'} +local lex = lexer.new('fish') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- shebang -local shebang = token('shebang', '#!/' * l.nonnewline^0) - --- Comments. -local comment = token(l.COMMENT, '#' * l.nonnewline^0) - --- Strings. -local sq_str = l.delimited_range("'", false, true) -local dq_str = l.delimited_range('"') - -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. -local number = token(l.NUMBER, l.float + l.integer) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match{ - 'alias', 'and', 'begin', 'bg', 'bind', 'block', 'break', 'breakpoint', - 'builtin', 'case', 'cd', 'command', 'commandline', 'complete', 'contains', - 'continue', 'count', 'dirh', 'dirs', 'echo', 'else', 'emit', 'end', 'eval', - 'exec', 'exit', 'fg', 'fish', 'fish_config', 'fish_indent', 'fish_pager', - 'fish_prompt', 'fish_right_prompt', 'fish_update_completions', 'fishd', 'for', - 'funced', 'funcsave', 'function', 'functions', 'help', 'history', 'if', 'in', - 'isatty', 'jobs', 'math', 'mimedb', 'nextd', 'not', 'open', 'or', 'popd', - 'prevd', 'psub', 'pushd', 'pwd', 'random', 'read', 'return', 'set', - 'set_color', 'source', 'status', 'switch', 'test', 'trap', 'type', 'ulimit', - 'umask', 'vared', 'while' -}) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'alias', 'and', 'begin', 'bg', 'bind', 'block', 'break', 'breakpoint', 'builtin', 'case', 'cd', + 'command', 'commandline', 'complete', 'contains', 'continue', 'count', 'dirh', 'dirs', 'echo', + 'else', 'emit', 'end', 'eval', 'exec', 'exit', 'fg', 'fish', 'fish_config', 'fishd', + 'fish_indent', 'fish_pager', 'fish_prompt', 'fish_right_prompt', 'fish_update_completions', 'for', + 'funced', 'funcsave', 'function', 'functions', 'help', 'history', 'if', 'in', 'isatty', 'jobs', + 'math', 'mimedb', 'nextd', 'not', 'open', 'or', 'popd', 'prevd', 'psub', 'pushd', 'pwd', 'random', + 'read', 'return', 'set', 'set_color', 'source', 'status', 'switch', 'test', 'trap', 'type', + 'ulimit', 'umask', 'vared', 'while' +})) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Variables. -local variable = token(l.VARIABLE, - '$' * l.word + '$' * l.delimited_range('{}', true, true)) +lex:add_rule('variable', token(lexer.VARIABLE, '$' * (lexer.word + lexer.range('{', '}', true)))) --- Operators. -local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}')) +-- Strings. 
+local sq_str = lexer.range("'", false, false) +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -M._rules = { - {'whitespace', ws}, - {'shebang', shebang}, - {'keyword', keyword}, - {'identifier', identifier}, - {'variable', variable}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} +-- Shebang. +lex:add_rule('shebang', token('shebang', lexer.to_eol('#!/'))) +lex:add_style('shebang', lexer.styles.label) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -M._tokenstyles = { - shebang = l.STYLE_LABEL -} +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))) -M._foldsymbols = { - _patterns = {'%l+'}, - [l.KEYWORD] = { - begin = 1, ['for'] = 1, ['function'] = 1, ['if'] = 1, switch = 1, - ['while'] = 1, ['end'] = -1 - } -} +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'begin', 'end') +lex:add_fold_point(lexer.KEYWORD, 'for', 'end') +lex:add_fold_point(lexer.KEYWORD, 'function', 'end') +lex:add_fold_point(lexer.KEYWORD, 'if', 'end') +lex:add_fold_point(lexer.KEYWORD, 'switch', 'end') +lex:add_fold_point(lexer.KEYWORD, 'while', 'end') -return M +return lex diff --git a/lua/lexers/forth.lua b/lua/lexers/forth.lua @@ -1,71 +1,56 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Forth LPeg lexer. +-- Contributions from Joseph Eib. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'forth'} +local lex = lexer.new('forth') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. 
-local line_comment = S('|\\') * l.nonnewline^0 -local block_comment = '(' * (l.any - ')')^0 * P(')')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Strings. -local c_str = 'c' * l.delimited_range('"', true, true) -local s_str = 's' * l.delimited_range('"', true, true) -local s_bs_str = 's\\' * l.delimited_range('"', true, false) -local dot_str = '.' * l.delimited_range('"', true, true) -local dot_paren_str = '.' * l.delimited_range('()', true, true, false) -local abort_str = 'abort' * l.delimited_range('"', true, true) -local string = token( - l.STRING, - c_str + s_str + s_bs_str + dot_str + dot_paren_str + abort_str -) - --- Numbers. -local number = token(l.NUMBER, P('-')^-1 * l.digit^1 * (S('./') * l.digit^1)^-1) +local c_str = 'c' * lexer.range('"', true, false) +local s_str = 's' * lexer.range('"', true, false) +local s_bs_str = 's\\' * lexer.range('"', true) +local dot_str = '.' * lexer.range('"', true, false) +local dot_paren_str = '.' * lexer.range('(', ')', true) +local abort_str = 'abort' * lexer.range('"', true, false) +lex:add_rule('string', + token(lexer.STRING, c_str + s_str + s_bs_str + dot_str + dot_paren_str + abort_str)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match({ - '#>', '#s', '*/', '*/mod', '+loop', ',', '.', '.r', '/mod', '0<', '0<>', - '0>', '0=', '1+', '1-', '2!', '2*', '2/', '2>r', '2@', '2drop', '2dup', - '2over', '2r>', '2r@', '2swap', ':noname', '<#', '<>', '>body', '>in', - '>number', '>r', '?do','?dup', '@', 'abort', 'abs', 'accept', 'action-of', - 'again', 'align', 'aligned', 'allot', 'and', 'base', 'begin', 'bl', - 'buffer:', 'c!', 'c,', 'c@', 'case', 'cell+', 'cells', 'char', 'char+', - 'chars', 'compile,', 'constant', 'count', 'cr', 'create', 'decimal', 'defer', - 'defer!', 'defer@', 'depth', 'do', 'does>', 'drop', 'dup', 'else', 'emit', - 'endcase', 'endof', 'environment?', 'erase', 'evaluate', 'execute', 'exit', - 'false', 'fill', 'find', 'fm/mod', 'here', 'hex', 'hold', 'holds', 'i', 'if', - 'immediate', 'invert', 'is', 'j', 'key', 'leave', 'literal', 'loop', - 'lshift', 'm*', 'marker', 'max', 'min', 'mod', 'move', 'negate', 'nip', 'of', - 'or', 'over', 'pad', 'parse', 'parse-name', 'pick', 'postpone', 'quit', 'r>', - 'r@', 'recurse', 'refill', 'restore-input', 'roll', 'rot', 'rshift', 's>d', - 'save-input', 'sign', 'sm/rem', 'source', 'source-id', 'space', 'spaces', - 'state', 'swap', 'to', 'then', 'true', 'tuck', 'type', 'u.', 'u.r', 'u>', - 'u<', 'um*', 'um/mod', 'unloop', 'until', 'unused', 'value', 'variable', - 'while', 'within', 'word', 'xor', '[\']', '[char]', '[compile]' -}, '><-@!?+,=[].\'', true)) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match({ + '#>', '#s', '*/', '*/mod', '+loop', ',', '.', '.r', '/mod', '0<', '0<>', '0>', '0=', '1+', '1-', + '2!', '2*', '2/', '2>r', '2@', '2drop', '2dup', '2over', '2r>', '2r@', '2swap', ':noname', '<#', + '<>', '>body', '>in', '>number', '>r', '?do', '?dup', '@', 'abort', 'abs', 'accept', 'action-of', + 'again', 'align', 'aligned', 'allot', 'and', 'base', 'begin', 'bl', 'buffer:', 'c!', 'c,', 'c@', + 'case', 'cell+', 'cells', 'char', 'char+', 'chars', 'compile,', 'constant,', 'count', 'cr', + 'create', 
'decimal', 'defer', 'defer!', 'defer@', 'depth', 'do', 'does>', 'drop', 'dup', 'else', + 'emit', 'endcase', 'endof', 'environment?', 'erase', 'evaluate', 'execute', 'exit', 'false', + 'fill', 'find', 'fm/mod', 'here', 'hex', 'hold', 'holds', 'i', 'if', 'immediate', 'invert', 'is', + 'j', 'key', 'leave', 'literal', 'loop', 'lshift', 'm*', 'marker', 'max', 'min', 'mod', 'move', + 'negate', 'nip', 'of', 'or', 'over', 'pad', 'parse', 'parse-name', 'pick', 'postpone', 'quit', + 'r>', 'r@', 'recurse', 'refill', 'restore-input', 'roll', 'rot', 'rshift', 's>d', 'save-input', + 'sign', 'sm/rem', 'source', 'source-id', 'space', 'spaces', 'state', 'swap', 'to', 'then', 'true', + 'tuck', 'type', 'u.', 'u.r', 'u>', 'u<', 'um*', 'um/mod', 'unloop', 'until', 'unused', 'value', + 'variable', 'while', 'within', 'word', 'xor', "[']", '[char]', '[compile]' +}, true))) -- Identifiers. -local identifier = token(l.IDENTIFIER, (l.alnum + S('+-*=<>.?/\'%,_$#'))^1) +lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alnum + S('+-*=<>.?/\'%,_$#'))^1)) --- Operators. -local operator = token(l.OPERATOR, S(':;<>+*-/[]#')) +-- Comments. +local line_comment = lexer.to_eol(S('|\\')) +local block_comment = lexer.range('(', ')') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 * (S('./') * lexer.digit^1)^-1)) -M._rules = { - {'whitespace', ws}, - {'string', string}, - {'keyword', keyword}, - {'identifier', identifier}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S(':;<>+*-/[]#'))) -return M +return lex diff --git a/lua/lexers/fortran.lua b/lua/lexers/fortran.lua @@ -1,91 +1,85 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Fortran LPeg lexer. 
-local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'fortran'} +local lex = lexer.new('fortran') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments. -local c_comment = l.starts_line(S('Cc')) * l.nonnewline^0 -local d_comment = l.starts_line(S('Dd')) * l.nonnewline^0 -local ex_comment = l.starts_line('!') * l.nonnewline^0 -local ast_comment = l.starts_line('*') * l.nonnewline^0 -local line_comment = '!' * l.nonnewline^0 -local comment = token(l.COMMENT, c_comment + d_comment + ex_comment + - ast_comment + line_comment) - --- Strings. -local sq_str = l.delimited_range("'", true, true) -local dq_str = l.delimited_range('"', true, true) -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. -local number = token(l.NUMBER, (l.float + l.integer) * -l.alpha) +local line_comment = lexer.to_eol(lexer.starts_line(S('CcDd!*')) + '!') +lex:add_rule('comment', token(lexer.COMMENT, line_comment)) -- Keywords. -local keyword = token(l.KEYWORD, word_match({ - 'include', 'program', 'module', 'subroutine', 'function', 'contains', 'use', - 'call', 'return', +lex:add_rule('keyword', token(lexer.KEYWORD, word_match({ + 'include', 'interface', 'program', 'module', 'subroutine', 'function', 'contains', 'use', 'call', + 'return', -- Statements. - 'case', 'select', 'default', 'continue', 'cycle', 'do', 'while', 'else', 'if', - 'elseif', 'then', 'elsewhere', 'end', 'endif', 'enddo', 'forall', 'where', - 'exit', 'goto', 'pause', 'stop', + 'case', 'select', 'default', 'continue', 'cycle', 'do', 'while', 'else', 'if', 'elseif', 'then', + 'elsewhere', 'end', 'endif', 'enddo', 'equivalence', 'external', 'forall', 'where', 'exit', + 'goto', 'pause', 'save', 'stop', -- Operators. 
- '.not.', '.and.', '.or.', '.xor.', '.eqv.', '.neqv.', '.eq.', '.ne.', '.gt.', - '.ge.', '.lt.', '.le.', + '.not.', '.and.', '.or.', '.xor.', '.eqv.', '.neqv.', '.eq.', '.ne.', '.gt.', '.ge.', '.lt.', + '.le.', -- Logical. - '.false.', '.true.' -}, '.', true)) + '.false.', '.true.', + -- Attributes and other keywords. + 'access', 'action', 'advance', 'assignment', 'block', 'entry', 'in', 'inout', 'intent', 'only', + 'out', 'optional', 'pointer', 'precision', 'procedure', 'recursive', 'result', 'sequence', 'size', + 'stat', 'target', 'type' +}, true))) -- Functions. -local func = token(l.FUNCTION, word_match({ +lex:add_rule('function', token(lexer.FUNCTION, word_match({ -- I/O. - 'backspace', 'close', 'endfile', 'inquire', 'open', 'print', 'read', 'rewind', - 'write', 'format', - -- Type conversion, utility, and math. - 'aimag', 'aint', 'amax0', 'amin0', 'anint', 'ceiling', 'cmplx', 'conjg', - 'dble', 'dcmplx', 'dfloat', 'dim', 'dprod', 'float', 'floor', 'ifix', 'imag', - 'int', 'logical', 'modulo', 'nint', 'real', 'sign', 'sngl', 'transfer', - 'zext', 'abs', 'acos', 'aimag', 'aint', 'alog', 'alog10', 'amax0', 'amax1', - 'amin0', 'amin1', 'amod', 'anint', 'asin', 'atan', 'atan2', 'cabs', 'ccos', - 'char', 'clog', 'cmplx', 'conjg', 'cos', 'cosh', 'csin', 'csqrt', 'dabs', - 'dacos', 'dasin', 'datan', 'datan2', 'dble', 'dcos', 'dcosh', 'ddim', 'dexp', - 'dim', 'dint', 'dlog', 'dlog10', 'dmax1', 'dmin1', 'dmod', 'dnint', 'dprod', - 'dreal', 'dsign', 'dsin', 'dsinh', 'dsqrt', 'dtan', 'dtanh', 'exp', 'float', - 'iabs', 'ichar', 'idim', 'idint', 'idnint', 'ifix', 'index', 'int', 'isign', - 'len', 'lge', 'lgt', 'lle', 'llt', 'log', 'log10', 'max', 'max0', 'max1', - 'min', 'min0', 'min1', 'mod', 'nint', 'real', 'sign', 'sin', 'sinh', 'sngl', - 'sqrt', 'tan', 'tanh' -}, nil, true)) + 'backspace', 'close', 'endfile', 'inquire', 'open', 'print', 'read', 'rewind', 'write', 'format', + -- Type conversion utility and math. 
+ 'aimag', 'aint', 'amax0', 'amin0', 'anint', 'ceiling', 'cmplx', 'conjg', 'dble', 'dcmplx', + 'dfloat', 'dim', 'dprod', 'float', 'floor', 'ifix', 'imag', 'int', 'logical', 'modulo', 'nint', + 'real', 'sign', 'sngl', 'transfer', 'zext', 'abs', 'acos', 'aimag', 'aint', 'alog', 'alog10', + 'amax0', 'amax1', 'amin0', 'amin1', 'amod', 'anint', 'asin', 'atan', 'atan2', 'cabs', 'ccos', + 'char', 'clog', 'cmplx', 'conjg', 'cos', 'cosh', 'csin', 'csqrt', 'dabs', 'dacos', 'dasin', + 'datan', 'datan2', 'dble', 'dcos', 'dcosh', 'ddim', 'dexp', 'dim', 'dint', 'dlog', 'dlog10', + 'dmax1', 'dmin1', 'dmod', 'dnint', 'dprod', 'dreal', 'dsign', 'dsin', 'dsinh', 'dsqrt', 'dtan', + 'dtanh', 'exp', 'float', 'iabs', 'ichar', 'idim', 'idint', 'idnint', 'ifix', 'index', 'int', + 'isign', 'len', 'lge', 'lgt', 'lle', 'llt', 'log', 'log10', 'max', 'max0', 'max1', 'min', 'min0', + 'min1', 'mod', 'nint', 'real', 'sign', 'sin', 'sinh', 'sngl', 'sqrt', 'tan', 'tanh', + -- Matrix math. + 'matmul', 'transpose', 'reshape', + -- Other frequently used built-in statements. + 'assign', 'nullify', + -- ISO C binding from Fortran 2003. + 'c_sizeof', 'c_f_pointer', 'c_associated' +}, true))) -- Types. 
-local type = token(l.TYPE, word_match({ - 'implicit', 'explicit', 'none', 'data', 'parameter', 'allocate', - 'allocatable', 'allocated', 'deallocate', 'integer', 'real', 'double', - 'precision', 'complex', 'logical', 'character', 'dimension', 'kind', -}, nil, true)) +lex:add_rule('type', token(lexer.TYPE, word_match({ + 'implicit', 'explicit', 'none', 'data', 'parameter', 'allocate', 'allocatable', 'allocated', + 'deallocate', 'integer', 'real', 'double', 'precision', 'complex', 'logical', 'character', + 'dimension', 'kind', + -- ISO C binding from Fortran 2003 + 'bind', 'c_int', 'c_short', 'c_long', 'c_long_long', 'c_signed_char', 'c_size_t', 'c_int8_t', + 'c_int16_t', 'c_int32_t', 'c_int64_t', 'c_int128_t', 'c_intptr_t', 'c_float', 'c_double', + 'c_long_double', 'c_float128', 'c_float_complex', 'c_double_complex', 'c_long_double_complex', + 'c_float128_complex', 'c_bool', 'c_char', 'c_null_char', 'c_new_line', 'c_null_ptr', 'c_funptr' +}, true))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number * -lexer.alpha)) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.alnum^1) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alnum^1)) --- Operators. -local operator = token(l.OPERATOR, S('<>=&+-/*,()')) +-- Strings. +local sq_str = lexer.range("'", true, false) +local dq_str = lexer.range('"', true, false) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -M._rules = { - {'whitespace', ws}, - {'comment', comment}, - {'keyword', keyword}, - {'function', func}, - {'type', type}, - {'number', number}, - {'identifier', identifier}, - {'string', string}, - {'operator', operator}, -} +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('<>=&+-/*,()'))) -return M +return lex diff --git a/lua/lexers/fsharp.lua b/lua/lexers/fsharp.lua @@ -1,76 +1,57 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- F# LPeg lexer. 
-local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'fsharp'} +local lex = lexer.new('fsharp', {fold_by_indentation = true}) -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local line_comment = P('//') * l.nonnewline^0 -local block_comment = l.nested_pair('(*', '*)') -local comment = token(l.COMMENT, line_comment + block_comment) - --- Strings. -local sq_str = l.delimited_range("'", true) -local dq_str = l.delimited_range('"', true) -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. -local number = token(l.NUMBER, (l.float + l.integer * S('uUlL')^-1)) - --- Preprocessor. -local preproc_word = word_match{ - 'ifndef', 'ifdef', 'if', 'else', 'endif', 'light', 'region', 'endregion' -} -local preproc = token(l.PREPROCESSOR, - l.starts_line('#') * S('\t ')^0 * preproc_word * - (l.nonnewline_esc^1 + l.space * l.nonnewline_esc^0)) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match{ - 'abstract', 'and', 'as', 'assert', 'asr', 'begin', 'class', 'default', - 'delegate', 'do', 'done', 'downcast', 'downto', 'else', 'end', 'enum', - 'exception', 'false', 'finaly', 'for', 'fun', 'function', 'if', 'in', - 'iherit', 'interface', 'land', 'lazy', 'let', 'lor', 'lsl', 'lsr', 'lxor', - 'match', 'member', 'mod', 'module', 'mutable', 'namespace', 'new', 'null', - 'of', 'open', 'or', 'override', 'sig', 'static', 'struct', 'then', 'to', - 'true', 'try', 'type', 'val', 'when', 'inline', 'upcast', 'while', 'with', - 'async', 'atomic', 'break', 'checked', 'component', 'const', 'constructor', - 'continue', 'eager', 'event', 'external', 'fixed', 'functor', 'include', - 'method', 'mixin', 'process', 'property', 'protected', 'public', 'pure', - 'readonly', 'return', 'sealed', 'switch', 'virtual', 'void', 'volatile', - 'where', +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'abstract', 'and', 'as', 'assert', 'asr', 'begin', 'class', 'default', 'delegate', 'do', 'done', + 'downcast', 'downto', 'else', 'end', 'enum', 'exception', 'false', 'finaly', 'for', 'fun', + 'function', 'if', 'in', 'iherit', 'interface', 'land', 'lazy', 'let', 'lor', 'lsl', 'lsr', 'lxor', + 'match', 'member', 'mod', 'module', 'mutable', 'namespace', 'new', 'null', 'of', 'open', 'or', + 'override', 'sig', 'static', 'struct', 'then', 'to', 'true', 'try', 'type', 'val', 'when', + 'inline', 'upcast', 'while', 'with', 'async', 'atomic', 'break', 'checked', 'component', 'const', + 'constructor', 'continue', 'eager', 'event', 'external', 'fixed', 'functor', 'include', 'method', + 'mixin', 'process', 'property', 'protected', 'public', 'pure', 'readonly', 'return', 'sealed', + 'switch', 'virtual', 'void', 'volatile', 'where', -- Booleans. 'true', 'false' -}) +})) -- Types. 
-local type = token(l.TYPE, word_match{ - 'bool', 'byte', 'sbyte', 'int16', 'uint16', 'int', 'uint32', 'int64', - 'uint64', 'nativeint', 'unativeint', 'char', 'string', 'decimal', 'unit', - 'void', 'float32', 'single', 'float', 'double' -}) +lex:add_rule('type', token(lexer.TYPE, word_match{ + 'bool', 'byte', 'sbyte', 'int16', 'uint16', 'int', 'uint32', 'int64', 'uint64', 'nativeint', + 'unativeint', 'char', 'string', 'decimal', 'unit', 'void', 'float32', 'single', 'float', 'double' +})) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, S('=<>+-*/^.,:;~!@#%^&|?[](){}')) +-- Strings. +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Comments. +local line_comment = lexer.to_eol('//') +local block_comment = lexer.range('(*', '*)', false, false, true) +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer * S('uUlL')^-1)) + +-- Preprocessor. +lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') * S('\t ')^0 * + word_match('else endif endregion if ifdef ifndef light region'))) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'type', type}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=<>+-*/^.,:;~!@#%^&|?[](){}'))) -return M +return lex diff --git a/lua/lexers/fstab.lua b/lua/lexers/fstab.lua @@ -1,569 +1,126 @@ --- Copyright 2016 Christian Hesse +-- Copyright 2016-2022 Christian Hesse. See LICENSE. -- fstab LPeg lexer. 
-local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'fstab'} +local lex = lexer.new('fstab', {lex_by_line = true}) -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local comment = token(l.COMMENT, l.starts_line('#') * l.nonnewline^0) - --- Numbers. -local dec = l.digit^1 * ('_' * l.digit^1)^0 -local oct_num = '0' * S('01234567_')^1 -local integer = S('+-')^-1 * (l.hex_num + oct_num + dec) -local number = token(l.NUMBER, (l.float + integer)) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match({ - -- basic filesystem-independent mount options - 'async', - 'atime', - 'auto', - 'comment', - 'context', - 'defaults', - 'defcontext', - 'dev', - 'dirsync', - 'exec', - 'fscontext', - 'group', - 'iversion', - 'lazytime', - 'loud', - 'mand', - '_netdev', - 'noatime', - 'noauto', - 'nodev', - 'nodiratime', - 'noexec', - 'nofail', - 'noiversion', - 'nolazytime', - 'nomand', - 'norelatime', - 'nostrictatime', - 'nosuid', - 'nouser', - 'owner', - 'relatime', - 'remount', - 'ro', - 'rootcontext', - 'rw', - 'silent', - 'strictatime', - 'suid', - 'sync', - 'user', - 'users', - - -- mount options for systemd, see systemd.mount(5) - 'x-systemd.automount', - 'x-systemd.device-timeout', - 'x-systemd.idle-timeout', - 'x-systemd.mount-timeout', - 'x-systemd.requires', - 'x-systemd.requires-mounts-for', +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + -- Basic filesystem-independent mount options. 
+ 'async', 'atime', 'auto', 'comment', 'context', 'defaults', 'defcontext', 'dev', 'dirsync', + 'exec', 'fscontext', 'group', 'iversion', 'lazytime', 'loud', 'mand', '_netdev', 'noatime', + 'noauto', 'nodev', 'nodiratime', 'noexec', 'nofail', 'noiversion', 'nolazytime', 'nomand', + 'norelatime', 'nostrictatime', 'nosuid', 'nouser', 'owner', 'relatime', 'remount', 'ro', + 'rootcontext', 'rw', 'silent', 'strictatime', 'suid', 'sync', 'user', 'users', + -- Mount options for systemd see systemd.mount(5). + 'x-systemd.automount', 'x-systemd.device-timeout', 'x-systemd.idle-timeout', + 'x-systemd.mount-timeout', 'x-systemd.requires', 'x-systemd.requires-mounts-for', 'x-initrd.mount', - - -- mount options for adfs - 'uid', - 'gid', - 'ownmask', - 'othmask', - - -- mount options for affs - 'uid', - 'gid', - 'setuid', - 'setgid', - 'mode', - 'protect', - 'usemp', - 'verbose', - 'prefix', - 'volume', - 'reserved', - 'root', - 'bs', - 'grpquota', - 'noquota', - 'quota', - 'usrquota', - - -- mount options for btrfs - 'alloc_start', - 'autodefrag', - 'check_int', - 'check_int_data', - 'check_int_print_mask', - 'commit', - 'compress', - 'zlib', - 'lzo', - 'no', - 'compress-force', - 'degraded', - 'device', - 'discard', - 'enospc_debug', - 'fatal_errors', - 'bug', - 'panic', - 'flushoncommit', - 'inode_cache', - 'max_inline', - 'metadata_ratio', - 'noacl', - 'nobarrier', - 'nodatacow', - 'nodatasum', - 'notreelog', - 'recovery', - 'rescan_uuid_tree', - 'skip_balance', - 'nospace_cache', - 'clear_cache', - 'ssd', - 'nossd', - 'ssd_spread', - 'subvol', - 'subvolid', - 'subvolrootid', - 'thread_pool', - 'user_subvol_rm_allowed', - - -- mount options for devpts - 'uid', - 'gid', - 'mode', - 'newinstance', - 'ptmxmode', - - -- mount options for ext2 - 'acl', - 'noacl', - 'bsddf', - 'minixdf', - 'check', - 'nocheck', - 'debug', - 'errors', - 'continue', - 'remount-ro', - 'panic', - 'grpid', - 'bsdgroups', - 'nogrpid', - 'sysvgroups', - 'grpquota', - 'noquota', - 'quota', - 'usrquota', 
- 'nouid32', - 'oldalloc', - 'orlov', - 'resgid', - 'resuid', - 'sb', - 'user_xattr', + -- Mount options for adfs. + 'uid', 'gid', 'ownmask', 'othmask', + -- Mount options for affs. + 'uid', 'gid', 'setuid', 'setgid', 'mode', 'protect', 'usemp', 'verbose', 'prefix', 'volume', + 'reserved', 'root', 'bs', 'grpquota', 'noquota', 'quota', 'usrquota', + -- Mount options for btrfs. + 'alloc_start', 'autodefrag', 'check_int', 'check_int_data', 'check_int_print_mask', 'commit', + 'compress', 'zlib', 'lzo', 'no', 'compress-force', 'degraded', 'device', 'discard', + 'enospc_debug', 'fatal_errors', 'bug', 'panic', 'flushoncommit', 'inode_cache', 'max_inline', + 'metadata_ratio', 'noacl', 'nobarrier', 'nodatacow', 'nodatasum', 'notreelog', 'recovery', + 'rescan_uuid_tree', 'skip_balance', 'nospace_cache', 'clear_cache', 'ssd', 'nossd', 'ssd_spread', + 'subvol', 'subvolid', 'subvolrootid', 'thread_pool', 'user_subvol_rm_allowed', + -- Mount options for devpts. + 'uid', 'gid', 'mode', 'newinstance', 'ptmxmode', + -- Mount options for ext2. 
+ 'acl', 'noacl', 'bsddf', 'minixdf', 'check', 'nocheck', 'debug', 'errors', 'continue', + 'remount-ro', 'panic', 'grpid', 'bsdgroups', 'nogrpid', 'sysvgroups', 'grpquota', 'noquota', + 'quota', 'usrquota', 'nouid32', 'oldalloc', 'orlov', 'resgid', 'resuid', 'sb', 'user_xattr', 'nouser_xattr', - - -- mount options for ext3 - 'journal', - 'update', - 'journal_dev', - 'journal_path', - 'norecoverynoload', - 'data', - 'journal', - 'ordered', - 'writeback', - 'data_err', - 'ignore', - 'abort', - 'barrier', - 'commit', - 'user_xattr', - 'acl', - 'usrjquota', - 'grpjquota', - 'jqfmt', - - -- mount options for ext4 - 'journal_checksum', - 'journal_async_commit', - 'barrier', - 'nobarrier', - 'inode_readahead_blks', - 'stripe', - 'delalloc', - 'nodelalloc', - 'max_batch_time', - 'min_batch_time', - 'journal_ioprio', - 'abort', - 'auto_da_alloc', - 'noauto_da_alloc', - 'noinit_itable', - 'init_itable', - 'discard', - 'nodiscard', - 'nouid32', - 'block_validity', - 'noblock_validity', - 'dioread_lock', - 'dioread_nolock', - 'max_dir_size_kb', - 'i_version', - - -- mount options for fat (common part of msdos, umsdos and vfat) - 'blocksize', - 'uid', - 'gid', - 'umask', - 'dmask', - 'fmask', - 'allow_utime', - 'check', - 'relaxed', - 'normal', - 'strict', - 'codepage', - 'conv', - 'binary', - 'text', - 'auto', - 'cvf_format', - 'cvf_option', - 'debug', - 'discard', - 'dos1xfloppy', - 'errors', - 'panic', - 'continue', - 'remount-ro', - 'fat', - 'iocharset', - 'nfs', - 'stale_rw', - 'nostale_ro', - 'tz', - 'time_offset', - 'quiet', - 'rodir', - 'showexec', - 'sys_immutable', + -- Mount options for ext3. + 'journal', 'update', 'journal_dev', 'journal_path', 'norecoverynoload', 'data', 'journal', + 'ordered', 'writeback', 'data_err', 'ignore', 'abort', 'barrier', 'commit', 'user_xattr', 'acl', + 'usrjquota', 'grpjquota', 'jqfmt', + -- Mount options for ext4. 
+ 'journal_checksum', 'journal_async_commit', 'barrier', 'nobarrier', 'inode_readahead_blks', + 'stripe', 'delalloc', 'nodelalloc', 'max_batch_time', 'min_batch_time', 'journal_ioprio', 'abort', + 'auto_da_alloc', 'noauto_da_alloc', 'noinit_itable', 'init_itable', 'discard', 'nodiscard', + 'nouid32', 'block_validity', 'noblock_validity', 'dioread_lock', 'dioread_nolock', + 'max_dir_size_kb', 'i_version', + -- Mount options for fat (common part of msdos umsdos and vfat). + 'blocksize', 'uid', 'gid', 'umask', 'dmask', 'fmask', 'allow_utime', 'check', 'relaxed', 'normal', + 'strict', 'codepage', 'conv', 'binary', 'text', 'auto', 'cvf_format', 'cvf_option', 'debug', + 'discard', 'dos1xfloppy', 'errors', 'panic', 'continue', 'remount-ro', 'fat', 'iocharset', 'nfs', + 'stale_rw', 'nostale_ro', 'tz', 'time_offset', 'quiet', 'rodir', 'showexec', 'sys_immutable', + 'flush', 'usefree', 'dots', 'nodots', 'dotsOK', + -- Mount options for hfs. + 'creator', 'type', 'uid', 'gid', 'dir_umask', 'file_umask', 'umask', 'session', 'part', 'quiet', + -- Mount options for hpfs. + 'uid', 'gid', 'umask', 'case', 'lower', 'asis', 'conv', 'binary', 'text', 'auto', 'nocheck', + -- Mount options for iso9660. + 'norock', 'nojoliet', 'check', 'relaxed', 'strict', 'uid', 'gid', 'map', 'normal', 'offacorn', + 'mode', 'unhide', 'block', 'conv', 'auto', 'binary', 'mtext', 'text', 'cruft', 'session', + 'sbsector', 'iocharset', 'utf8', + -- Mount options for jfs. + 'iocharset', 'resize', 'nointegrity', 'integrity', 'errors', 'continue', 'remount-ro', 'panic', + 'noquota', 'quota', 'usrquota', 'grpquota', + -- Mount options for ntfs. + 'iocharset', 'nls', 'utf8', 'uni_xlate', 'posix', 'uid', 'gid', 'umask', + -- Mount options for overlay. + 'lowerdir', 'upperdir', 'workdir', + -- Mount options for reiserfs. 
+ 'conv', 'hash', 'rupasov', 'tea', 'r5', 'detect', 'hashed_relocation', 'no_unhashed_relocation', + 'noborder', 'nolog', 'notail', 'replayonly', 'resize', 'user_xattr', 'acl', 'barrier', 'none', 'flush', - 'usefree', - 'dots', - 'nodots', - 'dotsOK', - - -- mount options for hfs - 'creator', - 'type', - 'uid', - 'gid', - 'dir_umask', - 'file_umask', - 'umask', - 'session', - 'part', - 'quiet', - - -- mount options for hpfs - 'uid', - 'gid', - 'umask', - 'case', - 'lower', - 'asis', - 'conv', - 'binary', - 'text', - 'auto', - 'nocheck', + -- Mount options for tmpfs. + 'size', 'nr_blocks', 'nr_inodes', 'mode', 'uid', 'gid', 'mpol', 'default', 'prefer', 'bind', + 'interleave', + -- Mount options for ubifs. + 'bulk_read', 'no_bulk_read', 'chk_data_crc', 'no_chk_data_crc.', 'compr', 'none', 'lzo', 'zlib', + -- Mount options for udf. + 'gid', 'umask', 'uid', 'unhide', 'undelete', 'nostrict', 'iocharset', 'bs', 'novrs', 'session', + 'anchor', 'volume', 'partition', 'lastblock', 'fileset', 'rootdir', + -- Mount options for ufs. + 'ufstype', 'old', '44bsd', 'ufs2', '5xbsd', 'sun', 'sunx86', 'hp', 'nextstep', 'nextstep-cd', + 'openstep', 'onerror', 'lock', 'umount', 'repair', + -- Mount options for vfat. + 'uni_xlate', 'posix', 'nonumtail', 'utf8', 'shortname', 'lower', 'win95', 'winnt', 'mixed', + -- Mount options for usbfs. + 'devuid', 'devgid', 'devmode', 'busuid', 'busgid', 'busmode', 'listuid', 'listgid', 'listmode', + -- Filesystems. 
+ 'adfs', 'ados', 'affs', 'anon_inodefs', 'atfs', 'audiofs', 'auto', 'autofs', 'bdev', 'befs', + 'bfs', 'btrfs', 'binfmt_misc', 'cd9660', 'cfs', 'cgroup', 'cifs', 'coda', 'configfs', 'cpuset', + 'cramfs', 'devfs', 'devpts', 'devtmpfs', 'e2compr', 'efs', 'ext2', 'ext2fs', 'ext3', 'ext4', + 'fdesc', 'ffs', 'filecore', 'fuse', 'fuseblk', 'fusectl', 'hfs', 'hpfs', 'hugetlbfs', 'iso9660', + 'jffs', 'jffs2', 'jfs', 'kernfs', 'lfs', 'linprocfs', 'mfs', 'minix', 'mqueue', 'msdos', 'ncpfs', + 'nfs', 'nfsd', 'nilfs2', 'none', 'ntfs', 'null', 'nwfs', 'overlay', 'ovlfs', 'pipefs', 'portal', + 'proc', 'procfs', 'pstore', 'ptyfs', 'qnx4', 'reiserfs', 'ramfs', 'romfs', 'securityfs', 'shm', + 'smbfs', 'squashfs', 'sockfs', 'sshfs', 'std', 'subfs', 'swap', 'sysfs', 'sysv', 'tcfs', 'tmpfs', + 'udf', 'ufs', 'umap', 'umsdos', 'union', 'usbfs', 'userfs', 'vfat', 'vs3fs', 'vxfs', 'wrapfs', + 'wvfs', 'xenfs', 'xfs', 'zisofs' +})) - -- mount options for iso9660 - 'norock', - 'nojoliet', - 'check', - 'relaxed', - 'strict', - 'uid', - 'gid', - 'map', - 'normal', - 'offacorn', - 'mode', - 'unhide', - 'block', - 'conv', - 'auto', - 'binary', - 'mtext', - 'text', - 'cruft', - 'session', - 'sbsector', - 'iocharset', - 'utf8', - - -- mount options for jfs - 'iocharset', - 'resize', - 'nointegrity', - 'integrity', - 'errors', - 'continue', - 'remount-ro', - 'panic', - 'noquota', - 'quota', - 'usrquota', - 'grpquota', - - -- mount options for ntfs - 'iocharset', - 'nls', - 'utf8', - 'uni_xlate', - 'posix', - 'uid', - 'gid', - 'umask', - - -- mount options for overlay - 'lowerdir', - 'upperdir', - 'workdir', - - -- mount options for reiserfs - 'conv', - 'hash', - 'rupasov', - 'tea', - 'r5', - 'detect', - 'hashed_relocation', - 'no_unhashed_relocation', - 'noborder', - 'nolog', - 'notail', - 'replayonly', - 'resize', - 'user_xattr', - 'acl', - 'barrier', - 'none', - 'flush', - - -- mount options for tmpfs - 'size', - 'nr_blocks', - 'nr_inodes', - 'mode', - 'uid', - 'gid', - 'mpol', - 'default', - 
'prefer', - 'bind', - 'interleave', - - -- mount options for ubifs - 'bulk_read', - 'no_bulk_read', - 'chk_data_crc', - 'no_chk_data_crc.', - 'compr', - 'none', - 'lzo', - 'zlib', - - -- mount options for udf - 'gid', - 'umask', - 'uid', - 'unhide', - 'undelete', - 'nostrict', - 'iocharset', - 'bs', - 'novrs', - 'session', - 'anchor', - 'volume', - 'partition', - 'lastblock', - 'fileset', - 'rootdir', - - -- mount options for ufs - 'ufstype', - 'old', - '44bsd', - 'ufs2', - '5xbsd', - 'sun', - 'sunx86', - 'hp', - 'nextstep', - 'nextstep-cd', - 'openstep', - 'onerror', - 'lock', - 'umount', - 'repair', - - -- mount options for vfat - 'uni_xlate', - 'posix', - 'nonumtail', - 'utf8', - 'shortname', - 'lower', - 'win95', - 'winnt', - 'mixed', - - -- mount options for usbfs - 'devuid', - 'devgid', - 'devmode', - 'busuid', - 'busgid', - 'busmode', - 'listuid', - 'listgid', - 'listmode', - - -- filesystems - 'adfs', - 'ados', - 'affs', - 'anon_inodefs', - 'atfs', - 'audiofs', - 'auto', - 'autofs', - 'bdev', - 'befs', - 'bfs', - 'btrfs', - 'binfmt_misc', - 'cd9660', - 'cfs', - 'cgroup', - 'cifs', - 'coda', - 'configfs', - 'cpuset', - 'cramfs', - 'devfs', - 'devpts', - 'devtmpfs', - 'e2compr', - 'efs', - 'ext2', - 'ext2fs', - 'ext3', - 'ext4', - 'fdesc', - 'ffs', - 'filecore', - 'fuse', - 'fuseblk', - 'fusectl', - 'hfs', - 'hpfs', - 'hugetlbfs', - 'iso9660', - 'jffs', - 'jffs2', - 'jfs', - 'kernfs', - 'lfs', - 'linprocfs', - 'mfs', - 'minix', - 'mqueue', - 'msdos', - 'ncpfs', - 'nfs', - 'nfsd', - 'nilfs2', - 'none', - 'ntfs', - 'null', - 'nwfs', - 'overlay', - 'ovlfs', - 'pipefs', - 'portal', - 'proc', - 'procfs', - 'pstore', - 'ptyfs', - 'qnx4', - 'reiserfs', - 'ramfs', - 'romfs', - 'securityfs', - 'shm', - 'smbfs', - 'squashfs', - 'sockfs', - 'sshfs', - 'std', - 'subfs', - 'swap', - 'sysfs', - 'sysv', - 'tcfs', - 'tmpfs', - 'udf', - 'ufs', - 'umap', - 'umsdos', - 'union', - 'usbfs', - 'userfs', - 'vfat', - 'vs3fs', - 'vxfs', - 'wrapfs', - 'wvfs', - 'xenfs', - 'xfs', - 
'zisofs', -}, '.-')) +-- Numbers. +local uuid = lexer.xdigit^8 * ('-' * lexer.xdigit^4)^-3 * '-' * lexer.xdigit^12 +local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 +local oct_num = '0' * S('01234567_')^1 +local integer = S('+-')^-1 * (lexer.hex_num + oct_num + dec) +lex:add_rule('number', token(lexer.NUMBER, uuid + lexer.float + integer)) -- Identifiers. -local word = (l.alpha + '_') * (l.alnum + S('_.'))^0 -local identifier = token(l.IDENTIFIER, word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * (lexer.alnum + S('_.'))^0)) --- Operators. -local operator = token(l.OPERATOR, S('=,')) +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.starts_line(lexer.to_eol('#')))) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'identifier', identifier}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} +-- Directories. +lex:add_rule('directory', token(lexer.VARIABLE, '/' * (1 - lexer.space)^0)) -M._LEXBYLINE = true +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=,'))) -return M +return lex diff --git a/lua/lexers/gap.lua b/lua/lexers/gap.lua @@ -1,56 +1,44 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Gap LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'gap'} +local lex = lexer.new('gap') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) --- Comments. -local comment = token(l.COMMENT, '#' * l.nonnewline^0) +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'and', 'break', 'continue', 'do', 'elif', 'else', 'end', 'fail', 'false', 'fi', 'for', 'function', + 'if', 'in', 'infinity', 'local', 'not', 'od', 'or', 'rec', 'repeat', 'return', 'then', 'true', + 'until', 'while' +})) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -local sq_str = l.delimited_range("'", true) -local dq_str = l.delimited_range('"', true) -local string = token(l.STRING, sq_str + dq_str) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Numbers. -local number = token(l.NUMBER, l.digit^1 * -l.alpha) +lex:add_rule('number', token(lexer.NUMBER, lexer.dec_num * -lexer.alpha)) --- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'and', 'break', 'continue', 'do', 'elif', 'else', 'end', 'fail', 'false', - 'fi', 'for', 'function', 'if', 'in', 'infinity', 'local', 'not', 'od', 'or', - 'rec', 'repeat', 'return', 'then', 'true', 'until', 'while' -}) +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('*+-,./:;<=>~^#()[]{}'))) --- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'function', 'end') +lex:add_fold_point(lexer.KEYWORD, 'do', 'od') +lex:add_fold_point(lexer.KEYWORD, 'if', 'fi') +lex:add_fold_point(lexer.KEYWORD, 'repeat', 'until') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#')) --- Operators. 
-local operator = token(l.OPERATOR, S('*+-,./:;<=>~^#()[]{}')) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} - -M._foldsymbols = { - _patterns = {'[a-z]+', '#'}, - [l.KEYWORD] = { - ['function'] = 1, ['end'] = -1, ['do'] = 1, od = -1, ['if'] = 1, fi = -1, - ['repeat'] = 1, ['until'] = -1 - }, - [l.COMMENT] = {['#'] = l.fold_line_comments('#')} -} - -return M +return lex diff --git a/lua/lexers/gettext.lua b/lua/lexers/gettext.lua @@ -1,39 +1,29 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Gettext LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'gettext'} +local lex = lexer.new('gettext') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local comment = token(l.COMMENT, '#' * S(': .~') * l.nonnewline^0) - --- Strings. -local string = token(l.STRING, l.delimited_range('"', true)) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match({ - 'msgid', 'msgid_plural', 'msgstr', 'fuzzy', 'c-format', 'no-c-format' -}, '-', true)) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match( + 'msgid msgid_plural msgstr fuzzy c-format no-c-format', true))) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Variables. 
-local variable = token(l.VARIABLE, S('%$@') * l.word) - -M._rules = { - {'whitespace', ws}, - {'comment', comment}, - {'string', string}, - {'keyword', keyword}, - {'identifier', identifier}, - {'variable', variable}, -} - -return M +lex:add_rule('variable', token(lexer.VARIABLE, S('%$@') * lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#' * S(': .~')))) + +return lex diff --git a/lua/lexers/gherkin.lua b/lua/lexers/gherkin.lua @@ -1,64 +1,40 @@ --- Copyright 2015-2017 Jason Schindler. See LICENSE. +-- Copyright 2015-2022 Jason Schindler. See LICENSE. -- Gherkin (https://github.com/cucumber/cucumber/wiki/Gherkin) LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'gherkin'} +local lex = lexer.new('gherkin', {fold_by_indentation = true}) -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Tags. -local tag = token('tag', '@' * l.word^0) - --- Comments. -local comment = token(l.COMMENT, '#' * l.nonnewline^0) - --- Strings. -local doc_str = '"""' * (l.any - '"""')^0 * P('"""')^-1 -local dq_str = l.delimited_range('"') - -local string = token(l.STRING, doc_str + dq_str) - --- Placeholders. -local placeholder = token('placeholder', l.nested_pair('<', '>')) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'Given', 'When', 'Then', 'And', 'But' -}) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match( + 'And Background But Examples Feature Given Outline Scenario Scenarios Then When'))) --- Identifiers. 
-local identifier = token(l.KEYWORD, P('Scenario Outline') + word_match{ - 'Feature', 'Background', 'Scenario', 'Scenarios', 'Examples' -}) +-- Strings. +local doc_str = lexer.range('"""') +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, doc_str + dq_str)) --- Examples. -local example = token('example', '|' * l.nonnewline^0) +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Numbers. -local number = token(l.NUMBER, l.float + l.integer) +-- lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -M._rules = { - {'whitespace', ws}, - {'comment', comment}, - {'tag', tag}, - {'placeholder', placeholder}, - {'keyword', keyword}, - {'identifier', identifier}, - {'example', example}, - {'string', string}, - {'number', number} -} +-- Tags. +lex:add_rule('tag', token('tag', '@' * lexer.word^0)) +lex:add_style('tag', lexer.styles.label) -M._tokenstyles = { - tag = l.STYLE_LABEL, - placeholder = l.STYLE_NUMBER, - example = l.STYLE_NUMBER -} +-- Placeholders. +lex:add_rule('placeholder', token('placeholder', lexer.range('<', '>', false, false, true))) +lex:add_style('placeholder', lexer.styles.variable) -M._FOLDBYINDENTATION = true +-- Examples. +lex:add_rule('example', token('example', lexer.to_eol('|'))) +lex:add_style('example', lexer.styles.number) -return M +return lex diff --git a/lua/lexers/glsl.lua b/lua/lexers/glsl.lua @@ -1,132 +1,104 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- GLSL LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S -local table = _G.table +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S, R = lpeg.P, lpeg.S, lpeg.R -local M = {_NAME = 'glsl'} +local lex = lexer.new('glsl', {inherit = lexer.load('cpp')}) -- Whitespace. 
-local ws = token(l.WHITESPACE, l.space^1) +lex:modify_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'attribute', 'const', 'in', 'inout', 'out', 'uniform', 'varying', 'invariant', - 'centroid', 'flat', 'smooth', 'noperspective', 'layout', 'patch', 'sample', - 'subroutine', 'lowp', 'mediump', 'highp', 'precision', +lex:modify_rule('keyword', token(lexer.KEYWORD, word_match{ + 'attribute', 'const', 'in', 'inout', 'out', 'uniform', 'varying', 'invariant', 'centroid', 'flat', + 'smooth', 'noperspective', 'layout', 'patch', 'sample', 'subroutine', 'lowp', 'mediump', 'highp', + 'precision', -- Macros. - '__VERSION__', '__LINE__', '__FILE__', -}) - --- Functions. -local func = token(l.FUNCTION, word_match{ - 'radians', 'degrees', 'sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'sinh', - 'cosh', 'tanh', 'asinh', 'acosh', 'atanh', 'pow', 'exp', 'log', 'exp2', - 'log2', 'sqrt', 'inversesqrt', 'abs', 'sign', 'floor', 'trunc', 'round', - 'roundEven', 'ceil', 'fract', 'mod', 'modf', 'min', 'max', 'clamp', 'mix', - 'step', 'smoothstep', 'isnan', 'isinf', 'floatBitsToInt', 'floatBitsToUint', - 'intBitsToFloat', 'uintBitsToFloat', 'fma', 'frexp', 'ldexp', 'packUnorm2x16', - 'packUnorm4x8', 'packSnorm4x8', 'unpackUnorm2x16', 'unpackUnorm4x8', - 'unpackSnorm4x8', 'packDouble2x32', 'unpackDouble2x32', 'length', 'distance', - 'dot', 'cross', 'normalize', 'ftransform', 'faceforward', 'reflect', - 'refract', 'matrixCompMult', 'outerProduct', 'transpose', 'determinant', - 'inverse', 'lessThan', 'lessThanEqual', 'greaterThan', 'greaterThanEqual', - 'equal', 'notEqual', 'any', 'all', 'not', 'uaddCarry', 'usubBorrow', - 'umulExtended', 'imulExtended', 'bitfieldExtract', 'bitfildInsert', - 'bitfieldReverse', 'bitCount', 'findLSB', 'findMSB', 'textureSize', - 'textureQueryLOD', 'texture', 'textureProj', 'textureLod', 'textureOffset', - 'texelFetch', 'texelFetchOffset', 'textureProjOffset', 'textureLodOffset', - 
'textureProjLod', 'textureProjLodOffset', 'textureGrad', 'textureGradOffset', - 'textureProjGrad', 'textureProjGradOffset', 'textureGather', - 'textureGatherOffset', 'texture1D', 'texture2D', 'texture3D', 'texture1DProj', - 'texture2DProj', 'texture3DProj', 'texture1DLod', 'texture2DLod', - 'texture3DLod', 'texture1DProjLod', 'texture2DProjLod', 'texture3DProjLod', - 'textureCube', 'textureCubeLod', 'shadow1D', 'shadow2D', 'shadow1DProj', - 'shadow2DProj', 'shadow1DLod', 'shadow2DLod', 'shadow1DProjLod', - 'shadow2DProjLod', 'dFdx', 'dFdy', 'fwidth', 'interpolateAtCentroid', - 'interpolateAtSample', 'interpolateAtOffset', 'noise1', 'noise2', 'noise3', - 'noise4', 'EmitStreamVertex', 'EndStreamPrimitive', 'EmitVertex', - 'EndPrimitive', 'barrier' -}) + '__VERSION__', '__LINE__', '__FILE__' +}) + lex:get_rule('keyword')) -- Types. -local type = token(l.TYPE, - S('bdiu')^-1 * 'vec' * R('24') + - P('d')^-1 * 'mat' * R('24') * ('x' * R('24')^-1) + - S('iu')^-1 * 'sampler' * R('13') * 'D' + - 'sampler' * R('12') * 'D' * P('Array')^-1 * 'Shadow' + - S('iu')^-1 * 'sampler' * (R('12') * 'DArray' + word_match{ - 'Cube', '2DRect', 'Buffer', '2DMS', '2DMSArray', - '2DMSCubeArray' - }) + - word_match{ - 'samplerCubeShadow', 'sampler2DRectShadow', - 'samplerCubeArrayShadow' - }) +-- LuaFormatter off +lex:modify_rule('type', token(lexer.TYPE, + S('bdiu')^-1 * 'vec' * R('24') + + P('d')^-1 * 'mat' * R('24') * ('x' * R('24')^-1) + + S('iu')^-1 * 'sampler' * R('13') * 'D' + + 'sampler' * R('12') * 'D' * P('Array')^-1 * 'Shadow' + + (S('iu')^-1 * 'sampler' * (R('12') * 'DArray' + + word_match('Cube 2DRect Buffer 2DMS 2DMSArray 2DMSCubeArray'))) + + word_match('samplerCubeShadow sampler2DRectShadow samplerCubeArrayShadow')) + +-- LuaFormatter on + lex:get_rule('type') + + +-- Functions. 
+token(lexer.FUNCTION, word_match{ + 'radians', 'degrees', 'sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'sinh', 'cosh', 'tanh', + 'asinh', 'acosh', 'atanh', 'pow', 'exp', 'log', 'exp2', 'log2', 'sqrt', 'inversesqrt', 'abs', + 'sign', 'floor', 'trunc', 'round', 'roundEven', 'ceil', 'fract', 'mod', 'modf', 'min', 'max', + 'clamp', 'mix', 'step', 'smoothstep', 'isnan', 'isinf', 'floatBitsToInt', 'floatBitsToUint', + 'intBitsToFloat', 'uintBitsToFloat', 'fma', 'frexp', 'ldexp', 'packUnorm2x16', 'packUnorm4x8', + 'packSnorm4x8', 'unpackUnorm2x16', 'unpackUnorm4x8', 'unpackSnorm4x8', 'packDouble2x32', + 'unpackDouble2x32', 'length', 'distance', 'dot', 'cross', 'normalize', 'ftransform', + 'faceforward', 'reflect', 'refract', 'matrixCompMult', 'outerProduct', 'transpose', 'determinant', + 'inverse', 'lessThan', 'lessThanEqual', 'greaterThan', 'greaterThanEqual', 'equal', 'notEqual', + 'any', 'all', 'not', 'uaddCarry', 'usubBorrow', 'umulExtended', 'imulExtended', 'bitfieldExtract', + 'bitfildInsert', 'bitfieldReverse', 'bitCount', 'findLSB', 'findMSB', 'textureSize', + 'textureQueryLOD', 'texture', 'textureProj', 'textureLod', 'textureOffset', 'texelFetch', + 'texelFetchOffset', 'textureProjOffset', 'textureLodOffset', 'textureProjLod', + 'textureProjLodOffset', 'textureGrad', 'textureGradOffset', 'textureProjGrad', + 'textureProjGradOffset', 'textureGather', 'textureGatherOffset', 'texture1D', 'texture2D', + 'texture3D', 'texture1DProj', 'texture2DProj', 'texture3DProj', 'texture1DLod', 'texture2DLod', + 'texture3DLod', 'texture1DProjLod', 'texture2DProjLod', 'texture3DProjLod', 'textureCube', + 'textureCubeLod', 'shadow1D', 'shadow2D', 'shadow1DProj', 'shadow2DProj', 'shadow1DLod', + 'shadow2DLod', 'shadow1DProjLod', 'shadow2DProjLod', 'dFdx', 'dFdy', 'fwidth', + 'interpolateAtCentroid', 'interpolateAtSample', 'interpolateAtOffset', 'noise1', 'noise2', + 'noise3', 'noise4', 'EmitStreamVertex', 'EndStreamPrimitive', 'EmitVertex', 'EndPrimitive', + 'barrier' +}) + -- 
Variables. -local variable = token(l.VARIABLE, word_match{ - 'gl_VertexID', 'gl_InstanceID', 'gl_Position', 'gl_PointSize', - 'gl_ClipDistance', 'gl_PrimitiveIDIn', 'gl_InvocationID', 'gl_PrimitiveID', - 'gl_Layer', 'gl_PatchVerticesIn', 'gl_TessLevelOuter', 'gl_TessLevelInner', - 'gl_TessCoord', 'gl_FragCoord', 'gl_FrontFacing', 'gl_PointCoord', - 'gl_SampleID', 'gl_SamplePosition', 'gl_FragColor', 'gl_FragData', - 'gl_FragDepth', 'gl_SampleMask', 'gl_ClipVertex', 'gl_FrontColor', - 'gl_BackColor', 'gl_FrontSecondaryColor', 'gl_BackSecondaryColor', - 'gl_TexCoord', 'gl_FogFragCoord', 'gl_Color', 'gl_SecondaryColor', - 'gl_Normal', 'gl_Vertex', 'gl_MultiTexCoord0', 'gl_MultiTexCoord1', - 'gl_MultiTexCoord2', 'gl_MultiTexCoord3', 'gl_MultiTexCoord4', - 'gl_MultiTexCoord5', 'gl_MultiTexCoord6', 'gl_MultiTexCoord7', 'gl_FogCoord' -}) +token(lexer.VARIABLE, word_match{ + 'gl_VertexID', 'gl_InstanceID', 'gl_Position', 'gl_PointSize', 'gl_ClipDistance', + 'gl_PrimitiveIDIn', 'gl_InvocationID', 'gl_PrimitiveID', 'gl_Layer', 'gl_PatchVerticesIn', + 'gl_TessLevelOuter', 'gl_TessLevelInner', 'gl_TessCoord', 'gl_FragCoord', 'gl_FrontFacing', + 'gl_PointCoord', 'gl_SampleID', 'gl_SamplePosition', 'gl_FragColor', 'gl_FragData', + 'gl_FragDepth', 'gl_SampleMask', 'gl_ClipVertex', 'gl_FrontColor', 'gl_BackColor', + 'gl_FrontSecondaryColor', 'gl_BackSecondaryColor', 'gl_TexCoord', 'gl_FogFragCoord', 'gl_Color', + 'gl_SecondaryColor', 'gl_Normal', 'gl_Vertex', 'gl_MultiTexCoord0', 'gl_MultiTexCoord1', + 'gl_MultiTexCoord2', 'gl_MultiTexCoord3', 'gl_MultiTexCoord4', 'gl_MultiTexCoord5', + 'gl_MultiTexCoord6', 'gl_MultiTexCoord7', 'gl_FogCoord' +}) + -- Constants. 
-local constant = token(l.CONSTANT, word_match{ +token(lexer.CONSTANT, word_match{ 'gl_MaxVertexAttribs', 'gl_MaxVertexUniformComponents', 'gl_MaxVaryingFloats', - 'gl_MaxVaryingComponents', 'gl_MaxVertexOutputComponents', - 'gl_MaxGeometryInputComponents', 'gl_MaxGeometryOutputComponents', - 'gl_MaxFragmentInputComponents', 'gl_MaxVertexTextureImageUnits', - 'gl_MaxCombinedTextureImageUnits', 'gl_MaxTextureImageUnits', + 'gl_MaxVaryingComponents', 'gl_MaxVertexOutputComponents', 'gl_MaxGeometryInputComponents', + 'gl_MaxGeometryOutputComponents', 'gl_MaxFragmentInputComponents', + 'gl_MaxVertexTextureImageUnits', 'gl_MaxCombinedTextureImageUnits', 'gl_MaxTextureImageUnits', 'gl_MaxFragmentUniformComponents', 'gl_MaxDrawBuffers', 'gl_MaxClipDistances', 'gl_MaxGeometryTextureImageUnits', 'gl_MaxGeometryOutputVertices', 'gl_MaxGeometryTotalOutputComponents', 'gl_MaxGeometryUniformComponents', 'gl_MaxGeometryVaryingComponents', 'gl_MaxTessControlInputComponents', 'gl_MaxTessControlOutputComponents', 'gl_MaxTessControlTextureImageUnits', - 'gl_MaxTessControlUniformComponents', - 'gl_MaxTessControlTotalOutputComponents', + 'gl_MaxTessControlUniformComponents', 'gl_MaxTessControlTotalOutputComponents', 'gl_MaxTessEvaluationInputComponents', 'gl_MaxTessEvaluationOutputComponents', - 'gl_MaxTessEvaluationTextureImageUnits', - 'gl_MaxTessEvaluationUniformComponents', 'gl_MaxTessPatchComponents', - 'gl_MaxPatchVertices', 'gl_MaxTessGenLevel', 'gl_MaxTextureUnits', - 'gl_MaxTextureCoords', 'gl_MaxClipPlanes', - - 'gl_DepthRange', 'gl_ModelViewMatrix', 'gl_ProjectionMatrix', - 'gl_ModelViewProjectionMatrix', 'gl_TextureMatrix', 'gl_NormalMatrix', - 'gl_ModelViewMatrixInverse', 'gl_ProjectionMatrixInverse', - 'gl_ModelViewProjectionMatrixInverse', 'gl_TextureMatrixInverse', - 'gl_ModelViewMatrixTranspose', 'gl_ProjectionMatrixTranspose', - 'gl_ModelViewProjectionMatrixTranspose', 'gl_TextureMatrixTranspose', - 'gl_ModelViewMatrixInverseTranspose', 
'gl_ProjectionMatrixInverseTranspose', - 'gl_ModelViewProjectionMatrixInverseTranspose', - 'gl_TextureMatrixInverseTranspose', 'gl_NormalScale', 'gl_ClipPlane', - 'gl_Point', 'gl_FrontMaterial', 'gl_BackMaterial', 'gl_LightSource', - 'gl_LightModel', 'gl_FrontLightModelProduct', 'gl_BackLightModelProduct', - 'gl_FrontLightProduct', 'gl_BackLightProduct', 'gl_TextureEnvColor', - 'gl_EyePlaneS', 'gl_EyePlaneT', 'gl_EyePlaneR', 'gl_EyePlaneQ', - 'gl_ObjectPlaneS', 'gl_ObjectPlaneT', 'gl_ObjectPlaneR', 'gl_ObjectPlaneQ', + 'gl_MaxTessEvaluationTextureImageUnits', 'gl_MaxTessEvaluationUniformComponents', + 'gl_MaxTessPatchComponents', 'gl_MaxPatchVertices', 'gl_MaxTessGenLevel', 'gl_MaxTextureUnits', + 'gl_MaxTextureCoords', 'gl_MaxClipPlanes', -- + 'gl_DepthRange', 'gl_ModelViewMatrix', 'gl_ProjectionMatrix', 'gl_ModelViewProjectionMatrix', + 'gl_TextureMatrix', 'gl_NormalMatrix', 'gl_ModelViewMatrixInverse', 'gl_ProjectionMatrixInverse', + 'gl_ModelViewProjectionMatrixInverse', 'gl_TextureMatrixInverse', 'gl_ModelViewMatrixTranspose', + 'gl_ProjectionMatrixTranspose', 'gl_ModelViewProjectionMatrixTranspose', + 'gl_TextureMatrixTranspose', 'gl_ModelViewMatrixInverseTranspose', + 'gl_ProjectionMatrixInverseTranspose', 'gl_ModelViewProjectionMatrixInverseTranspose', + 'gl_TextureMatrixInverseTranspose', 'gl_NormalScale', 'gl_ClipPlane', 'gl_Point', + 'gl_FrontMaterial', 'gl_BackMaterial', 'gl_LightSource', 'gl_LightModel', + 'gl_FrontLightModelProduct', 'gl_BackLightModelProduct', 'gl_FrontLightProduct', + 'gl_BackLightProduct', 'gl_TextureEnvColor', 'gl_EyePlaneS', 'gl_EyePlaneT', 'gl_EyePlaneR', + 'gl_EyePlaneQ', 'gl_ObjectPlaneS', 'gl_ObjectPlaneT', 'gl_ObjectPlaneR', 'gl_ObjectPlaneQ', 'gl_Fog' -}) - --- Extend cpp lexer to include GLSL elements. 
-local cpp = l.load('cpp') -local _rules = cpp._rules -_rules[1] = {'whitespace', ws} -table.insert(_rules, 2, {'glsl_keyword', keyword}) -table.insert(_rules, 3, {'glsl_function', func}) -table.insert(_rules, 4, {'glsl_type', type}) -table.insert(_rules, 5, {'glsl_variable', variable}) -M._rules = _rules -M._foldsymbols = cpp._foldsymbols +})) -return M +return lex diff --git a/lua/lexers/gnuplot.lua b/lua/lexers/gnuplot.lua @@ -1,80 +1,61 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Gnuplot LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'gnuplot'} +local lex = lexer.new('gnuplot') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local comment = token(l.COMMENT, '#' * l.nonnewline^0) - --- Strings. -local sq_str = l.delimited_range("'") -local dq_str = l.delimited_range('"') -local bk_str = l.delimited_range('[]', true) -local bc_str = l.delimited_range('{}', true) -local string = token(l.STRING, sq_str + dq_str + bk_str + bc_str) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match{ - 'cd', 'call', 'clear', 'exit', 'fit', 'help', 'history', 'if', 'load', - 'pause', 'plot', 'using', 'with', 'index', 'every', 'smooth', 'thru', 'print', - 'pwd', 'quit', 'replot', 'reread', 'reset', 'save', 'set', 'show', 'unset', - 'shell', 'splot', 'system', 'test', 'unset', 'update' -}) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'cd', 'call', 'clear', 'exit', 'fit', 'help', 'history', 'if', 'load', 'pause', 'plot', 'using', + 'with', 'index', 'every', 'smooth', 'thru', 'print', 'pwd', 'quit', 'replot', 'reread', 'reset', + 'save', 'set', 'show', 'unset', 'shell', 'splot', 'system', 'test', 'unset', 'update' +})) -- Functions. -local func = token(l.FUNCTION, word_match{ - 'abs', 'acos', 'acosh', 'arg', 'asin', 'asinh', 'atan', 'atan2', 'atanh', - 'besj0', 'besj1', 'besy0', 'besy1', 'ceil', 'cos', 'cosh', 'erf', 'erfc', - 'exp', 'floor', 'gamma', 'ibeta', 'inverf', 'igamma', 'imag', 'invnorm', - 'int', 'lambertw', 'lgamma', 'log', 'log10', 'norm', 'rand', 'real', 'sgn', - 'sin', 'sinh', 'sqrt', 'tan', 'tanh', 'column', 'defined', 'tm_hour', - 'tm_mday', 'tm_min', 'tm_mon', 'tm_sec', 'tm_wday', 'tm_yday', 'tm_year', - 'valid' -}) +lex:add_rule('function', token(lexer.FUNCTION, word_match{ + 'abs', 'acos', 'acosh', 'arg', 'asin', 'asinh', 'atan', 'atan2', 'atanh', 'besj0', 'besj1', + 'besy0', 'besy1', 'ceil', 'cos', 'cosh', 'erf', 'erfc', 'exp', 'floor', 'gamma', 'ibeta', + 'inverf', 'igamma', 'imag', 'invnorm', 'int', 'lambertw', 'lgamma', 'log', 'log10', 'norm', + 'rand', 'real', 'sgn', 'sin', 'sinh', 'sqrt', 'tan', 'tanh', 'column', 'defined', 'tm_hour', + 'tm_mday', 'tm_min', 'tm_mon', 'tm_sec', 'tm_wday', 'tm_yday', 'tm_year', 'valid' +})) -- Variables. 
-local variable = token(l.VARIABLE, word_match{ - 'angles', 'arrow', 'autoscale', 'bars', 'bmargin', 'border', 'boxwidth', - 'clabel', 'clip', 'cntrparam', 'colorbox', 'contour', 'datafile ', - 'decimalsign', 'dgrid3d', 'dummy', 'encoding', 'fit', 'fontpath', 'format', - 'functions', 'function', 'grid', 'hidden3d', 'historysize', 'isosamples', - 'key', 'label', 'lmargin', 'loadpath', 'locale', 'logscale', 'mapping', - 'margin', 'mouse', 'multiplot', 'mx2tics', 'mxtics', 'my2tics', 'mytics', - 'mztics', 'offsets', 'origin', 'output', 'parametric', 'plot', 'pm3d', - 'palette', 'pointsize', 'polar', 'print', 'rmargin', 'rrange', 'samples', - 'size', 'style', 'surface', 'terminal', 'tics', 'ticslevel', 'ticscale', - 'timestamp', 'timefmt', 'title', 'tmargin', 'trange', 'urange', 'variables', - 'version', 'view', 'vrange', 'x2data', 'x2dtics', 'x2label', 'x2mtics', - 'x2range', 'x2tics', 'x2zeroaxis', 'xdata', 'xdtics', 'xlabel', 'xmtics', - 'xrange', 'xtics', 'xzeroaxis', 'y2data', 'y2dtics', 'y2label', 'y2mtics', - 'y2range', 'y2tics', 'y2zeroaxis', 'ydata', 'ydtics', 'ylabel', 'ymtics', - 'yrange', 'ytics', 'yzeroaxis', 'zdata', 'zdtics', 'cbdata', 'cbdtics', - 'zero', 'zeroaxis', 'zlabel', 'zmtics', 'zrange', 'ztics', 'cblabel', - 'cbmtics', 'cbrange', 'cbtics' -}) +lex:add_rule('variable', token(lexer.VARIABLE, word_match{ + 'angles', 'arrow', 'autoscale', 'bars', 'bmargin', 'border', 'boxwidth', 'clabel', 'clip', + 'cntrparam', 'colorbox', 'contour', 'datafile', 'decimalsign', 'dgrid3d', 'dummy', 'encoding', + 'fit', 'fontpath', 'format', 'functions', 'function', 'grid', 'hidden3d', 'historysize', + 'isosamples', 'key', 'label', 'lmargin', 'loadpath', 'locale', 'logscale', 'mapping', 'margin', + 'mouse', 'multiplot', 'mx2tics', 'mxtics', 'my2tics', 'mytics', 'mztics', 'offsets', 'origin', + 'output', 'parametric', 'plot', 'pm3d', 'palette', 'pointsize', 'polar', 'print', 'rmargin', + 'rrange', 'samples', 'size', 'style', 'surface', 'terminal', 'tics', 'ticslevel', 
'ticscale', + 'timestamp', 'timefmt', 'title', 'tmargin', 'trange', 'urange', 'variables', 'version', 'view', + 'vrange', 'x2data', 'x2dtics', 'x2label', 'x2mtics', 'x2range', 'x2tics', 'x2zeroaxis', 'xdata', + 'xdtics', 'xlabel', 'xmtics', 'xrange', 'xtics', 'xzeroaxis', 'y2data', 'y2dtics', 'y2label', + 'y2mtics', 'y2range', 'y2tics', 'y2zeroaxis', 'ydata', 'ydtics', 'ylabel', 'ymtics', 'yrange', + 'ytics', 'yzeroaxis', 'zdata', 'zdtics', 'cbdata', 'cbdtics', 'zero', 'zeroaxis', 'zlabel', + 'zmtics', 'zrange', 'ztics', 'cblabel', 'cbmtics', 'cbrange', 'cbtics' +})) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, S('-+~!$*%=<>&|^?:()')) +-- Strings. +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local br_str = lexer.range('[', ']', true) + lexer.range('{', '}', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + br_str)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'function', func}, - {'variable', variable}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'operator', operator}, -} +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('-+~!$*%=<>&|^?:()'))) -return M +return lex diff --git a/lua/lexers/go.lua b/lua/lexers/go.lua @@ -1,78 +1,60 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Go LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'go'} +local lex = lexer.new('go') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. 
-local line_comment = '//' * l.nonnewline^0 -local block_comment = '/*' * (l.any - '*/')^0 * '*/' -local comment = token(l.COMMENT, line_comment + block_comment) - --- Strings. -local sq_str = l.delimited_range("'", true) -local dq_str = l.delimited_range('"', true) -local raw_str = l.delimited_range('`', false, true) -local string = token(l.STRING, sq_str + dq_str + raw_str) - --- Numbers. -local number = token(l.NUMBER, (l.float + l.integer) * P('i')^-1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'break', 'case', 'chan', 'const', 'continue', 'default', 'defer', 'else', - 'fallthrough', 'for', 'func', 'go', 'goto', 'if', 'import', 'interface', - 'map', 'package', 'range', 'return', 'select', 'struct', 'switch', 'type', - 'var' -}) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'break', 'case', 'chan', 'const', 'continue', 'default', 'defer', 'else', 'fallthrough', 'for', + 'func', 'go', 'goto', 'if', 'import', 'interface', 'map', 'package', 'range', 'return', 'select', + 'struct', 'switch', 'type', 'var' +})) -- Constants. -local constant = token(l.CONSTANT, word_match{ - 'true', 'false', 'iota', 'nil' -}) +lex:add_rule('constant', token(lexer.CONSTANT, word_match('true false iota nil'))) -- Types. -local type = token(l.TYPE, word_match{ - 'bool', 'byte', 'complex64', 'complex128', 'error', 'float32', 'float64', - 'int', 'int8', 'int16', 'int32', 'int64', 'rune', 'string', 'uint', 'uint8', - 'uint16', 'uint32', 'uint64', 'uintptr' -}) +lex:add_rule('type', token(lexer.TYPE, word_match{ + 'bool', 'byte', 'complex64', 'complex128', 'error', 'float32', 'float64', 'int', 'int8', 'int16', + 'int32', 'int64', 'rune', 'string', 'uint', 'uint8', 'uint16', 'uint32', 'uint64', 'uintptr' +})) -- Functions. 
-local func = token(l.FUNCTION, word_match{ - 'append', 'cap', 'close', 'complex', 'copy', 'delete', 'imag', 'len', 'make', - 'new', 'panic', 'print', 'println', 'real', 'recover' -}) +lex:add_rule('function', token(lexer.FUNCTION, word_match{ + 'append', 'cap', 'close', 'complex', 'copy', 'delete', 'imag', 'len', 'make', 'new', 'panic', + 'print', 'println', 'real', 'recover' +})) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, S('+-*/%&|^<>=!:;.,()[]{}')) +-- Strings. +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +local raw_str = lexer.range('`', false, false) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + raw_str)) + +-- Comments. +local line_comment = lexer.to_eol('//') +local block_comment = lexer.range('/*', '*/') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'constant', constant}, - {'type', type}, - {'function', func}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number * P('i')^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%&|^<>=!:;.,()[]{}'))) -M._foldsymbols = { - _patterns = {'[{}]', '/%*', '%*/', '//'}, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} -} +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) -return M +return lex diff --git a/lua/lexers/groovy.lua b/lua/lexers/groovy.lua @@ -1,89 +1,67 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. 
See LICENSE. -- Groovy LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'groovy'} +local lex = lexer.new('groovy') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local line_comment = '//' * l.nonnewline_esc^0 -local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) - --- Strings. -local sq_str = l.delimited_range("'") -local dq_str = l.delimited_range('"') -local triple_sq_str = "'''" * (l.any - "'''")^0 * P("'''")^-1 -local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1 -local regex_str = #P('/') * l.last_char_includes('=~|!<>+-*?&,:;([{') * - l.delimited_range('/', true) -local string = token(l.STRING, triple_sq_str + triple_dq_str + sq_str + - dq_str) + - token(l.REGEX, regex_str) - --- Numbers. -local number = token(l.NUMBER, l.float + l.integer) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match{ - 'abstract', 'break', 'case', 'catch', 'continue', 'default', 'do', 'else', - 'extends', 'final', 'finally', 'for', 'if', 'implements', 'instanceof', - 'native', 'new', 'private', 'protected', 'public', 'return', 'static', - 'switch', 'synchronized', 'throw', 'throws', 'transient', 'try', 'volatile', - 'while', 'strictfp', 'package', 'import', 'as', 'assert', 'def', 'mixin', - 'property', 'test', 'using', 'in', - 'false', 'null', 'super', 'this', 'true', 'it' -}) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'abstract', 'break', 'case', 'catch', 'continue', 'default', 'do', 'else', 'extends', 'final', + 'finally', 'for', 'if', 'implements', 'instanceof', 'native', 'new', 'private', 'protected', + 'public', 'return', 'static', 'switch', 'synchronized', 'throw', 'throws', 'transient', 'try', + 'volatile', 'while', 'strictfp', 'package', 'import', 'as', 'assert', 'def', 'mixin', 'property', + 'test', 'using', 'in', 'false', 'null', 'super', 'this', 'true', 'it' +})) -- Functions. 
-local func = token(l.FUNCTION, word_match{ - 'abs', 'any', 'append', 'asList', 'asWritable', 'call', 'collect', - 'compareTo', 'count', 'div', 'dump', 'each', 'eachByte', 'eachFile', - 'eachLine', 'every', 'find', 'findAll', 'flatten', 'getAt', 'getErr', 'getIn', - 'getOut', 'getText', 'grep', 'immutable', 'inject', 'inspect', 'intersect', - 'invokeMethods', 'isCase', 'join', 'leftShift', 'minus', 'multiply', - 'newInputStream', 'newOutputStream', 'newPrintWriter', 'newReader', - 'newWriter', 'next', 'plus', 'pop', 'power', 'previous', 'print', 'println', - 'push', 'putAt', 'read', 'readBytes', 'readLines', 'reverse', 'reverseEach', - 'round', 'size', 'sort', 'splitEachLine', 'step', 'subMap', 'times', - 'toInteger', 'toList', 'tokenize', 'upto', 'waitForOrKill', 'withPrintWriter', - 'withReader', 'withStream', 'withWriter', 'withWriterAppend', 'write', - 'writeLine' -}) +lex:add_rule('function', token(lexer.FUNCTION, word_match{ + 'abs', 'any', 'append', 'asList', 'asWritable', 'call', 'collect', 'compareTo', 'count', 'div', + 'dump', 'each', 'eachByte', 'eachFile', 'eachLine', 'every', 'find', 'findAll', 'flatten', + 'getAt', 'getErr', 'getIn', 'getOut', 'getText', 'grep', 'immutable', 'inject', 'inspect', + 'intersect', 'invokeMethods', 'isCase', 'join', 'leftShift', 'minus', 'multiply', + 'newInputStream', 'newOutputStream', 'newPrintWriter', 'newReader', 'newWriter', 'next', 'plus', + 'pop', 'power', 'previous', 'print', 'println', 'push', 'putAt', 'read', 'readBytes', 'readLines', + 'reverse', 'reverseEach', 'round', 'size', 'sort', 'splitEachLine', 'step', 'subMap', 'times', + 'toInteger', 'toList', 'tokenize', 'upto', 'waitForOrKill', 'withPrintWriter', 'withReader', + 'withStream', 'withWriter', 'withWriterAppend', 'write', 'writeLine' +})) -- Types. 
-local type = token(l.TYPE, word_match{ - 'boolean', 'byte', 'char', 'class', 'double', 'float', 'int', 'interface', - 'long', 'short', 'void' -}) +lex:add_rule('type', token(lexer.TYPE, word_match( + 'boolean byte char class double float int interface long short void'))) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Operators. -local operator = token(l.OPERATOR, S('=~|!<>+-/*?&.,:;()[]{}')) +-- Comments. +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Strings. +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local tq_str = lexer.range("'''") + lexer.range('"""') +local string = token(lexer.STRING, tq_str + sq_str + dq_str) +local regex_str = #P('/') * lexer.last_char_includes('=~|!<>+-*?&,:;([{') * lexer.range('/', true) +local regex = token(lexer.REGEX, regex_str) +lex:add_rule('string', string + regex) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'function', func}, - {'type', type}, - {'identifier', identifier}, - {'comment', comment}, - {'string', string}, - {'number', number}, - {'operator', operator}, -} +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=~|!<>+-/*?&.,:;()[]{}'))) -M._foldsymbols = { - _patterns = {'[{}]', '/%*', '%*/', '//'}, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} -} +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) -return M +return lex diff --git a/lua/lexers/gtkrc.lua b/lua/lexers/gtkrc.lua @@ -1,71 +1,52 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. 
+-- Copyright 2006-2022 Mitchell. See LICENSE. -- Gtkrc LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'gtkrc'} +local lex = lexer.new('gtkrc') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local comment = token(l.COMMENT, '#' * l.nonnewline^0) - --- Strings. -local sq_str = l.delimited_range("'", true) -local dq_str = l.delimited_range('"', true) -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. -local number = token(l.NUMBER, l.digit^1 * ('.' * l.digit^1)^-1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'binding', 'class', 'include', 'module_path', 'pixmap_path', 'im_module_file', - 'style', 'widget', 'widget_class' -}) +lex:add_rule('keyword', token(lexer.KEYWORD, word_match( + 'binding class include module_path pixmap_path im_module_file style widget widget_class'))) -- Variables. -local variable = token(l.VARIABLE, word_match{ - 'bg', 'fg', 'base', 'text', 'xthickness', 'ythickness', 'bg_pixmap', 'font', - 'fontset', 'font_name', 'stock', 'color', 'engine' -}) +lex:add_rule('variable', token(lexer.VARIABLE, word_match{ + 'bg', 'fg', 'base', 'text', 'xthickness', 'ythickness', 'bg_pixmap', 'font', 'fontset', + 'font_name', 'stock', 'color', 'engine' +})) -- States. -local state = token(l.CONSTANT, word_match{ - 'ACTIVE', 'SELECTED', 'NORMAL', 'PRELIGHT', 'INSENSITIVE', 'TRUE', 'FALSE' -}) +lex:add_rule('state', + token('state', word_match('ACTIVE SELECTED NORMAL PRELIGHT INSENSITIVE TRUE FALSE'))) +lex:add_style('state', lexer.styles.constant) -- Functions. 
-local func = token(l.FUNCTION, word_match{ - 'mix', 'shade', 'lighter', 'darker' -}) +lex:add_rule('function', token(lexer.FUNCTION, word_match('mix shade lighter darker'))) -- Identifiers. -local identifier = token(l.IDENTIFIER, l.alpha * (l.alnum + S('_-'))^0) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha * (lexer.alnum + S('_-'))^0)) --- Operators. -local operator = token(l.OPERATOR, S(':=,*()[]{}')) +-- Strings. +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'variable', variable}, - {'state', state}, - {'function', func}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.digit^1 * ('.' * lexer.digit^1)^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S(':=,*()[]{}'))) -M._foldsymbols = { - _patterns = {'[{}]', '#'}, - [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, - [l.COMMENT] = {['#'] = l.fold_line_comments('#')} -} +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#')) -return M +return lex diff --git a/lua/lexers/haskell.lua b/lua/lexers/haskell.lua @@ -1,60 +1,45 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- Haskell LPeg lexer. -- Modified by Alex Suraci. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'haskell'} +local lex = lexer.new('haskell', {fold_by_indentation = true}) -- Whitespace. 
-local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) --- Comments. -local line_comment = '--' * l.nonnewline_esc^0 -local block_comment = '{-' * (l.any - '-}')^0 * P('-}')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'case', 'class', 'data', 'default', 'deriving', 'do', 'else', 'if', 'import', 'in', 'infix', + 'infixl', 'infixr', 'instance', 'let', 'module', 'newtype', 'of', 'then', 'type', 'where', '_', + 'as', 'qualified', 'hiding' +})) --- Strings. -local string = token(l.STRING, l.delimited_range('"')) +-- Types & type constructors. +local word = (lexer.alnum + S("._'#"))^0 +local op = lexer.punct - S('()[]{}') +lex:add_rule('type', token(lexer.TYPE, (lexer.upper * word) + (':' * (op^1 - ':')))) --- Chars. -local char = token(l.STRING, l.delimited_range("'", true)) +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * word)) --- Numbers. -local number = token(l.NUMBER, l.float + l.integer) +-- Strings. +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) --- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'case', 'class', 'data', 'default', 'deriving', 'do', 'else', 'if', 'import', - 'in', 'infix', 'infixl', 'infixr', 'instance', 'let', 'module', 'newtype', - 'of', 'then', 'type', 'where', '_', 'as', 'qualified', 'hiding' -}) +-- Comments. +local line_comment = lexer.to_eol('--', true) +local block_comment = lexer.range('{-', '-}') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) --- Identifiers. -local word = (l.alnum + S("._'#"))^0 -local identifier = token(l.IDENTIFIER, (l.alpha + '_') * word) +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. 
-local op = l.punct - S('()[]{}') -local operator = token(l.OPERATOR, op) +lex:add_rule('operator', token(lexer.OPERATOR, op)) --- Types & type constructors. -local constructor = token(l.TYPE, (l.upper * word) + (P(":") * (op^1 - P(":")))) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'type', constructor}, - {'identifier', identifier}, - {'string', string}, - {'char', char}, - {'comment', comment}, - {'number', number}, - {'operator', operator}, -} - -M._FOLDBYINDENTATION = true - -return M +return lex diff --git a/lua/lexers/html.lua b/lua/lexers/html.lua @@ -1,162 +1,148 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- HTML LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'html'} - -case_insensitive_tags = true +local lex = lexer.new('html') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) -- Comments. -local comment = token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->')^-1) - --- Strings. -local sq_str = l.delimited_range("'") -local dq_str = l.delimited_range('"') -local string = #S('\'"') * l.last_char_includes('=') * - token(l.STRING, sq_str + dq_str) +lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->'))) -local in_tag = #P((1 - S'><')^0 * '>') - --- Numbers. -local number = #l.digit * l.last_char_includes('=') * - token(l.NUMBER, l.digit^1 * P('%')^-1) * in_tag +-- Doctype. +lex:add_rule('doctype', token('doctype', lexer.range('<!' * word_match('doctype', true), '>'))) +lex:add_style('doctype', lexer.styles.comment) -- Elements. 
-local known_element = token('element', '<' * P('/')^-1 * word_match({ - 'a', 'abbr', 'address', 'area', 'article', 'aside', 'audio', 'b', 'base', - 'bdi', 'bdo', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', - 'cite', 'code', 'col', 'colgroup', 'content', 'data', 'datalist', 'dd', - 'decorator', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'element', 'em', - 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', - 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'i', 'iframe', 'img', - 'input', 'ins', 'kbd', 'keygen', 'label', 'legend', 'li', 'link', 'main', - 'map', 'mark', 'menu', 'menuitem', 'meta', 'meter', 'nav', 'noscript', - 'object', 'ol', 'optgroup', 'option', 'output', 'p', 'param', 'pre', - 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'script', 'section', - 'select', 'shadow', 'small', 'source', 'spacer', 'spacer', 'span', 'strong', - 'style', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'template', - 'textarea', 'tfoot', 'th', 'thead', 'time', 'title', 'tr', 'track', 'u', 'ul', - 'var', 'video', 'wbr' -}, nil, case_insensitive_tags)) -local unknown_element = token('unknown_element', '<' * P('/')^-1 * l.word) -local element = known_element + unknown_element - --- Attributes. 
-local known_attribute = token('attribute', word_match({ - 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt', 'async', - 'autocomplete', 'autofocus', 'autoplay', 'bgcolor', 'border', 'buffered', - 'challenge', 'charset', 'checked', 'cite', 'class', 'code', 'codebase', - 'color', 'cols', 'colspan', 'content', 'contenteditable', 'contextmenu', - 'controls', 'coords', 'data', 'data-', 'datetime', 'default', 'defer', 'dir', - 'dirname', 'disabled', 'download', 'draggable', 'dropzone', 'enctype', 'for', - 'form', 'headers', 'height', 'hidden', 'high', 'href', 'hreflang', - 'http-equiv', 'icon', 'id', 'ismap', 'itemprop', 'keytype', 'kind', 'label', - 'lang', 'language', 'list', 'loop', 'low', 'manifest', 'max', 'maxlength', - 'media', 'method', 'min', 'multiple', 'name', 'novalidate', 'open', 'optimum', - 'pattern', 'ping', 'placeholder', 'poster', 'preload', 'pubdate', - 'radiogroup', 'readonly', 'rel', 'required', 'reversed', 'role', 'rows', - 'rowspan', 'sandbox', 'spellcheck', 'scope', 'scoped', 'seamless', 'selected', - 'shape', 'size', 'sizes', 'span', 'src', 'srcdoc', 'srclang', 'start', - 'step', 'style', 'summary', 'tabindex', 'target', 'title', 'type', 'usemap', - 'value', 'width', 'wrap' -}, '-', case_insensitive_tags) + ((P('data-') + 'aria-') * (l.alnum + '-')^1)) -local unknown_attribute = token('unknown_attribute', l.word) -local attribute = (known_attribute + unknown_attribute) * #(l.space^0 * '=') +local single_element = token('single_element', '<' * P('/')^-1 * word_match( + { + 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', + 'param', 'source', 'track', 'wbr' + }, true)) +local paired_element = token('element', '<' * P('/')^-1 * word_match({ + 'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b', 'bdi', 'bdo', 'blockquote', 'body', + 'button', 'canvas', 'caption', 'cite', 'code', 'colgroup', 'content', 'data', 'datalist', 'dd', + 'decorator', 'del', 'details', 'dfn', 'div', 'dl', 
'dt', 'element', 'em', 'fieldset', + 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', + 'html', 'i', 'iframe', 'ins', 'kbd', 'label', 'legend', 'li', 'main', 'map', 'mark', 'menu', + 'menuitem', 'meter', 'nav', 'noscript', 'object', 'ol', 'optgroup', 'option', 'output', 'p', + 'pre', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'script', 'section', 'select', 'shadow', + 'small', 'spacer', 'span', 'strong', 'style', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', + 'template', 'textarea', 'tfoot', 'th', 'thead', 'time', 'title', 'tr', 'u', 'ul', 'var', 'video' +}, true)) +local known_element = single_element + paired_element +local unknown_element = token('unknown_element', '<' * P('/')^-1 * (lexer.alnum + '-')^1) +local element = (known_element + unknown_element) * -P(':') +lex:add_rule('element', element) +lex:add_style('single_element', lexer.styles.keyword) +lex:add_style('element', lexer.styles.keyword) +lex:add_style('unknown_element', lexer.styles.keyword .. {italics = true}) -- Closing tags. local tag_close = token('element', P('/')^-1 * '>') +lex:add_rule('tag_close', tag_close) + +-- Attributes. 
+local known_attribute = token('attribute', word_match({
+  'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt', 'async', 'autocomplete',
+  'autofocus', 'autoplay', 'bgcolor', 'border', 'buffered', 'challenge', 'charset', 'checked',
+  'cite', 'class', 'code', 'codebase', 'color', 'cols', 'colspan', 'content', 'contenteditable',
+  'contextmenu', 'controls', 'coords', 'data', 'data-', 'datetime', 'default', 'defer', 'dir',
+  'dirname', 'disabled', 'download', 'draggable', 'dropzone', 'enctype', 'for', 'form', 'headers',
+  'height', 'hidden', 'high', 'href', 'hreflang', 'http-equiv', 'icon', 'id', 'ismap', 'itemprop',
+  'keytype', 'kind', 'label', 'lang', 'language', 'list', 'loop', 'low', 'manifest', 'max',
+  'maxlength', 'media', 'method', 'min', 'multiple', 'name', 'novalidate', 'open', 'optimum',
+  'pattern', 'ping', 'placeholder', 'poster', 'preload', 'pubdate', 'radiogroup', 'readonly', 'rel',
+  'required', 'reversed', 'role', 'rows', 'rowspan', 'sandbox', 'scope', 'scoped', 'seamless',
+  'selected', 'shape', 'size', 'sizes', 'span', 'spellcheck', 'src', 'srcdoc', 'srclang', 'start',
+  'step', 'style', 'summary', 'tabindex', 'target', 'title', 'type', 'usemap', 'value', 'width',
+  'wrap'
+}, true) + ((P('data-') + 'aria-') * (lexer.alnum + '-')^1))
+local unknown_attribute = token('unknown_attribute', (lexer.alnum + '-')^1)
+local attribute = (known_attribute + unknown_attribute) * #(lexer.space^0 * '=')
+lex:add_rule('attribute', attribute)
+lex:add_style('attribute', lexer.styles.type)
+lex:add_style('unknown_attribute', lexer.styles.type .. {italics = true})
 
 -- Equals.
-local equals = token(l.OPERATOR, '=') * in_tag
+-- TODO: performance is terrible on large files.
+local in_tag = P(function(input, index) + local before = input:sub(1, index - 1) + local s, e = before:find('<[^>]-$'), before:find('>[^<]-$') + if s and e then return s > e and index or nil end + if s then return index end + return input:find('^[^<]->', index) and index or nil +end) + +local equals = token(lexer.OPERATOR, '=') -- * in_tag +-- lex:add_rule('equals', equals) --- Entities. -local entity = token('entity', '&' * (l.any - l.space - ';')^1 * ';') +-- Strings. +local string = #S('\'"') * lexer.last_char_includes('=') * + token(lexer.STRING, lexer.range("'") + lexer.range('"')) +lex:add_rule('string', string) --- Doctype. -local doctype = token('doctype', '<!' * - word_match({'doctype'}, nil, case_insensitive_tags) * - (l.any - '>')^1 * '>') - -M._rules = { - {'whitespace', ws}, - {'comment', comment}, - {'doctype', doctype}, - {'element', element}, - {'tag_close', tag_close}, - {'attribute', attribute}, --- {'equals', equals}, - {'string', string}, - {'number', number}, - {'entity', entity}, -} - -M._tokenstyles = { - element = l.STYLE_KEYWORD, - unknown_element = l.STYLE_KEYWORD..',italics', - attribute = l.STYLE_TYPE, - unknown_attribute = l.STYLE_TYPE..',italics', - entity = l.STYLE_OPERATOR, - doctype = l.STYLE_COMMENT -} - -M._foldsymbols = { - _patterns = {'</?', '/>', '<!%-%-', '%-%->'}, - element = {['<'] = 1, ['/>'] = -1, ['</'] = -1}, - unknown_element = {['<'] = 1, ['/>'] = -1, ['</'] = -1}, - [l.COMMENT] = {['<!--'] = 1, ['-->'] = -1} -} +-- Numbers. +local number = token(lexer.NUMBER, lexer.dec_num * P('%')^-1) +lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * number) -- *in_tag) --- Tags that start embedded languages. -M.embed_start_tag = element * - (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 * - ws^0 * tag_close -M.embed_end_tag = element * tag_close - --- Embedded CSS. 
-local css = l.load('css') -local style_element = word_match({'style'}, nil, case_insensitive_tags) -local css_start_rule = #(P('<') * style_element * - ('>' + P(function(input, index) - if input:find('^%s+type%s*=%s*(["\'])text/css%1', index) then - return index - end -end))) * M.embed_start_tag -- <style type="text/css"> -local css_end_rule = #('</' * style_element * ws^0 * '>') * - M.embed_end_tag -- </style> -l.embed_lexer(M, css, css_start_rule, css_end_rule) - --- Embedded JavaScript. -local js = l.load('javascript') -local script_element = word_match({'script'}, nil, case_insensitive_tags) -local js_start_rule = #(P('<') * script_element * - ('>' + P(function(input, index) - if input:find('^%s+type%s*=%s*(["\'])text/javascript%1', index) then - return index - end -end))) * M.embed_start_tag -- <script type="text/javascript"> -local js_end_rule = #('</' * script_element * ws^0 * '>') * - M.embed_end_tag -- </script> -local js_line_comment = '//' * (l.nonnewline_esc - js_end_rule)^0 -local js_block_comment = '/*' * (l.any - '*/' - js_end_rule)^0 * P('*/')^-1 -js._RULES['comment'] = token(l.COMMENT, js_line_comment + js_block_comment) -l.embed_lexer(M, js, js_start_rule, js_end_rule) - --- Embedded CoffeeScript. -local cs = l.load('coffeescript') -local script_element = word_match({'script'}, nil, case_insensitive_tags) -local cs_start_rule = #(P('<') * script_element * P(function(input, index) - if input:find('^[^>]+type%s*=%s*(["\'])text/coffeescript%1', index) then - return index +-- Entities. +lex:add_rule('entity', token('entity', '&' * (lexer.any - lexer.space - ';')^1 * ';')) +lex:add_style('entity', lexer.styles.comment) + +-- Fold points. 
+local function disambiguate_lt(text, pos, line, s) + if line:find('/>', s) then + return 0 + elseif line:find('^</', s) then + return -1 + else + return 1 end -end)) * M.embed_start_tag -- <script type="text/coffeescript"> -local cs_end_rule = #('</' * script_element * ws^0 * '>') * - M.embed_end_tag -- </script> -l.embed_lexer(M, cs, cs_start_rule, cs_end_rule) +end +lex:add_fold_point('element', '<', disambiguate_lt) +lex:add_fold_point('unknown_element', '<', disambiguate_lt) +lex:add_fold_point(lexer.COMMENT, '<!--', '-->') -return M +-- Tags that start embedded languages. +-- Export these patterns for proxy lexers (e.g. ASP) that need them. +lex.embed_start_tag = element * (ws * attribute * ws^-1 * equals * ws^-1 * string)^0 * ws^-1 * + tag_close +lex.embed_end_tag = element * tag_close + +-- Embedded CSS (<style type="text/css"> ... </style>). +local css = lexer.load('css') +local style_element = word_match('style', true) +local css_start_rule = #('<' * style_element * ('>' + P(function(input, index) + if input:find('^%s+type%s*=%s*(["\'])text/css%1', index) then return index end +end))) * lex.embed_start_tag +local css_end_rule = #('</' * style_element * ws^-1 * '>') * lex.embed_end_tag +lex:embed(css, css_start_rule, css_end_rule) + +-- Embedded JavaScript (<script type="text/javascript"> ... </script>). 
+local js = lexer.load('javascript') +local script_element = word_match('script', true) +local js_start_rule = #('<' * script_element * ('>' + P(function(input, index) + if input:find('^%s+type%s*=%s*(["\'])text/javascript%1', index) then return index end +end))) * lex.embed_start_tag +local js_end_rule = #('</' * script_element * ws^-1 * '>') * lex.embed_end_tag +local js_line_comment = '//' * (lexer.nonnewline - js_end_rule)^0 +local js_block_comment = '/*' * (lexer.any - '*/' - js_end_rule)^0 * P('*/')^-1 +js:modify_rule('comment', token(lexer.COMMENT, js_line_comment + js_block_comment)) +lex:embed(js, js_start_rule, js_end_rule) + +-- Embedded CoffeeScript (<script type="text/coffeescript"> ... </script>). +local cs = lexer.load('coffeescript') +script_element = word_match('script', true) +local cs_start_rule = #('<' * script_element * P(function(input, index) + if input:find('^[^>]+type%s*=%s*(["\'])text/coffeescript%1', index) then return index end +end)) * lex.embed_start_tag +local cs_end_rule = #('</' * script_element * ws^-1 * '>') * lex.embed_end_tag +lex:embed(cs, cs_start_rule, cs_end_rule) + +return lex diff --git a/lua/lexers/icon.lua b/lua/lexers/icon.lua @@ -1,78 +1,60 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- LPeg lexer for the Icon programming language. -- http://www.cs.arizona.edu/icon -- Contributed by Carl Sturtivant. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'icon'} +local lex = lexer.new('icon') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) ---Comments -local line_comment = '#' * l.nonnewline_esc^0 -local comment = token(l.COMMENT, line_comment) +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'break', 'by', 'case', 'create', 'default', 'do', 'else', 'end', 'every', 'fail', 'global', 'if', + 'initial', 'invocable', 'link', 'local', 'next', 'not', 'of', 'procedure', 'record', 'repeat', + 'return', 'static', 'suspend', 'then', 'to', 'until', 'while' +})) + +-- Icon Keywords: unique to Icon. +lex:add_rule('special_keyword', token('special_keyword', '&' * word_match{ + 'allocated', 'ascii', 'clock', 'collections', 'cset', 'current', 'date', 'dateline', 'digits', + 'dump', 'e', 'error', 'errornumber', 'errortext', 'errorvalue', 'errout', 'fail', 'features', + 'file', 'host', 'input', 'lcase', 'letters', 'level', 'line', 'main', 'null', 'output', 'phi', + 'pi', 'pos', 'progname', 'random', 'regions', 'source', 'storage', 'subject', 'time', 'trace', + 'ucase', 'version' +})) +lex:add_style('special_keyword', lexer.styles.type) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -local cset = l.delimited_range("'") -local str = l.delimited_range('"') -local string = token(l.STRING, cset + str) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true))) -- Numbers. -local radix_literal = P('-')^-1 * l.dec_num * S('rR') * l.alnum^1 -local number = token(l.NUMBER, radix_literal + l.float + l.integer) +local radix_literal = P('-')^-1 * lexer.dec_num * S('rR') * lexer.alnum^1 +lex:add_rule('number', token(lexer.NUMBER, radix_literal + lexer.number)) -- Preprocessor. -local preproc_word = word_match{ - 'include', 'line', 'define', 'undef', 'ifdef', 'ifndef', 'else', 'endif', - 'error' -} -local preproc = token(l.PREPROCESSOR, S(' \t')^0 * P('$') * preproc_word) - --- Keywords. 
-local keyword = token(l.KEYWORD, word_match{ - 'break', 'by', 'case', 'create', 'default', 'do', 'else', 'end', 'every', - 'fail', 'global', 'if', 'initial', 'invocable', 'link', 'local', 'next', - 'not', 'of', 'procedure', 'record', 'repeat', 'return', 'static', 'suspend', - 'then', 'to', 'until', 'while' -}) - --- Icon Keywords: unique to Icon; use l.TYPE, as Icon is dynamically typed -local type = token(l.TYPE, P('&') * word_match{ - 'allocated', 'ascii', 'clock', 'collections', 'cset', 'current', 'date', - 'dateline', 'digits', 'dump', 'e', 'error', 'errornumber', 'errortext', - 'errorvalue', 'errout', 'fail', 'features', 'file', 'host', 'input', 'lcase', - 'letters', 'level', 'line', 'main', 'null', 'output', 'phi', 'pi', 'pos', - 'progname', 'random', 'regions', 'source', 'storage', 'subject', 'time', - 'trace', 'ucase', 'version' -}) - --- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +lex:add_rule('preproc', token(lexer.PREPROCESSOR, '$' * + word_match('define else endif error ifdef ifndef include line undef'))) -- Operators. -local operator = token(l.OPERATOR, S('+-/*%<>~!=^&|?~@:;,.()[]{}')) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'type', type}, - {'identifier', identifier}, - {'comment', comment}, - {'string', string}, - {'number', number}, - {'preproc', preproc}, - {'operator', operator}, -} +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>~!=^&|?~@:;,.()[]{}'))) -M._foldsymbols = { - _patterns = {'%l+', '#'}, - [l.PREPROCESSOR] = {ifdef = 1, ifndef = 1, endif = -1}, - [l.KEYWORD] = { procedure = 1, ['end'] = -1}, - [l.COMMENT] = {['#'] = l.fold_line_comments('#')} -} +-- Fold points. 
+lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif') +lex:add_fold_point(lexer.KEYWORD, 'procedure', 'end') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#')) -return M +return lex diff --git a/lua/lexers/idl.lua b/lua/lexers/idl.lua @@ -1,68 +1,50 @@ --- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2006-2022 Mitchell. See LICENSE. -- IDL LPeg lexer. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'idl'} +local lex = lexer.new('idl') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) --- Comments. -local line_comment = '//' * l.nonnewline_esc^0 -local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 -local comment = token(l.COMMENT, line_comment + block_comment) +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'abstract', 'attribute', 'case', 'const', 'context', 'custom', 'default', 'enum', 'exception', + 'factory', 'FALSE', 'in', 'inout', 'interface', 'local', 'module', 'native', 'oneway', 'out', + 'private', 'public', 'raises', 'readonly', 'struct', 'support', 'switch', 'TRUE', 'truncatable', + 'typedef', 'union', 'valuetype' +})) --- Strings. -local sq_str = l.delimited_range("'", true) -local dq_str = l.delimited_range('"', true) -local string = token(l.STRING, sq_str + dq_str) +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match{ + 'any', 'boolean', 'char', 'double', 'fixed', 'float', 'long', 'Object', 'octet', 'sequence', + 'short', 'string', 'unsigned', 'ValueBase', 'void', 'wchar', 'wstring' +})) --- Numbers. -local number = token(l.NUMBER, l.float + l.integer) +-- Identifiers. 
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- Preprocessor. -local preproc_word = word_match{ - 'define', 'undef', 'ifdef', 'ifndef', 'if', 'elif', 'else', 'endif', - 'include', 'warning', 'pragma' -} -local preproc = token(l.PREPROCESSOR, - l.starts_line('#') * preproc_word * l.nonnewline^0) +-- Strings. +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) --- Keywords. -local keyword = token(l.KEYWORD, word_match{ - 'abstract', 'attribute', 'case', 'const', 'context', 'custom', 'default', - 'exception', 'enum', 'factory', 'FALSE', 'in', 'inout', 'interface', 'local', - 'module', 'native', 'oneway', 'out', 'private', 'public', 'raises', - 'readonly', 'struct', 'support', 'switch', 'TRUE', 'truncatable', 'typedef', - 'union', 'valuetype' -}) +-- Comments. +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) --- Types. -local type = token(l.TYPE, word_match{ - 'any', 'boolean', 'char', 'double', 'fixed', 'float', 'long', 'Object', - 'octet', 'sequence', 'short', 'string', 'unsigned', 'ValueBase', 'void', - 'wchar', 'wstring' -}) +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) --- Identifiers. -local identifier = token(l.IDENTIFIER, l.word) +-- Preprocessor. +lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') * + word_match('define undef ifdef ifndef if elif else endif include warning pragma'))) -- Operators. 
-local operator = token(l.OPERATOR, S('!<>=+-/*%&|^~.,:;?()[]{}')) - -M._rules = { - {'whitespace', ws}, - {'keyword', keyword}, - {'type', type}, - {'identifier', identifier}, - {'string', string}, - {'comment', comment}, - {'number', number}, - {'preprocessor', preproc}, - {'operator', operator}, -} +lex:add_rule('operator', token(lexer.OPERATOR, S('!<>=+-/*%&|^~.,:;?()[]{}'))) -return M +return lex diff --git a/lua/lexers/inform.lua b/lua/lexers/inform.lua @@ -1,97 +1,75 @@ --- Copyright 2010-2017 Jeff Stone. See LICENSE. +-- Copyright 2010-2022 Jeff Stone. See LICENSE. -- Inform LPeg lexer for Scintillua. -- JMS 2010-04-25. -local l = require('lexer') -local token, word_match = l.token, l.word_match -local P, R, S = lpeg.P, lpeg.R, lpeg.S +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S -local M = {_NAME = 'inform'} +local lex = lexer.new('inform') -- Whitespace. -local ws = token(l.WHITESPACE, l.space^1) - --- Comments. -local comment = token(l.COMMENT, '!' * l.nonnewline^0) - --- Strings. -local sq_str = l.delimited_range("'") -local dq_str = l.delimited_range('"') -local string = token(l.STRING, sq_str + dq_str) - --- Numbers. -local inform_hex = '$' * l.xdigit^1 -local inform_bin = '$$' * S('01')^1 -local number = token(l.NUMBER, l.integer + inform_hex + inform_bin) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Keywords. 
-local keyword = token(l.KEYWORD, word_match{ - 'Abbreviate', 'Array', 'Attribute', 'Class', 'Constant', 'Default', 'End', - 'Endif', 'Extend', 'Global', 'Ifdef', 'Iffalse', 'Ifndef', 'Ifnot', 'Iftrue', - 'Import', 'Include', 'Link', 'Lowstring', 'Message', 'Object', 'Property', - 'Release', 'Replace', 'Serial', 'StartDaemon', 'Statusline', 'StopDaemon', - 'Switches', 'Verb', 'absent', 'action', 'actor', 'add_to_scope', 'address', - 'additive', 'after', 'and', 'animate', 'article', 'articles', 'before', - 'bold', 'box', 'break', 'cant_go', 'capacity', 'char', 'class', 'child', - 'children', 'clothing', 'concealed', 'container', 'continue', 'creature', - 'daemon', 'deadflag', 'default', 'describe', 'description', 'do', 'door', - 'door_dir', 'door_to', 'd_to', 'd_obj', 'e_to', 'e_obj', 'each_turn', - 'edible', 'else', 'enterable', 'false', 'female', 'first', 'font', 'for', - 'found_in', 'general', 'give', 'grammar', 'has', 'hasnt', 'held', 'if', 'in', - 'in_to', 'in_obj', 'initial', 'inside_description', 'invent', 'jump', 'last', - 'life', 'light', 'list_together', 'location', 'lockable', 'locked', 'male', - 'move', 'moved', 'multi', 'multiexcept', 'multiheld', 'multiinside', 'n_to', - 'n_obj', 'ne_to', 'ne_obj', 'nw_to', 'nw_obj', 'name', 'neuter', 'new_line', - 'nothing', 'notin', 'noun', 'number', 'objectloop', 'ofclass', 'off', 'on', - 'only',