clp

command line syntax highlighter
git clone git://jeskin.net/clp.git

commit cadbad125db5e4f304f7fe57a637b277d7e50353
Author: Jon Eskin <eskinjp@gmail.com>
Date:   Tue, 12 Jul 2022 14:43:53 -0400

initial commit

Diffstat:
A.gitignore | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
ALICENSE | 20++++++++++++++++++++
AMakefile | 42++++++++++++++++++++++++++++++++++++++++++
AREADME.md | 23+++++++++++++++++++++++
ATHIRD-PARTY-NOTICE | 287+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aclp.c | 191+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aconfigure | 336+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainclude/optparse.h | 403+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/clp.lua | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/colors.lua | 83+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/ftdetect.lua | 485+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexer.lua | 1675+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/actionscript.lua | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/ada.lua | 68++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/ansi_c.lua | 154+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/antlr.lua | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/apdl.lua | 102+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/apl.lua | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/applescript.lua | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/asm.lua | 486+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/asp.lua | 42++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/autoit.lua | 168+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/awk.lua | 334+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/bash.lua | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/batch.lua | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/bibtex.lua | 58++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/boo.lua | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/caml.lua | 83+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/chuck.lua | 92+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/clojure.lua | 193+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/cmake.lua | 173+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/coffeescript.lua | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/container.lua | 7+++++++
Alua/lexers/context.lua | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/cpp.lua | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/crystal.lua | 141+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/csharp.lua | 84+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/css.lua | 166+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/cuda.lua | 92+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/dart.lua | 77+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/desktop.lua | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/diff.lua | 44++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/django.lua | 77+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/dmd.lua | 176+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/dockerfile.lua | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/dot.lua | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/dsv.lua | 17+++++++++++++++++
Alua/lexers/eiffel.lua | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/elixir.lua | 123+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/elm.lua | 64++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/erlang.lua | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/fantom.lua | 105+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/faust.lua | 58++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/fennel.lua | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/fish.lua | 76++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/forth.lua | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/fortran.lua | 91+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/fsharp.lua | 76++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/fstab.lua | 569+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/gap.lua | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/gemini.lua | 48++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/gettext.lua | 39+++++++++++++++++++++++++++++++++++++++
Alua/lexers/gherkin.lua | 64++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/git-rebase.lua | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/gleam.lua | 119+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/glsl.lua | 132+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/gnuplot.lua | 80+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/go.lua | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/groovy.lua | 89+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/gtkrc.lua | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/hare.lua | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/haskell.lua | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/html.lua | 162+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/icon.lua | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/idl.lua | 68++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/inform.lua | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/ini.lua | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/io_lang.lua | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/java.lua | 86+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/javascript.lua | 65+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/jq.lua | 83+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/json.lua | 47+++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/jsp.lua | 29+++++++++++++++++++++++++++++
Alua/lexers/julia.lua | 147+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/latex.lua | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/ledger.lua | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/less.lua | 27+++++++++++++++++++++++++++
Alua/lexers/lexer.lua | 1675+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/lilypond.lua | 40++++++++++++++++++++++++++++++++++++++++
Alua/lexers/lisp.lua | 84+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/litcoffee.lua | 21+++++++++++++++++++++
Alua/lexers/logtalk.lua | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/lpeg.properties | 282+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/lua.lua | 190+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/makefile.lua | 109+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/man.lua | 37+++++++++++++++++++++++++++++++++++++
Alua/lexers/markdown.lua | 109+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/matlab.lua | 105+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/mediawiki.lua | 47+++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/meson.lua | 161+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/moonscript.lua | 170+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/myrddin.lua | 72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/nemerle.lua | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/networkd.lua | 274+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/nim.lua | 124+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/nsis.lua | 182+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/null.lua | 6++++++
Alua/lexers/objective_c.lua | 87+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/pascal.lua | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/perl.lua | 164+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/php.lua | 132+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/pico8.lua | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/pike.lua | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/pkgbuild.lua | 131+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/pony.lua | 116+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/powershell.lua | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/prolog.lua | 167+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/props.lua | 47+++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/protobuf.lua | 58++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/ps.lua | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/pure.lua | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/python.lua | 135+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/rails.lua | 65+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/rc.lua | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/reason.lua | 83+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/rebol.lua | 129+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/rest.lua | 259+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/rexx.lua | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/rhtml.lua | 29+++++++++++++++++++++++++++++
Alua/lexers/routeros.lua | 116+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/rpmspec.lua | 31+++++++++++++++++++++++++++++++
Alua/lexers/rstats.lua | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/ruby.lua | 148+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/rust.lua | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/sass.lua | 32++++++++++++++++++++++++++++++++
Alua/lexers/scala.lua | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/scheme.lua | 236+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/smalltalk.lua | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/sml.lua | 111+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/snobol4.lua | 64++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/spin.lua | 135+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/sql.lua | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/strace.lua | 34++++++++++++++++++++++++++++++++++
Alua/lexers/systemd.lua | 443+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/taskpaper.lua | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/tcl.lua | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/template.txt | 40++++++++++++++++++++++++++++++++++++++++
Alua/lexers/tex.lua | 45+++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/texinfo.lua | 270+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/text.lua | 15+++++++++++++++
Alua/lexers/toml.lua | 64++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/txt2tags.lua | 146+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/typescript.lua | 23+++++++++++++++++++++++
Alua/lexers/vala.lua | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/vb.lua | 64++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/vbscript.lua | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/vcard.lua | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/verilog.lua | 101+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/vhdl.lua | 89+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/wsf.lua | 102+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/xml.lua | 93+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/xs.lua | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/xtend.lua | 112+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/yaml.lua | 110+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/lexers/zig.lua | 130+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alua/theme.lua | 35+++++++++++++++++++++++++++++++++++
Aman/clp.1 | 16++++++++++++++++
167 files changed, 20556 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,55 @@ +main +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf +tags +.DS_Store diff --git a/LICENSE b/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2022 Jon Eskin + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile @@ -0,0 +1,42 @@ +-include config.mk + +CFLAGS += -Wall -pedantic +CFLAGS += -I $(CURDIR)/include +CFLAGS += $(CFLAGS_LUA) +CFLAGS += $(CFLAGS_AUTO) +CFLAGS += -DCLP_PATH=\"${SHAREPREFIX}/clp\" +LDFLAGS = $(LDFLAGS_LUA) +LDFLAGS += $(LDFLAGS_AUTO) +SRC = clp.c +ELF = clp + +ALL: $(ELF) + +config.mk: + @touch $@ + +clp: config.mk clp.c + $(CC) $(SRC) $(CFLAGS) $(LDFLAGS) -o clp + +install: $(ELF) + @echo installing executable files to ${DESTDIR}${PREFIX}/bin + @mkdir -p ${DESTDIR}${PREFIX}/bin + cp -f clp ${DESTDIR}${PREFIX}/bin && \ + chmod 755 ${DESTDIR}${PREFIX}/bin/clp; + echo installing support files to ${DESTDIR}${SHAREPREFIX}/clp; \ + mkdir -p ${DESTDIR}${SHAREPREFIX}/clp; \ + cp -r lua/* ${DESTDIR}${SHAREPREFIX}/clp; + @echo installing manual pages to ${DESTDIR}${MANPREFIX}/man1 + @mkdir -p ${DESTDIR}${MANPREFIX}/man1 + sed -e "s/VERSION/${VERSION}/" < "man/clp.1" > \ + "${DESTDIR}${MANPREFIX}/man1/clp.1" && \ + chmod 644 "${DESTDIR}${MANPREFIX}/man1/clp.1"; \ + +uninstall: + @echo removing executable file from ${DESTDIR}${PREFIX}/bin + @rm -f ${DESTDIR}${PREFIX}/bin/clp + @echo removing manual pages from ${DESTDIR}${MANPREFIX}/man1 + @rm -f ${DESTDIR}${MANPREFIX}/man1/clp.1 + +clean: + rm clp diff --git a/README.md b/README.md @@ -0,0 +1,23 @@ +# clp + +clp writes input files to stdout with syntax highlighting. +It aims to be relatively fast, provide wide language support, and be easy +to extend with new languages. It currently supports over 140 languages. + +Language support is implemented with LPEG, a tool developed by PUC which uses +parsing expression grammars to improve upon traditional regex parsers +(described in depth in [this article](http://www.inf.puc-rio.br/~roberto/docs/peg.pdf)). 
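As a taste of the approach, here is a brief sketch (illustrative only, not code from this commit) of how grammars are composed from ordinary Lua values with LPeg, the PEG-based pattern-matching library for Lua developed by Roberto Ierusalimschy at PUC-Rio:

```
local lpeg = require('lpeg')
local P, R, C = lpeg.P, lpeg.R, lpeg.C

-- Patterns compose as ordinary Lua values: '*' sequences two patterns,
-- '+' is ordered choice, and '^0' repeats zero or more times.
local comment = P('//') * (1 - P('\n'))^0          -- a C++-style line comment
local ident = (R('az', 'AZ') + '_') * (R('az', 'AZ', '09') + '_')^0

print(lpeg.match(C(ident), 'token_start'))         --> token_start
```

The LPeg-based lexers under lua/lexers/ are built from combinators like these.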
+ +## Installation + +``` +$ ./configure +$ make +# make install +``` + +## Usage + +``` +$ clp [-t FILETYPE] filename +``` diff --git a/THIRD-PARTY-NOTICE b/THIRD-PARTY-NOTICE @@ -0,0 +1,287 @@ +clp uses the following third-party libraries. The copyright notices can be +found below. + +If you are a copyright owner of code that I'm using and have a concern, +please create an issue or contact me directly at eskinjp@gmail.com. + +1. vis (https://github.com/martanne/vis) +2. musl (https://git.musl-libc.org/cgit/musl) +3. Scintillua (https://github.com/orbitalquark/scintillua) + + +%% vis NOTICES AND INFORMATION BEGIN HERE +========================================= + +vis as a whole is licensed under the following standard ISC license: + + Copyright © 2014-2020 Marc André Tanner, et al. + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +Portions of this software are derived from third-party works licensed +under terms compatible with the above ISC license: + + - ./configure is based on the corresponding script from the musl libc + project and is MIT licensed + + - map.[ch] originate from the Comprehensive C Archive Network strmap + module and are public domain / CC0 licensed + + - libutf.[ch] originate from libutf a port of Plan 9's Unicode library + to Unix and are MIT licensed + + - sam.[ch] is heavily inspired (and partially based upon) the X11 + version of Rob Pike's sam text editor originally written for Plan 9 + and distributed under an ISC-like license + + - lua/lexers/* the LPeg based lexers used for syntax highlighting are + imported from the Scintillua project licensed under the MIT license + +Check the individual source files for more specific copyright information. + +Details on authorship of individual files can be found in the git version +control history of the project. The omission of copyright and license +comments in each file is in the interest of source tree size. + + +========================================= +END OF vis NOTICES AND INFORMATION + +%% musl NOTICES AND INFORMATION BEGIN HERE +========================================= + +musl as a whole is licensed under the following standard MIT license: + +---------------------------------------------------------------------- +Copyright © 2005-2020 Rich Felker, et al. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +---------------------------------------------------------------------- + +Authors/contributors include: + +A. Wilcox +Ada Worcester +Alex Dowad +Alex Suykov +Alexander Monakov +Andre McCurdy +Andrew Kelley +Anthony G. Basile +Aric Belsito +Arvid Picciani +Bartosz Brachaczek +Benjamin Peterson +Bobby Bingham +Boris Brezillon +Brent Cook +Chris Spiegel +Clément Vasseur +Daniel Micay +Daniel Sabogal +Daurnimator +David Carlier +David Edelsohn +Denys Vlasenko +Dmitry Ivanov +Dmitry V. Levin +Drew DeVault +Emil Renner Berthing +Fangrui Song +Felix Fietkau +Felix Janda +Gianluca Anzolin +Hauke Mehrtens +He X +Hiltjo Posthuma +Isaac Dunham +Jaydeep Patil +Jens Gustedt +Jeremy Huntwork +Jo-Philipp Wich +Joakim Sindholt +John Spencer +Julien Ramseier +Justin Cormack +Kaarle Ritvanen +Khem Raj +Kylie McClain +Leah Neukirchen +Luca Barbato +Luka Perkov +M Farkas-Dyck (Strake) +Mahesh Bodapati +Markus Wichmann +Masanori Ogino +Michael Clark +Michael Forney +Mikhail Kremnyov +Natanael Copa +Nicholas J. Kain +orc +Pascal Cuoq +Patrick Oppenlander +Petr Hosek +Petr Skocik +Pierre Carrier +Reini Urban +Rich Felker +Richard Pennington +Ryan Fairfax +Samuel Holland +Segev Finer +Shiz +sin +Solar Designer +Stefan Kristiansson +Stefan O'Rear +Szabolcs Nagy +Timo Teräs +Trutz Behn +Valentin Ochs +Will Dietz +William Haddon +William Pitcock + +Portions of this software are derived from third-party works licensed +under terms compatible with the above MIT license: + +The TRE regular expression implementation (src/regex/reg* and +src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed +under a 2-clause BSD license (license text in the source files). The +included version has been heavily modified by Rich Felker in 2012, in +the interests of size, simplicity, and namespace cleanliness. + +Much of the math library code (src/math/* and src/complex/*) is +Copyright © 1993,2004 Sun Microsystems or +Copyright © 2003-2011 David Schultz or +Copyright © 2003-2009 Steven G. Kargl or +Copyright © 2003-2009 Bruce D. Evans or +Copyright © 2008 Stephen L. Moshier or +Copyright © 2017-2018 Arm Limited +and labelled as such in comments in the individual source files. All +have been licensed under extremely permissive terms. + +The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008 +The Android Open Source Project and is licensed under a two-clause BSD +license. It was taken from Bionic libc, used on Android. + +The AArch64 memcpy and memset code (src/string/aarch64/*) are +Copyright © 1999-2019, Arm Limited. + +The implementation of DES for crypt (src/crypt/crypt_des.c) is +Copyright © 1994 David Burren. It is licensed under a BSD license. + +The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was +originally written by Solar Designer and placed into the public +domain. The code also comes with a fallback permissive license for use +in jurisdictions that may not recognize the public domain. + +The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011 +Valentin Ochs and is licensed under an MIT-style license. 
+ +The x86_64 port was written by Nicholas J. Kain and is licensed under +the standard MIT terms. + +The mips and microblaze ports were originally written by Richard +Pennington for use in the ellcc project. The original code was adapted +by Rich Felker for build system and code conventions during upstream +integration. It is licensed under the standard MIT terms. + +The mips64 port was contributed by Imagination Technologies and is +licensed under the standard MIT terms. + +The powerpc port was also originally written by Richard Pennington, +and later supplemented and integrated by John Spencer. It is licensed +under the standard MIT terms. + +All other files which have no copyright comments are original works +produced specifically for use as part of this library, written either +by Rich Felker, the main author of the library, or by one or more +contibutors listed above. Details on authorship of individual files +can be found in the git version control history of the project. The +omission of copyright and license comments in each file is in the +interest of source tree size. + +In addition, permission is hereby granted for all public header files +(include/* and arch/*/bits/*) and crt files intended to be linked into +applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit +the copyright notice and permission notice otherwise required by the +license, and to use these files without any requirement of +attribution. These files include substantial contributions from: + +Bobby Bingham +John Spencer +Nicholas J. Kain +Rich Felker +Richard Pennington +Stefan Kristiansson +Szabolcs Nagy + +all of whom have explicitly granted such permission. + +This file previously contained text expressing a belief that most of +the files covered by the above exception were sufficiently trivial not +to be subject to copyright, resulting in confusion over whether it +negated the permissions granted in the license. In the spirit of +permissive licensing, and of not having licensing issues being an +obstacle to adoption, that text has been removed. + +========================================= +END OF musl NOTICES AND INFORMATION + + +%% Scintillua NOTICES AND INFORMATION BEGIN HERE +========================================= + +The MIT License + +Copyright (c) 2007-2022 Mitchell + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +========================================= +END OF Scintillua NOTICES AND INFORMATION diff --git a/clp.c b/clp.c @@ -0,0 +1,191 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <lua.h> +#include <lualib.h> +#include <lauxlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <pwd.h> +#include <libgen.h> +#include <signal.h> + +#define OPTPARSE_IMPLEMENTATION +#include "optparse.h" + +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +#ifndef CLP_PATH +#define CLP_PATH "/usr/local/share/clp" +#endif + +void bail(lua_State *L, char *msg) { + fprintf(stderr, "\nFATAL ERROR:\n %s: %s\n\n", + msg, lua_tostring(L, -1)); + exit(1); +} + +int print_lua_path(lua_State* L) { + lua_getglobal( L, "package" ); + lua_getfield( L, -1, "path" ); + const char *cur_path; + cur_path = lua_tostring(L, -1); + printf("%s\n",cur_path); + return 0; +} + +bool lua_path_add(lua_State *L, const char *path) { + if (!L || !path) + return false; + lua_getglobal(L, "package"); + lua_pushstring(L, path); + lua_pushstring(L, "/?.lua;"); + lua_getfield(L, -3, "path"); + lua_concat(L, 3); + lua_setfield(L, -2, "path"); + lua_pop(L, 1); /* package */ + return true; +} + +bool lua_paths_get(lua_State *L, char **lpath, char **cpath) { + if (!L) + return false; + const char *s; + lua_getglobal(L, "package"); + lua_getfield(L, -1, "path"); + s = lua_tostring(L, -1); + *lpath = s ? strdup(s) : NULL; + lua_getfield(L, -2, "cpath"); + s = lua_tostring(L, -1); + *cpath = s ? strdup(s) : NULL; + return true; +} + +static bool package_exists(lua_State *L, const char *name) { + const char lua[] = + "local name = ...\n" + "for _, searcher in ipairs(package.searchers or package.loaders) do\n" + "local loader = searcher(name)\n" + "if type(loader) == 'function' then\n" + "return true\n" + "end\n" + "end\n" + "return false\n"; + if (luaL_loadstring(L, lua) != LUA_OK) + return false; + lua_pushstring(L, name); + /* an error indicates package exists */ + bool ret = lua_pcall(L, 1, 1, 0) != LUA_OK || lua_toboolean(L, -1); + lua_pop(L, 1); + return ret; +} + +int main(int argc, char *argv[]) { + struct optparse_long longopts[] = { + {"override-filetype", 't', OPTPARSE_REQUIRED}, + {0}}; + + lua_State *L = luaL_newstate(); + if (!L) { + return -1; + } + + luaL_openlibs(L); + + char path[PATH_MAX]; + lua_path_add(L, CLP_PATH); + + const char *home = getenv("HOME"); + if (!home || !*home) { + struct passwd *pw = getpwuid(getuid()); + if (pw) + home = pw->pw_dir; + } + + // TODO: make this the install dir + /* lua_path_add(L, "/Users/jon/Development/c/clp"); */ + /* lua_path_add(L, "/Users/jon/Development/c/clp/lua"); */ + /* lua_path_add(L, "/Users/jon/Development/c/clp/lexers"); */ + + + + const char *xdg_config = getenv("XDG_CONFIG_HOME"); + if (xdg_config) { + snprintf(path, sizeof path, "%s/clp", xdg_config); + lua_path_add(L, path); + } + else if (home && *home) { + snprintf(path, sizeof path, "%s/.config/clp", home); + lua_path_add(L, path); + } + + ssize_t len = readlink("/proc/self/exe", path, sizeof(path) - 1); + if (len > 0) { + path[len] = '\0'; + /* some idiotic dirname(3) implementations return pointers to statically + * allocated memory, hence we use memmove to copy it back */ + char *dir = dirname(path); + if (dir) { + size_t len = strlen(dir) + 1; + if (len < sizeof(path) - sizeof("/lua")) { + memmove(path, dir, len); + strcat(path, "/lua"); + lua_path_add(L, path); + } + } + } + + int ret = 0; + int status = 0; + if (!package_exists(L, "clp")) { 
+ printf("WARNING: failed to load clp.lua\n"); + exit(1); + } + lua_getglobal(L, "require"); + lua_pushstring(L, "clp"); + status = lua_pcall(L, 1, 1, 0); + if (status != 0) + fprintf(stderr, "%s\n", lua_tostring(L, -1)); + + int option = 0; + char *filename; + char *filetype_override; + struct optparse options; + + optparse_init(&options, argv); + while ((option = optparse_long(&options, longopts, NULL)) != -1) { + switch (option) { + case 't': + lua_pushliteral(L, "filetype_override"); + filetype_override = options.optarg; + lua_pushstring(L, filetype_override); + lua_settable(L, -3); + break; + } + } + + filename = optparse_arg(&options); + + if (!filename) { + printf("Usage: clp [--filetype_override] file"); + return 1; + } + lua_getglobal(L, "write"); + lua_newtable(L); + lua_pushliteral(L, "filename"); + lua_pushstring(L, filename); + lua_settable(L, -3); + + ret = lua_pcall(L, 1, 0, 0); + if (ret != 0) { + fprintf(stderr, "%s\n", lua_tostring(L, -1)); + return 1; + } + lua_close(L); + return 0; +} diff --git a/configure b/configure @@ -0,0 +1,336 @@ +#!/bin/sh +# Based on the configure script from musl libc, MIT licensed + +usage () { +cat <<EOF +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + --srcdir=DIR source directory [detected] + +Installation directories: + --prefix=PREFIX main installation prefix [/usr/local] + --exec-prefix=EPREFIX installation prefix for executable files [PREFIX] + +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sharedir=DIR share directories [PREFIX/share] + --docdir=DIR misc. documentation [PREFIX/share/doc] + --mandir=DIR man pages [PREFIX/share/man] + +Some influential environment variables: + CC C compiler command [detected] + CFLAGS C compiler flags [-Os -pipe ...] + LDFLAGS Linker flags + +Use these variables to override the choices made by configure. + +EOF +exit 0 +} + +# helper functions + + +quote () { +tr '\n' ' ' <<EOF | grep '^[-[:alnum:]_=,./:]* $' >/dev/null 2>&1 && { echo "$1" ; return 0 ; } +$1 +EOF +printf %s\\n "$1" | sed -e "s/'/'\\\\''/g" -e "1s/^/'/" -e "\$s/\$/'/" -e "s#^'\([-[:alnum:]_,./:]*\)=\(.*\)\$#\1='\2#" +} +echo () { printf "%s\n" "$*" ; } +fail () { echo "$*" ; exit 1 ; } +fnmatch () { eval "case \"\$2\" in $1) return 0 ;; *) return 1 ;; esac" ; } +cmdexists () { type "$1" >/dev/null 2>&1 ; } +trycc () { test -z "$CC" && cmdexists "$1" && CC=$1 ; } + +stripdir () { +while eval "fnmatch '*/' \"\${$1}\"" ; do eval "$1=\${$1%/}" ; done +} + +# maybe delete +tryflag () { +printf "checking whether compiler accepts %s... " "$2" +echo "typedef int x;" > "$tmpc" +if $CC $CFLAGS_TRY $2 -c -o "$tmpo" "$tmpc" >/dev/null 2>&1 ; then +printf "yes\n" +eval "$1=\"\${$1} \$2\"" +eval "$1=\${$1# }" +return 0 +else +printf "no\n" +return 1 +fi +} + +# maybe delete +tryldflag () { +printf "checking whether linker accepts %s... 
" "$2" +echo "typedef int x;" > "$tmpc" +if $CC $LDFLAGS_TRY -nostdlib -shared "$2" -o "$tmpo" "$tmpc" >/dev/null 2>&1 ; then +printf "yes\n" +eval "$1=\"\${$1} \$2\"" +eval "$1=\${$1# }" +return 0 +else +printf "no\n" +return 1 +fi +} + + +# start of configure + +CFLAGS_AUTO= +CFLAGS_TRY= +LDFLAGS_AUTO= +LDFLAGS_TRY= +SRCDIR= +PREFIX=/usr/local +EXEC_PREFIX='$(PREFIX)' +BINDIR='$(EXEC_PREFIX)/bin' +SHAREDIR='$(PREFIX)/share' +DOCDIR='$(PREFIX)/share/doc' +MANDIR='$(PREFIX)/share/man' + +for arg ; do +case "$arg" in +--srcdir=*) SRCDIR=${arg#*=} ;; +--prefix=*) PREFIX=${arg#*=} ;; +--exec-prefix=*) EXEC_PREFIX=${arg#*=} ;; +--bindir=*) BINDIR=${arg#*=} ;; +--sharedir=*) SHAREDIR=${arg#*=} ;; +--docdir=*) DOCDIR=${arg#*=} ;; +--mandir=*) MANDIR=${arg#*=} ;; +--environment-only) environmentonly=yes ;; +--enable-*|--disable-*|--with-*|--without-*|--*dir=*|--build=*) ;; +-* ) echo "$0: unknown option $arg" ;; +CC=*) CC=${arg#*=} ;; +CFLAGS=*) CFLAGS=${arg#*=} ;; +CPPFLAGS=*) CPPFLAGS=${arg#*=} ;; +LDFLAGS=*) LDFLAGS=${arg#*=} ;; +*=*) ;; +*) ;; +esac +done + +# CC=cc + +for i in SRCDIR PREFIX EXEC_PREFIX BINDIR SHAREDIR DOCDIR MANDIR ; do +stripdir $i +done + + +have_pkgconfig=no +printf "checking for pkg-config... " +cmdexists pkg-config && have_pkgconfig=yes +printf "%s\n" "$have_pkgconfig" + + +# +# Get the source dir for out-of-tree builds +# +if test -z "$SRCDIR" ; then +SRCDIR="${0%/configure}" +stripdir SRCDIR +fi +abs_builddir="$(pwd)" || fail "$0: cannot determine working directory" +abs_srcdir="$(cd $SRCDIR && pwd)" || fail "$0: invalid source directory $SRCDIR" +test "$abs_srcdir" = "$abs_builddir" && SRCDIR=. +test "$SRCDIR" != "." -a -f Makefile -a ! -h Makefile && fail "$0: Makefile already exists in the working directory" + +# +# Get a temp filename we can use +# +i=0 +set -C +while : ; do i=$(($i+1)) +tmpc="./conf$$-$PPID-$i.c" +tmpo="./conf$$-$PPID-$i.o" +2>|/dev/null > "$tmpc" && break +test "$i" -gt 50 && fail "$0: cannot create temporary file $tmpc" +done +set +C +trap 'rm -f "$tmpc" "$tmpo"' EXIT QUIT TERM HUP +trap 'rm -f "$tmpc" "$tmpo" && echo && fail "$0: interrupted"' INT + +# +# Find a C compiler to use +# +printf "checking for C compiler... " +trycc cc +trycc gcc +trycc clang +printf "after checking %s\n" "$CC" +test -n "$CC" || { echo "$0: cannot find a C compiler" ; exit 1 ; } + +printf "checking whether C compiler works... " +echo "typedef int x;" > "$tmpc" +if output=$($CC $CPPFLAGS $CFLAGS -c -o "$tmpo" "$tmpc" 2>&1) ; then +printf "yes\n" +else +printf "no; compiler output follows:\n%s\n" "$output" +exit 1 +fi + +tryflag CFLAGS -pipe + +# Try flags to optimize binary size +tryflag CFLAGS -Wall +tryflag CFLAGS -O2 +tryflag CFLAGS -ffunction-sections +tryflag CFLAGS -fdata-sections +tryldflag LDFLAGS_AUTO -Wl,--gc-sections + + +printf "creating config.mk... 
" +printf "CC is now %s\n" "$CC" + +cmdline=$(quote "$0") +for i ; do cmdline="$cmdline $(quote "$i")" ; done + +exec 3>&1 1>config.mk + +cat << EOF +# This version of config.mk was generated by: +# $cmdline +# Any changes made here will be lost if configure is re-run +SRCDIR = $SRCDIR +PREFIX = $PREFIX +EXEC_PREFIX = $EXEC_PREFIX +BINDIR = $BINDIR +DOCPREFIX = $DOCDIR +MANPREFIX = $MANDIR +SHAREPREFIX = $SHAREDIR +CC = $CC +CFLAGS = $CFLAGS +LDFLAGS = $LDFLAGS +CFLAGS_STD = $CFLAGS_STD +LDFLAGS_STD = $LDFLAGS_STD +CFLAGS_AUTO = $CFLAGS_AUTO +LDFLAGS_AUTO = $LDFLAGS_AUTO +CFLAGS_DEBUG = -U_FORTIFY_SOURCE -UNDEBUG -O0 -g3 -ggdb -Wall -Wextra -pedantic -Wno-missing-field-initializers -Wno-unused-parameter +EOF +exec 1>&3 3>&- + +printf "done\n" + +printf "checking for liblua >= 5.1 ...\n" + +cat > "$tmpc" <<EOF +#include <lua.h> +#include <lualib.h> +#include <lauxlib.h> + +#if LUA_VERSION_NUM < 501 +#error "Need at least Lua 5.1" +#endif + +int main(int argc, char *argv[]) { + lua_State *L = luaL_newstate(); + luaL_openlibs(L); + lua_close(L); + return 0; +} +EOF + +have_lua="no" +for liblua in luajit lua lua5.4 lua5.3 lua5.2 lua-5.3 lua-5.2 lua54 lua53 lua52; do +# for liblua in lua lua5.4 lua5.3 lua5.2 lua-5.3 lua-5.2 lua54 lua53 lua52; do + printf " checking for %s... " "$liblua" + + if test "$have_pkgconfig" = "yes" ; then + CFLAGS_LUA=$(pkg-config --cflags $liblua 2>/dev/null) + LDFLAGS_LUA=$(pkg-config --libs $liblua 2>/dev/null) + if test $? -eq 0 && $CC $CFLAGS $CFLAGS_LUA "$tmpc" \ + $LDFLAGS $LDFLAGS_LUA -o "$tmpo" >/dev/null 2>&1 ; then + have_lua="yes" + printf "yes\n" + break + fi + fi + + CFLAGS_LUA="" + LDFLAGS_LUA="-l$liblua -lm -ldl" + + if $CC $CFLAGS $CFLAGS_LUA "$tmpc" \ + $LDFLAGS $LDFLAGS_LUA -o "$tmpo" >/dev/null 2>&1 ; then + have_lua="yes" + printf "yes\n" + break + else + printf "no\n" + fi +done + +test have_lua = "yes" && fail "$0: cannot find liblua" + +# if $liblua = "luajit" + # CFLAGS_LUA += -Wl,-E +# fi + +have_lpeg="no" +printf "checking for lpeg..." + +cat > "$tmpc" <<EOF +#include <lua.h> +#include <lualib.h> +#include <lauxlib.h> +#include <assert.h> + +int main() { + lua_State *L = luaL_newstate(); + luaL_openlibs(L); + int reqRes = luaL_dostring(L, "local t=require('lpeg') return (t~=nil)"); + assert(reqRes == 0); + + return 0; +} +EOF + +if test "$have_pkgconfig" = "yes" ; then + if test $? -eq 0 && $CC $CFLAGS $CFLAGS_LUA "$tmpc" \ + $LDFLAGS $LDFLAGS_LUA -o "$tmpo" >/dev/null 2>&1 ; then + printf "yes\n" + have_lpeg="yes" + break + else + if $CC $CFLAGS $CFLAGS_LUA "$tmpc" \ + $LDFLAGS $LDFLAGS_LUA -o "$tmpo" >/dev/null 2>&1 ; then + printf "yes\n" + have_lpeg="yes" + break + else + printf "no\n" + fi + fi +fi + +test have_lpeg = "yes" && fail "$0: cannot find lpeg" + +printf "completing config.mk... " + +exec 3>&1 1>>config.mk + +cat << EOF +CONFIG_HELP = $CONFIG_HELP +CONFIG_LUA = $CONFIG_LUA +CFLAGS_LUA = $CFLAGS_LUA +LDFLAGS_LUA = $LDFLAGS_LUA +CONFIG_LPEG = $CONFIG_LPEG +CFLAGS_LPEG = $CFLAGS_LPEG +LDFLAGS_LPEG = $LDFLAGS_LPEG +EOF +exec 1>&3 3>&- + +test "$SRCDIR" = "." || ln -sf $SRCDIR/Makefile . + +printf "CC is now %s\n" "$CC" + +printf "done\n" + diff --git a/include/optparse.h b/include/optparse.h @@ -0,0 +1,403 @@ +/* Optparse --- portable, reentrant, embeddable, getopt-like option parser + * + * This is free and unencumbered software released into the public domain. + * + * To get the implementation, define OPTPARSE_IMPLEMENTATION. 
+ * Optionally define OPTPARSE_API to control the API's visibility + * and/or linkage (static, __attribute__, __declspec). + * + * The POSIX getopt() option parser has three fatal flaws. These flaws + * are solved by Optparse. + * + * 1) Parser state is stored entirely in global variables, some of + * which are static and inaccessible. This means only one thread can + * use getopt(). It also means it's not possible to recursively parse + * nested sub-arguments while in the middle of argument parsing. + * Optparse fixes this by storing all state on a local struct. + * + * 2) The POSIX standard provides no way to properly reset the parser. + * This means for portable code that getopt() is only good for one + * run, over one argv with one option string. It also means subcommand + * options cannot be processed with getopt(). Most implementations + * provide a method to reset the parser, but it's not portable. + * Optparse provides an optparse_arg() function for stepping over + * subcommands and continuing parsing of options with another option + * string. The Optparse struct itself can be passed around to + * subcommand handlers for additional subcommand option parsing. A + * full reset can be achieved by with an additional optparse_init(). + * + * 3) Error messages are printed to stderr. This can be disabled with + * opterr, but the messages themselves are still inaccessible. + * Optparse solves this by writing an error message in its errmsg + * field. The downside to Optparse is that this error message will + * always be in English rather than the current locale. + * + * Optparse should be familiar with anyone accustomed to getopt(), and + * it could be a nearly drop-in replacement. The option string is the + * same and the fields have the same names as the getopt() global + * variables (optarg, optind, optopt). + * + * Optparse also supports GNU-style long options with optparse_long(). + * The interface is slightly different and simpler than getopt_long(). + * + * By default, argv is permuted as it is parsed, moving non-option + * arguments to the end. This can be disabled by setting the `permute` + * field to 0 after initialization. + */ +#ifndef OPTPARSE_H +#define OPTPARSE_H + +#ifndef OPTPARSE_API +# define OPTPARSE_API +#endif + +struct optparse { + char **argv; + int permute; + int optind; + int optopt; + char *optarg; + char errmsg[64]; + int subopt; +}; + +enum optparse_argtype { + OPTPARSE_NONE, + OPTPARSE_REQUIRED, + OPTPARSE_OPTIONAL +}; + +struct optparse_long { + const char *longname; + int shortname; + enum optparse_argtype argtype; +}; + +/** + * Initializes the parser state. + */ +OPTPARSE_API +void optparse_init(struct optparse *options, char **argv); + +/** + * Read the next option in the argv array. + * @param optstring a getopt()-formatted option string. + * @return the next option character, -1 for done, or '?' for error + * + * Just like getopt(), a character followed by no colons means no + * argument. One colon means the option has a required argument. Two + * colons means the option takes an optional argument. + */ +OPTPARSE_API +int optparse(struct optparse *options, const char *optstring); + +/** + * Handles GNU-style long options in addition to getopt() options. + * This works a lot like GNU's getopt_long(). The last option in + * longopts must be all zeros, marking the end of the array. The + * longindex argument may be NULL. 
+ */ +OPTPARSE_API +int optparse_long(struct optparse *options, + const struct optparse_long *longopts, + int *longindex); + +/** + * Used for stepping over non-option arguments. + * @return the next non-option argument, or NULL for no more arguments + * + * Argument parsing can continue with optparse() after using this + * function. That would be used to parse the options for the + * subcommand returned by optparse_arg(). This function allows you to + * ignore the value of optind. + */ +OPTPARSE_API +char *optparse_arg(struct optparse *options); + +/* Implementation */ +#ifdef OPTPARSE_IMPLEMENTATION + +#define OPTPARSE_MSG_INVALID "invalid option" +#define OPTPARSE_MSG_MISSING "option requires an argument" +#define OPTPARSE_MSG_TOOMANY "option takes no arguments" + +static int +optparse_error(struct optparse *options, const char *msg, const char *data) +{ + unsigned p = 0; + const char *sep = " -- '"; + while (*msg) + options->errmsg[p++] = *msg++; + while (*sep) + options->errmsg[p++] = *sep++; + while (p < sizeof(options->errmsg) - 2 && *data) + options->errmsg[p++] = *data++; + options->errmsg[p++] = '\''; + options->errmsg[p++] = '\0'; + return '?'; +} + +OPTPARSE_API +void +optparse_init(struct optparse *options, char **argv) +{ + options->argv = argv; + options->permute = 1; + options->optind = argv[0] != 0; + options->subopt = 0; + options->optarg = 0; + options->errmsg[0] = '\0'; +} + +static int +optparse_is_dashdash(const char *arg) +{ + return arg != 0 && arg[0] == '-' && arg[1] == '-' && arg[2] == '\0'; +} + +static int +optparse_is_shortopt(const char *arg) +{ + return arg != 0 && arg[0] == '-' && arg[1] != '-' && arg[1] != '\0'; +} + +static int +optparse_is_longopt(const char *arg) +{ + return arg != 0 && arg[0] == '-' && arg[1] == '-' && arg[2] != '\0'; +} + +static void +optparse_permute(struct optparse *options, int index) +{ + char *nonoption = options->argv[index]; + int i; + for (i = index; i < options->optind - 1; i++) + options->argv[i] = options->argv[i + 1]; + options->argv[options->optind - 1] = nonoption; +} + +static int +optparse_argtype(const char *optstring, char c) +{ + int count = OPTPARSE_NONE; + if (c == ':') + return -1; + for (; *optstring && c != *optstring; optstring++); + if (!*optstring) + return -1; + if (optstring[1] == ':') + count += optstring[2] == ':' ? 
2 : 1; + return count; +} + +OPTPARSE_API +int +optparse(struct optparse *options, const char *optstring) +{ + int type; + char *next; + char *option = options->argv[options->optind]; + options->errmsg[0] = '\0'; + options->optopt = 0; + options->optarg = 0; + if (option == 0) { + return -1; + } else if (optparse_is_dashdash(option)) { + options->optind++; /* consume "--" */ + return -1; + } else if (!optparse_is_shortopt(option)) { + if (options->permute) { + int index = options->optind++; + int r = optparse(options, optstring); + optparse_permute(options, index); + options->optind--; + return r; + } else { + return -1; + } + } + option += options->subopt + 1; + options->optopt = option[0]; + type = optparse_argtype(optstring, option[0]); + next = options->argv[options->optind + 1]; + switch (type) { + case -1: { + char str[2] = {0, 0}; + str[0] = option[0]; + options->optind++; + return optparse_error(options, OPTPARSE_MSG_INVALID, str); + } + case OPTPARSE_NONE: + if (option[1]) { + options->subopt++; + } else { + options->subopt = 0; + options->optind++; + } + return option[0]; + case OPTPARSE_REQUIRED: + options->subopt = 0; + options->optind++; + if (option[1]) { + options->optarg = option + 1; + } else if (next != 0) { + options->optarg = next; + options->optind++; + } else { + char str[2] = {0, 0}; + str[0] = option[0]; + options->optarg = 0; + return optparse_error(options, OPTPARSE_MSG_MISSING, str); + } + return option[0]; + case OPTPARSE_OPTIONAL: + options->subopt = 0; + options->optind++; + if (option[1]) + options->optarg = option + 1; + else + options->optarg = 0; + return option[0]; + } + return 0; +} + +OPTPARSE_API +char * +optparse_arg(struct optparse *options) +{ + char *option = options->argv[options->optind]; + options->subopt = 0; + if (option != 0) + options->optind++; + return option; +} + +static int +optparse_longopts_end(const struct optparse_long *longopts, int i) +{ + return !longopts[i].longname && !longopts[i].shortname; +} + +static void +optparse_from_long(const struct optparse_long *longopts, char *optstring) +{ + char *p = optstring; + int i; + for (i = 0; !optparse_longopts_end(longopts, i); i++) { + if (longopts[i].shortname && longopts[i].shortname < 127) { + int a; + *p++ = longopts[i].shortname; + for (a = 0; a < (int)longopts[i].argtype; a++) + *p++ = ':'; + } + } + *p = '\0'; +} + +/* Unlike strcmp(), handles options containing "=". */ +static int +optparse_longopts_match(const char *longname, const char *option) +{ + const char *a = option, *n = longname; + if (longname == 0) + return 0; + for (; *a && *n && *a != '='; a++, n++) + if (*a != *n) + return 0; + return *n == '\0' && (*a == '\0' || *a == '='); +} + +/* Return the part after "=", or NULL. 
*/ +static char * +optparse_longopts_arg(char *option) +{ + for (; *option && *option != '='; option++); + if (*option == '=') + return option + 1; + else + return 0; +} + +static int +optparse_long_fallback(struct optparse *options, + const struct optparse_long *longopts, + int *longindex) +{ + int result; + char optstring[96 * 3 + 1]; /* 96 ASCII printable characters */ + optparse_from_long(longopts, optstring); + result = optparse(options, optstring); + if (longindex != 0) { + *longindex = -1; + if (result != -1) { + int i; + for (i = 0; !optparse_longopts_end(longopts, i); i++) + if (longopts[i].shortname == options->optopt) + *longindex = i; + } + } + return result; +} + +OPTPARSE_API +int +optparse_long(struct optparse *options, + const struct optparse_long *longopts, + int *longindex) +{ + int i; + char *option = options->argv[options->optind]; + if (option == 0) { + return -1; + } else if (optparse_is_dashdash(option)) { + options->optind++; /* consume "--" */ + return -1; + } else if (optparse_is_shortopt(option)) { + return optparse_long_fallback(options, longopts, longindex); + } else if (!optparse_is_longopt(option)) { + if (options->permute) { + int index = options->optind++; + int r = optparse_long(options, longopts, longindex); + optparse_permute(options, index); + options->optind--; + return r; + } else { + return -1; + } + } + + /* Parse as long option. */ + options->errmsg[0] = '\0'; + options->optopt = 0; + options->optarg = 0; + option += 2; /* skip "--" */ + options->optind++; + for (i = 0; !optparse_longopts_end(longopts, i); i++) { + const char *name = longopts[i].longname; + if (optparse_longopts_match(name, option)) { + char *arg; + if (longindex) + *longindex = i; + options->optopt = longopts[i].shortname; + arg = optparse_longopts_arg(option); + if (longopts[i].argtype == OPTPARSE_NONE && arg != 0) { + return optparse_error(options, OPTPARSE_MSG_TOOMANY, name); + } if (arg != 0) { + options->optarg = arg; + } else if (longopts[i].argtype == OPTPARSE_REQUIRED) { + options->optarg = options->argv[options->optind]; + if (options->optarg == 0) + return optparse_error(options, OPTPARSE_MSG_MISSING, name); + else + options->optind++; + } + return options->optopt; + } + } + return optparse_error(options, OPTPARSE_MSG_INVALID, option); +} + +#endif /* OPTPARSE_IMPLEMENTATION */ +#endif /* OPTPARSE_H */ diff --git a/lua/clp.lua b/lua/clp.lua @@ -0,0 +1,53 @@ +clp = {} + +local theme = require('theme') +local ftdetect = require('ftdetect') +local lexers = require('lexer') +local default_theme = theme.default_theme +local selected_theme = theme.selected_theme + +function write(args) + local filename = args.filename + local filetype_override = args.filetype_override + local syntax + if filetype_override ~= nil then + syntax = filetype_override + else + syntax = ftdetect.lookup_lexer(filename) + end + + local lexer = lexers.load(syntax) + if not lexer then + print(string.format('Failed to load lexer: `%s`', syntax)) + return 1 + end + + local file = assert(io.open(filename, 'r')) + local text = file:read('*all') + local tokens = lexer:lex(text, 1) + local token_start = 1 + local last = '' + + for i = 1, #tokens, 2 do + local token_end = tokens[i + 1] - 1 + local name = tokens[i] + + local style = default_theme[name] + if style ~= nil then + -- Whereas the lexer reports all other syntaxes over + -- the entire span of a token, it reports 'default' + -- byte-by-byte. We emit only the first 'default' of + -- a series in order to properly display multibyte + -- UTF-8 characters. 
+ if not (last == 'default' and name == 'default') then + io.write(tostring(style)) + end + last = name + end + io.write(text:sub(token_start, token_end)) + token_start = token_end + 1 + end + file:close() +end + +return clp diff --git a/lua/colors.lua b/lua/colors.lua @@ -0,0 +1,83 @@ +-- Copyright (c) 2009 Rob Hoelz <rob@hoelzro.net> +-- +-- Permission is hereby granted, free of charge, to any person obtaining a copy +-- of this software and associated documentation files (the "Software"), to deal +-- in the Software without restriction, including without limitation the rights +-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +-- copies of the Software, and to permit persons to whom the Software is +-- furnished to do so, subject to the following conditions: +-- +-- The above copyright notice and this permission notice shall be included in +-- all copies or substantial portions of the Software. +-- +-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +-- THE SOFTWARE. + +local pairs = pairs +local tostring = tostring +local setmetatable = setmetatable +local schar = string.char + +local colors = {} + +local colormt = {} + +function colormt:__tostring() + return self.value +end + +function colormt:__concat(other) + return tostring(self) .. tostring(other) +end + +function colormt:__call(s) + return self .. s .. colors.reset +end + +local function makecolor(value) + return setmetatable({ value = schar(27) .. '[' .. tostring(value) .. 
'm' }, colormt) +end + +local colorvalues = { + -- attributes + reset = 0, + clear = 0, + default = 0, + bright = 1, + dim = 2, + underscore = 4, + blink = 5, + reverse = 7, + hidden = 8, + + -- foreground + black = 30, + red = 31, + green = 32, + yellow = 33, + blue = 34, + magenta = 35, + cyan = 36, + white = 37, + + -- background + onblack = 40, + onred = 41, + ongreen = 42, + onyellow = 43, + onblue = 44, + onmagenta = 45, + oncyan = 46, + onwhite = 47, +} + +for c, v in pairs(colorvalues) do + colors[c] = makecolor(v) +end + +return colors diff --git a/lua/ftdetect.lua b/lua/ftdetect.lua @@ -0,0 +1,485 @@ +ftdetect = {} + +ftdetect.filetypes = { + actionscript = { + ext = { "%.as$", "%.asc$" }, + }, + ada = { + ext = { "%.adb$", "%.ads$" }, + }, + ansi_c = { + ext = { "%.c$", "%.C$", "%.h$" }, + mime = { "text/x-c" }, + }, + antlr = { + ext = { "%.g$", "%.g4$" }, + }, + apdl = { + ext = { "%.ans$", "%.inp$", "%.mac$" }, + }, + apl = { + ext = { "%.apl$" } + }, + applescript = { + ext = { "%.applescript$" }, + }, + asm = { + ext = { "%.asm$", "%.ASM$", "%.s$", "%.S$" }, + }, + asp = { + ext = { "%.asa$", "%.asp$", "%.hta$" }, + }, + autoit = { + ext = { "%.au3$", "%.a3x$" }, + }, + awk = { + hashbang = { "^/usr/bin/[mng]awk%s+%-f" }, + utility = { "^[mgn]?awk$", "^goawk$" }, + ext = { "%.awk$" }, + }, + bash = { + utility = { "^[db]ash$", "^sh$","^t?csh$","^zsh$" }, + ext = { "%.bash$", "%.csh$", "%.sh$", "%.zsh$" ,"^APKBUILD$", "%.ebuild$", "^.bashrc$", "^.bash_profile$" }, + mime = { "text/x-shellscript", "application/x-shellscript" }, + }, + batch = { + ext = { "%.bat$", "%.cmd$" }, + }, + bibtex = { + ext = { "%.bib$" }, + }, + boo = { + ext = { "%.boo$" }, + }, + caml = { + ext = { "%.caml$", "%.ml$", "%.mli$", "%.mll$", "%.mly$" }, + }, + chuck = { + ext = { "%.ck$" }, + }, + clojure = { + ext = { "%.clj$", "%.cljc$", "%.cljs$", "%.edn$" } + }, + cmake = { + ext = { "%.cmake$", "%.cmake.in$", "%.ctest$", "%.ctest.in$" }, + }, + coffeescript = { + ext = { "%.coffee$" }, + mime = { "text/x-coffee" }, + }, + cpp = { + ext = { "%.cpp$", "%.cxx$", "%.c++$", "%.cc$", "%.hh$", "%.hpp$", "%.hxx$", "%.h++$" }, + mime = { "text/x-c++" }, + }, + crontab = { + ext = { "^crontab.*$" }, + cmd = { "set savemethod inplace" }, + }, + crystal = { + ext = { "%.cr$" }, + }, + csharp = { + ext = { "%.cs$" }, + }, + css = { + ext = { "%.css$" }, + mime = { "text/x-css" }, + }, + cuda = { + ext = { "%.cu$", "%.cuh$" }, + }, + dart = { + ext = { "%.dart$" }, + }, + desktop = { + ext = { "%.desktop$" }, + }, + diff = { + ext = { "%.diff$", "%.patch$", "%.rej$", "^COMMIT_EDITMSG$" }, + cmd = { "set colorcolumn 72" }, + }, + dmd = { + ext = { "%.d$", "%.di$" }, + }, + dockerfile = { + ext = { "^Dockerfile$", "%.Dockerfile$" }, + }, + dot = { + ext = { "%.dot$" }, + }, + dsv = { + ext = { "^group$", "^gshadow$", "^passwd$", "^shadow$" }, + }, + eiffel = { + ext = { "%.e$", "%.eif$" }, + }, + elixir = { + ext = { "%.ex$", "%.exs$" }, + }, + elm = { + ext = { "%.elm$" }, + }, + erlang = { + ext = { "%.erl$", "%.hrl$" }, + }, + fantom = { + ext = { "%.fan$" }, + }, + faust = { + ext = { "%.dsp$" }, + }, + fennel = { + ext = { "%.fnl$" }, + }, + fish = { + utility = { "^fish$" }, + ext = { "%.fish$" }, + }, + forth = { + ext = { "%.forth$", "%.frt$", "%.fs$", "%.fth$" }, + }, + fortran = { + ext = { "%.f$", "%.for$", "%.ftn$", "%.fpp$", "%.f77$", "%.f90$", "%.f95$", "%.f03$", "%.f08$" }, + }, + fsharp = { + ext = { "%.fs$" }, + }, + fstab = { + ext = { "fstab" }, + }, + gap = { + ext = { "%.g$", "%.gd$", 
"%.gi$", "%.gap$" }, + }, + gemini = { + ext = { "%.gmi" }, + mime = { "text/gemini" }, + }, + gettext = { + ext = { "%.po$", "%.pot$" }, + }, + gherkin = { + ext = { "%.feature$" }, + }, + ['git-rebase'] = { + ext = { "git%-rebase%-todo" }, + }, + glsl = { + ext = { "%.glslf$", "%.glslv$" }, + }, + gnuplot = { + ext = { "%.dem$", "%.plt$" }, + }, + go = { + ext = { "%.go$" }, + }, + groovy = { + ext = { "%.groovy$", "%.gvy$", "^Jenkinsfile$" }, + }, + gtkrc = { + ext = { "%.gtkrc$" }, + }, + hare = { + ext = { "%.ha$" } + }, + haskell = { + ext = { "%.hs$" }, + mime = { "text/x-haskell" }, + }, + html = { + ext = { "%.htm$", "%.html$", "%.shtm$", "%.shtml$", "%.xhtml$" }, + mime = { "text/x-html" }, + }, + icon = { + ext = { "%.icn$" }, + }, + idl = { + ext = { "%.idl$", "%.odl$" }, + }, + inform = { + ext = { "%.inf$", "%.ni$" }, + }, + ini = { + ext = { "%.cfg$", "%.cnf$", "%.conf$", "%.inf$", "%.ini$", "%.reg$" }, + }, + io_lang = { + ext = { "%.io$" }, + }, + java = { + ext = { "%.bsh$", "%.java$" }, + }, + javascript = { + ext = { "%.cjs$", "%.js$", "%.jsfl$", "%.mjs$", "%.ts$", "%.jsx$", "%.tsx$" }, + }, + json = { + ext = { "%.json$" }, + mime = { "text/x-json" }, + }, + jsp = { + ext = { "%.jsp$" }, + }, + julia = { + ext = { "%.jl$" }, + }, + latex = { + ext = { "%.bbl$", "%.cls$", "%.dtx$", "%.ins$", "%.ltx$", "%.tex$", "%.sty$" }, + mime = { "text/x-tex" }, + }, + ledger = { + ext = { "%.ledger$", "%.journal$" }, + }, + less = { + ext = { "%.less$" }, + }, + lilypond = { + ext = { "%.ily$", "%.ly$" }, + }, + lisp = { + ext = { "%.cl$", "%.el$", "%.lisp$", "%.lsp$" }, + mime = { "text/x-lisp" }, + }, + litcoffee = { + ext = { "%.litcoffee$" }, + }, + logtalk = { + ext = { "%.lgt$" }, + }, + lua = { + utility = {"^lua%-?5?%d?$", "^lua%-?5%.%d$" }, + ext = { "%.lua$" }, + mime = { "text/x-lua" }, + }, + makefile = { + hashbang = {"^#!/usr/bin/make"}, + utility = {"^make$"}, + ext = { "%.iface$", "%.mak$", "%.mk$", "GNUmakefile", "makefile", "Makefile" }, + mime = { "text/x-makefile" }, + }, + man = { + ext = { + "%.1$", "%.2$", "%.3$", "%.4$", "%.5$", "%.6$", "%.7$", + "%.8$", "%.9$", "%.1x$", "%.2x$", "%.3x$", "%.4x$", + "%.5x$", "%.6x$", "%.7x$", "%.8x$", "%.9x$" + }, + }, + markdown = { + ext = { "%.md$", "%.markdown$" }, + mime = { "text/x-markdown" }, + }, + meson = { + ext = { "^meson%.build$" }, + }, + moonscript = { + ext = { "%.moon$" }, + mime = { "text/x-moon" }, + }, + myrddin = { + ext = { "%.myr$" }, + }, + nemerle = { + ext = { "%.n$" }, + }, + networkd = { + ext = { "%.link$", "%.network$", "%.netdev$" }, + }, + nim = { + ext = { "%.nim$" }, + }, + nsis = { + ext = { "%.nsh$", "%.nsi$", "%.nsis$" }, + }, + objective_c = { + ext = { "%.m$", "%.mm$", "%.objc$" }, + mime = { "text/x-objc" }, + }, + pascal = { + ext = { "%.dpk$", "%.dpr$", "%.p$", "%.pas$" }, + }, + perl = { + ext = { "%.al$", "%.perl$", "%.pl$", "%.pm$", "%.pod$" }, + mime = { "text/x-perl" }, + }, + php = { + ext = { "%.inc$", "%.php$", "%.php3$", "%.php4$", "%.phtml$" }, + }, + pico8 = { + ext = { "%.p8$" }, + }, + pike = { + ext = { "%.pike$", "%.pmod$" }, + }, + pkgbuild = { + ext = { "^PKGBUILD$", "%.PKGBUILD$" }, + }, + pony = { + ext = { "%.pony$" }, + }, + powershell = { + ext = { "%.ps1$" }, + }, + prolog = { + ext = { "%.pl$", "%.pro$", "%.prolog$" }, + }, + props = { + ext = { "%.props$", "%.properties$" }, + }, + protobuf = { + ext = { "%.proto$" }, + }, + ps = { + ext = { "%.eps$", "%.ps$" }, + }, + pure = { + ext = { "%.pure$" }, + }, + python = { + utility = { "^python%d?" 
}, + ext = { "%.sc$", "%.py$", "%.pyw$" }, + mime = { "text/x-python", "text/x-script.python" }, + }, + reason = { + ext = { "%.re$" }, + }, + rc = { + utility = {"^rc$"}, + ext = { "%.rc$", "%.es$" }, + }, + rebol = { + ext = { "%.r$", "%.reb$" }, + }, + rest = { + ext = { "%.rst$" }, + }, + rexx = { + ext = { "%.orx$", "%.rex$" }, + }, + rhtml = { + ext = { "%.erb$", "%.rhtml$" }, + }, + routeros = { + ext = { "%.rsc" }, + detect = function(_, data) + return data:match("^#.* by RouterOS") + end + }, + rstats = { + ext = { "%.R$", "%.Rout$", "%.Rhistory$", "%.Rt$", "Rout.save", "Rout.fail" }, + }, + ruby = { + ext = { "%.Rakefile$", "%.rake$", "%.rb$", "%.rbw$", "^Vagrantfile$" }, + mime = { "text/x-ruby" }, + }, + rust = { + ext = { "%.rs$" }, + mime = { "text/x-rust" }, + }, + sass = { + ext = { "%.sass$", "%.scss$" }, + mime = { "text/x-sass", "text/x-scss" }, + }, + scala = { + ext = { "%.scala$" }, + mime = { "text/x-scala" }, + }, + scheme = { + ext = { "%.rkt$", "%.sch$", "%.scm$", "%.sld$", "%.sls$", "%.ss$" }, + }, + smalltalk = { + ext = { "%.changes$", "%.st$", "%.sources$" }, + }, + sml = { + ext = { "%.sml$", "%.fun$", "%.sig$" }, + }, + snobol4 = { + ext = { "%.sno$", "%.SNO$" }, + }, + spin = { + ext = { "%.spin$" } + }, + sql= { + ext = { "%.ddl$", "%.sql$" }, + }, + strace = { + detect = function(_, data) + return data:match("^execve%(") + end + }, + systemd = { + ext = { + "%.automount$", "%.device$", "%.mount$", "%.path$", + "%.scope$", "%.service$", "%.slice$", "%.socket$", + "%.swap$", "%.target$", "%.timer$" + }, + }, + taskpaper = { + ext = { "%.taskpaper$" }, + }, + tcl = { + utility = {"^tclsh$", "^jimsh$" }, + ext = { "%.tcl$", "%.tk$" }, + }, + texinfo = { + ext = { "%.texi$" }, + }, + text = { + ext = { "%.txt$" }, + -- Do *not* list mime "text/plain" here, it is covered below, + -- see 'try text lexer as a last resort' + }, + toml = { + ext = { "%.toml$" }, + }, + vala = { + ext = { "%.vala$" } + }, + vb = { + ext = { + "%.asa$", "%.bas$", "%.ctl$", "%.dob$", + "%.dsm$", "%.dsr$", "%.frm$", "%.pag$", "%.vb$", + "%.vba$", "%.vbs$" + }, + }, + vcard = { + ext = { "%.vcf$", "%.vcard$" }, + }, + verilog = { + ext = { "%.v$", "%.ver$", "%.sv$" }, + }, + vhdl = { + ext = { "%.vh$", "%.vhd$", "%.vhdl$" }, + }, + wsf = { + ext = { "%.wsf$" }, + }, + xs = { + ext = { "%.xs$", "^%.xsin$", "^%.xsrc$" }, + }, + xml = { + ext = { + "%.dtd$", "%.glif$", "%.plist$", "%.svg$", "%.xml$", + "%.xsd$", "%.xsl$", "%.xslt$", "%.xul$" + }, + }, + xtend = { + ext = {"%.xtend$" }, + }, + yaml = { + ext = { "%.yaml$", "%.yml$" }, + mime = { "text/x-yaml" }, + }, + zig = { + ext = { "%.zig$" }, + }, +} + +ftdetect.lookup_lexer = function (filename) + -- uhh + for lang, ft in pairs(ftdetect.filetypes) do + for _, pattern in pairs(ft.ext or {}) do + if filename:match(pattern) then + return lang; + end + end + end +end + +return ftdetect diff --git a/lua/lexer.lua b/lua/lexer.lua @@ -0,0 +1,1675 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. + +local M = {} + +--[=[ This comment is for LuaDoc. +--- +-- Lexes Scintilla documents with Lua and LPeg. +-- +-- ## Overview +-- +-- Lexers highlight the syntax of source code. Scintilla (the editing component +-- behind [Textadept][] and [SciTE][]) traditionally uses static, compiled C++ +-- lexers which are notoriously difficult to create and/or extend. On the other +-- hand, Lua makes it easy to to rapidly create new lexers, extend existing +-- ones, and embed lexers within one another. 
Lua lexers tend to be more +-- readable than C++ lexers too. +-- +-- Lexers are Parsing Expression Grammars, or PEGs, composed with the Lua +-- [LPeg library][]. The following table comes from the LPeg documentation and +-- summarizes all you need to know about constructing basic LPeg patterns. This +-- module provides convenience functions for creating and working with other +-- more advanced patterns and concepts. +-- +-- Operator | Description +-- ---------------------|------------ +-- `lpeg.P(string)` | Matches `string` literally. +-- `lpeg.P(`_`n`_`)` | Matches exactly _`n`_ characters. +-- `lpeg.S(string)` | Matches any character in set `string`. +-- `lpeg.R("`_`xy`_`")` | Matches any character between range `x` and `y`. +-- `patt^`_`n`_ | Matches at least _`n`_ repetitions of `patt`. +-- `patt^-`_`n`_ | Matches at most _`n`_ repetitions of `patt`. +-- `patt1 * patt2` | Matches `patt1` followed by `patt2`. +-- `patt1 + patt2` | Matches `patt1` or `patt2` (ordered choice). +-- `patt1 - patt2` | Matches `patt1` if `patt2` does not match. +-- `-patt` | Equivalent to `("" - patt)`. +-- `#patt` | Matches `patt` but consumes no input. +-- +-- The first part of this document deals with rapidly constructing a simple +-- lexer. The next part deals with more advanced techniques, such as custom +-- coloring and embedding lexers within one another. Following that is a +-- discussion about code folding, or being able to tell Scintilla which code +-- blocks are "foldable" (temporarily hideable from view). After that are +-- instructions on how to use LPeg lexers with the aforementioned Textadept and +-- SciTE editors. Finally there are comments on lexer performance and +-- limitations. +-- +-- [LPeg library]: http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html +-- [Textadept]: http://foicica.com/textadept +-- [SciTE]: http://scintilla.org/SciTE.html +-- +-- ## Lexer Basics +-- +-- The *lexers/* directory contains all lexers, including your new one. Before +-- attempting to write one from scratch though, first determine if your +-- programming language is similar to any of the 80+ languages supported. If so, +-- you may be able to copy and modify that lexer, saving some time and effort. +-- The filename of your lexer should be the name of your programming language in +-- lower case followed by a *.lua* extension. For example, a new Lua lexer has +-- the name *lua.lua*. +-- +-- Note: Try to refrain from using one-character language names like "c", "d", +-- or "r". For example, Scintillua uses "ansi_c", "dmd", and "rstats", +-- respectively. +-- +-- ### New Lexer Template +-- +-- There is a *lexers/template.txt* file that contains a simple template for a +-- new lexer. Feel free to use it, replacing the '?'s with the name of your +-- lexer: +-- +-- -- ? LPeg lexer. +-- +-- local l = require('lexer') +-- local token, word_match = l.token, l.word_match +-- local P, R, S = lpeg.P, lpeg.R, lpeg.S +-- +-- local M = {_NAME = '?'} +-- +-- -- Whitespace. +-- local ws = token(l.WHITESPACE, l.space^1) +-- +-- M._rules = { +-- {'whitespace', ws}, +-- } +-- +-- M._tokenstyles = { +-- +-- } +-- +-- return M +-- +-- The first 3 lines of code simply define often used convenience variables. The +-- 5th and last lines define and return the lexer object Scintilla uses; they +-- are very important and must be part of every lexer. The sixth line defines +-- something called a "token", an essential building block of lexers. You will +-- learn about tokens shortly. 
The rest of the code defines a set of grammar
+-- rules and token styles. You will learn about those later. Note, however, the
+-- `M.` prefix in front of `_rules` and `_tokenstyles`: not only do these tables
+-- belong to their respective lexers, but any non-local variables need the `M.`
+-- prefix too so as not to affect Lua's global environment. All in all, this is
+-- a minimal, working lexer that you can build on.
+--
+-- ### Tokens
+--
+-- Take a moment to think about your programming language's structure. What kind
+-- of key elements does it have? In the template shown earlier, one predefined
+-- element all languages have is whitespace. Your language probably also has
+-- elements like comments, strings, and keywords. Lexers refer to these elements
+-- as "tokens". Tokens are the fundamental "building blocks" of lexers. Lexers
+-- break down source code into tokens for coloring, which results in the syntax
+-- highlighting familiar to you. It is up to you how specific your lexer is when
+-- it comes to tokens. Perhaps only distinguishing between keywords and
+-- identifiers is necessary, or maybe recognizing constants and built-in
+-- functions, methods, or libraries is desirable. The Lua lexer, for example,
+-- defines 11 tokens: whitespace, comments, strings, numbers, keywords, built-in
+-- functions, constants, built-in libraries, identifiers, labels, and operators.
+-- Even though constants, built-in functions, and built-in libraries are subsets
+-- of identifiers, Lua programmers find it helpful for the lexer to distinguish
+-- between them all. It is perfectly acceptable to just recognize keywords and
+-- identifiers.
+--
+-- In a lexer, tokens consist of a token name and an LPeg pattern that matches a
+-- sequence of characters recognized as an instance of that token. Create tokens
+-- using the [`lexer.token()`]() function. Let us examine the "whitespace" token
+-- defined in the template shown earlier:
+--
+--     local ws = token(l.WHITESPACE, l.space^1)
+--
+-- At first glance, the first argument does not appear to be a string name and
+-- the second argument does not appear to be an LPeg pattern. Perhaps you
+-- expected something like:
+--
+--     local ws = token('whitespace', S('\t\v\f\n\r ')^1)
+--
+-- The `lexer` (`l`) module actually provides a convenient list of common token
+-- names and common LPeg patterns for you to use. Token names include
+-- [`lexer.DEFAULT`](), [`lexer.WHITESPACE`](), [`lexer.COMMENT`](),
+-- [`lexer.STRING`](), [`lexer.NUMBER`](), [`lexer.KEYWORD`](),
+-- [`lexer.IDENTIFIER`](), [`lexer.OPERATOR`](), [`lexer.ERROR`](),
+-- [`lexer.PREPROCESSOR`](), [`lexer.CONSTANT`](), [`lexer.VARIABLE`](),
+-- [`lexer.FUNCTION`](), [`lexer.CLASS`](), [`lexer.TYPE`](), [`lexer.LABEL`](),
+-- [`lexer.REGEX`](), and [`lexer.EMBEDDED`](). Patterns include
+-- [`lexer.any`](), [`lexer.ascii`](), [`lexer.extend`](), [`lexer.alpha`](),
+-- [`lexer.digit`](), [`lexer.alnum`](), [`lexer.lower`](), [`lexer.upper`](),
+-- [`lexer.xdigit`](), [`lexer.cntrl`](), [`lexer.graph`](), [`lexer.print`](),
+-- [`lexer.punct`](), [`lexer.space`](), [`lexer.newline`](),
+-- [`lexer.nonnewline`](), [`lexer.nonnewline_esc`](), [`lexer.dec_num`](),
+-- [`lexer.hex_num`](), [`lexer.oct_num`](), [`lexer.integer`](),
+-- [`lexer.float`](), and [`lexer.word`]().
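+-- As a quick sketch of how these pieces combine, two tokens built purely from
+-- the predefined names and patterns above might look like the following (the
+-- `identifier` and `operator` names here are illustrative, not part of the
+-- template):
+--
+--     local identifier = token(l.IDENTIFIER, l.word)
+--     local operator = token(l.OPERATOR, S('+-*/%^=<>(){}[]'))
+--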
You may use your own token names if +-- none of the above fit your language, but an advantage to using predefined +-- token names is that your lexer's tokens will inherit the universal syntax +-- highlighting color theme used by your text editor. +-- +-- #### Example Tokens +-- +-- So, how might you define other tokens like comments, strings, and keywords? +-- Here are some examples. +-- +-- **Comments** +-- +-- Line-style comments with a prefix character(s) are easy to express with LPeg: +-- +-- local shell_comment = token(l.COMMENT, '#' * l.nonnewline^0) +-- local c_line_comment = token(l.COMMENT, '//' * l.nonnewline_esc^0) +-- +-- The comments above start with a '#' or "//" and go to the end of the line. +-- The second comment recognizes the next line also as a comment if the current +-- line ends with a '\' escape character. +-- +-- C-style "block" comments with a start and end delimiter are also easy to +-- express: +-- +-- local c_comment = token(l.COMMENT, '/*' * (l.any - '*/')^0 * P('*/')^-1) +-- +-- This comment starts with a "/\*" sequence and contains anything up to and +-- including an ending "\*/" sequence. The ending "\*/" is optional so the lexer +-- can recognize unfinished comments as comments and highlight them properly. +-- +-- **Strings** +-- +-- It is tempting to think that a string is not much different from the block +-- comment shown above in that both have start and end delimiters: +-- +-- local dq_str = '"' * (l.any - '"')^0 * P('"')^-1 +-- local sq_str = "'" * (l.any - "'")^0 * P("'")^-1 +-- local simple_string = token(l.STRING, dq_str + sq_str) +-- +-- However, most programming languages allow escape sequences in strings such +-- that a sequence like "\\&quot;" in a double-quoted string indicates that the +-- '&quot;' is not the end of the string. The above token incorrectly matches +-- such a string. Instead, use the [`lexer.delimited_range()`]() convenience +-- function. +-- +-- local dq_str = l.delimited_range('"') +-- local sq_str = l.delimited_range("'") +-- local string = token(l.STRING, dq_str + sq_str) +-- +-- In this case, the lexer treats '\' as an escape character in a string +-- sequence. +-- +-- **Keywords** +-- +-- Instead of matching _n_ keywords with _n_ `P('keyword_`_`n`_`')` ordered +-- choices, use another convenience function: [`lexer.word_match()`](). It is +-- much easier and more efficient to write word matches like: +-- +-- local keyword = token(l.KEYWORD, l.word_match{ +-- 'keyword_1', 'keyword_2', ..., 'keyword_n' +-- }) +-- +-- local case_insensitive_keyword = token(l.KEYWORD, l.word_match({ +-- 'KEYWORD_1', 'keyword_2', ..., 'KEYword_n' +-- }, nil, true)) +-- +-- local hyphened_keyword = token(l.KEYWORD, l.word_match({ +-- 'keyword-1', 'keyword-2', ..., 'keyword-n' +-- }, '-')) +-- +-- By default, characters considered to be in keywords are in the set of +-- alphanumeric characters and underscores. The last token demonstrates how to +-- allow '-' (hyphen) characters to be in keywords as well. +-- +-- **Numbers** +-- +-- Most programming languages have the same format for integer and float tokens, +-- so it might be as simple as using a couple of predefined LPeg patterns: +-- +-- local number = token(l.NUMBER, l.float + l.integer) +-- +-- However, some languages allow postfix characters on integers. 
+-- +-- local integer = P('-')^-1 * (l.dec_num * S('lL')^-1) +-- local number = token(l.NUMBER, l.float + l.hex_num + integer) +-- +-- Your language may need other tweaks, but it is up to you how fine-grained you +-- want your highlighting to be. After all, you are not writing a compiler or +-- interpreter! +-- +-- ### Rules +-- +-- Programming languages have grammars, which specify valid token structure. For +-- example, comments usually cannot appear within a string. Grammars consist of +-- rules, which are simply combinations of tokens. Recall from the lexer +-- template the `_rules` table, which defines all the rules used by the lexer +-- grammar: +-- +-- M._rules = { +-- {'whitespace', ws}, +-- } +-- +-- Each entry in a lexer's `_rules` table consists of a rule name and its +-- associated pattern. Rule names are completely arbitrary and serve only to +-- identify and distinguish between different rules. Rule order is important: if +-- text does not match the first rule, the lexer tries the second rule, and so +-- on. This simple grammar says to match whitespace tokens under a rule named +-- "whitespace". +-- +-- To illustrate the importance of rule order, here is an example of a +-- simplified Lua grammar: +-- +-- M._rules = { +-- {'whitespace', ws}, +-- {'keyword', keyword}, +-- {'identifier', identifier}, +-- {'string', string}, +-- {'comment', comment}, +-- {'number', number}, +-- {'label', label}, +-- {'operator', operator}, +-- } +-- +-- Note how identifiers come after keywords. In Lua, as with most programming +-- languages, the characters allowed in keywords and identifiers are in the same +-- set (alphanumerics plus underscores). If the lexer specified the "identifier" +-- rule before the "keyword" rule, all keywords would match identifiers and thus +-- incorrectly highlight as identifiers instead of keywords. The same idea +-- applies to function, constant, etc. tokens that you may want to distinguish +-- between: their rules should come before identifiers. +-- +-- So what about text that does not match any rules? For example in Lua, the '!' +-- character is meaningless outside a string or comment. Normally the lexer +-- skips over such text. If instead you want to highlight these "syntax errors", +-- add an additional end rule: +-- +-- M._rules = { +-- {'whitespace', ws}, +-- {'error', token(l.ERROR, l.any)}, +-- } +-- +-- This identifies and highlights any character not matched by an existing +-- rule as an `lexer.ERROR` token. +-- +-- Even though the rules defined in the examples above contain a single token, +-- rules may consist of multiple tokens. For example, a rule for an HTML tag +-- could consist of a tag token followed by an arbitrary number of attribute +-- tokens, allowing the lexer to highlight all tokens separately. The rule might +-- look something like this: +-- +-- {'tag', tag_start * (ws * attributes)^0 * tag_end^-1} +-- +-- Note however that lexers with complex rules like these are more prone to lose +-- track of their state. +-- +-- ### Summary +-- +-- Lexers primarily consist of tokens and grammar rules. At your disposal are a +-- number of convenience patterns and functions for rapidly creating a lexer. If +-- you choose to use predefined token names for your tokens, you do not have to +-- define how the lexer highlights them. The tokens will inherit the default +-- syntax highlighting color theme your editor uses. 
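+--
+-- To tie the basics together, here is a minimal sketch of a complete lexer for
+-- a hypothetical language with '#' comments, double-quoted strings, and a few
+-- keywords. It uses only predefined token names, so its `_tokenstyles` table
+-- stays empty (the language and its keyword list are invented for
+-- illustration):
+--
+--     local l = require('lexer')
+--     local token, word_match = l.token, l.word_match
+--     local P, R, S = lpeg.P, lpeg.R, lpeg.S
+--
+--     local M = {_NAME = 'example'}
+--
+--     local ws = token(l.WHITESPACE, l.space^1)
+--     local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+--     local string = token(l.STRING, l.delimited_range('"'))
+--     local keyword = token(l.KEYWORD, word_match{'if', 'else', 'end'})
+--     local identifier = token(l.IDENTIFIER, l.word)
+--
+--     M._rules = {
+--       {'whitespace', ws},
+--       {'keyword', keyword},
+--       {'identifier', identifier},
+--       {'string', string},
+--       {'comment', comment},
+--     }
+--
+--     M._tokenstyles = {
+--
+--     }
+--
+--     return M
+--
+-- Note that, per the rule-order discussion above, the "keyword" rule comes
+-- before the "identifier" rule so keywords do not highlight as identifiers.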
+-- +-- ## Advanced Techniques +-- +-- ### Styles and Styling +-- +-- The most basic form of syntax highlighting is assigning different colors to +-- different tokens. Instead of highlighting with just colors, Scintilla allows +-- for more rich highlighting, or "styling", with different fonts, font sizes, +-- font attributes, and foreground and background colors, just to name a few. +-- The unit of this rich highlighting is called a "style". Styles are simply +-- strings of comma-separated property settings. By default, lexers associate +-- predefined token names like `lexer.WHITESPACE`, `lexer.COMMENT`, +-- `lexer.STRING`, etc. with particular styles as part of a universal color +-- theme. These predefined styles include [`lexer.STYLE_CLASS`](), +-- [`lexer.STYLE_COMMENT`](), [`lexer.STYLE_CONSTANT`](), +-- [`lexer.STYLE_ERROR`](), [`lexer.STYLE_EMBEDDED`](), +-- [`lexer.STYLE_FUNCTION`](), [`lexer.STYLE_IDENTIFIER`](), +-- [`lexer.STYLE_KEYWORD`](), [`lexer.STYLE_LABEL`](), [`lexer.STYLE_NUMBER`](), +-- [`lexer.STYLE_OPERATOR`](), [`lexer.STYLE_PREPROCESSOR`](), +-- [`lexer.STYLE_REGEX`](), [`lexer.STYLE_STRING`](), [`lexer.STYLE_TYPE`](), +-- [`lexer.STYLE_VARIABLE`](), and [`lexer.STYLE_WHITESPACE`](). Like with +-- predefined token names and LPeg patterns, you may define your own styles. At +-- their core, styles are just strings, so you may create new ones and/or modify +-- existing ones. Each style consists of the following comma-separated settings: +-- +-- Setting | Description +-- ---------------|------------ +-- font:_name_ | The name of the font the style uses. +-- size:_int_ | The size of the font the style uses. +-- [not]bold | Whether or not the font face is bold. +-- weight:_int_ | The weight or boldness of a font, between 1 and 999. +-- [not]italics | Whether or not the font face is italic. +-- [not]underlined| Whether or not the font face is underlined. +-- fore:_color_ | The foreground color of the font face. +-- back:_color_ | The background color of the font face. +-- [not]eolfilled | Does the background color extend to the end of the line? +-- case:_char_ | The case of the font ('u': upper, 'l': lower, 'm': normal). +-- [not]visible | Whether or not the text is visible. +-- [not]changeable| Whether the text is changeable or read-only. +-- +-- Specify font colors in either "#RRGGBB" format, "0xBBGGRR" format, or the +-- decimal equivalent of the latter. As with token names, LPeg patterns, and +-- styles, there is a set of predefined color names, but they vary depending on +-- the current color theme in use. Therefore, it is generally not a good idea to +-- manually define colors within styles in your lexer since they might not fit +-- into a user's chosen color theme. Try to refrain from even using predefined +-- colors in a style because that color may be theme-specific. Instead, the best +-- practice is to either use predefined styles or derive new color-agnostic +-- styles from predefined ones. For example, Lua "longstring" tokens use the +-- existing `lexer.STYLE_STRING` style instead of defining a new one. +-- +-- #### Example Styles +-- +-- Defining styles is pretty straightforward. 
An empty style that inherits the
+-- default theme settings is simply an empty string:
+--
+--     local style_nothing = ''
+--
+-- A similar style but with a bold font face looks like this:
+--
+--     local style_bold = 'bold'
+--
+-- If you want the same style, but also with an italic font face, define the new
+-- style in terms of the old one:
+--
+--     local style_bold_italic = style_bold..',italics'
+--
+-- This allows you to derive new styles from predefined ones without having to
+-- rewrite them. This operation leaves the old style unchanged. Thus if you
+-- had a "static variable" token whose style you wanted to base off of
+-- `lexer.STYLE_VARIABLE`, it would probably look like:
+--
+--     local style_static_var = l.STYLE_VARIABLE..',italics'
+--
+-- The color theme files in the *lexers/themes/* folder give more examples of
+-- style definitions.
+--
+-- ### Token Styles
+--
+-- Lexers use the `_tokenstyles` table to assign tokens to particular styles.
+-- Recall the token definition and `_tokenstyles` table from the lexer template:
+--
+--     local ws = token(l.WHITESPACE, l.space^1)
+--
+--     ...
+--
+--     M._tokenstyles = {
+--
+--     }
+--
+-- Why is a style not assigned to the `lexer.WHITESPACE` token? As mentioned
+-- earlier, lexers automatically associate tokens that use predefined token
+-- names with a particular style. Only tokens with custom token names need
+-- manual style associations. As an example, consider a custom whitespace token:
+--
+--     local ws = token('custom_whitespace', l.space^1)
+--
+-- Assigning a style to this token looks like:
+--
+--     M._tokenstyles = {
+--       custom_whitespace = l.STYLE_WHITESPACE
+--     }
+--
+-- Do not confuse token names with rule names. They are completely different
+-- entities. In the example above, the lexer assigns the "custom_whitespace"
+-- token the existing style for `WHITESPACE` tokens. If instead you want to
+-- color the background of whitespace a shade of grey, it might look like:
+--
+--     local custom_style = l.STYLE_WHITESPACE..',back:$(color.grey)'
+--     M._tokenstyles = {
+--       custom_whitespace = custom_style
+--     }
+--
+-- Notice that the lexer performs Scintilla/SciTE-style "$()" property expansion.
+-- You may also use "%()". Remember to refrain from assigning specific colors in
+-- styles, but in this case, all user color themes probably define the
+-- "color.grey" property.
+--
+-- ### Line Lexers
+--
+-- By default, lexers match the arbitrary chunks of text passed to them by
+-- Scintilla. These chunks may be a full document, only the visible part of a
+-- document, or even just portions of lines. Some lexers need to match whole
+-- lines. For example, a lexer for the output of a file "diff" needs to know if
+-- the line started with a '+' or '-' and then style the entire line
+-- accordingly. To indicate that your lexer matches by line, use the
+-- `_LEXBYLINE` field:
+--
+--     M._LEXBYLINE = true
+--
+-- Now the input text for the lexer is a single line at a time. Keep in mind
+-- that line lexers do not have the ability to look ahead at subsequent lines.
+--
+-- ### Embedded Lexers
+--
+-- Lexers embed within one another very easily, requiring minimal effort. In the
+-- following sections, the lexer being embedded is called the "child" lexer and
+-- the lexer a child is being embedded in is called the "parent". For example,
+-- consider an HTML lexer and a CSS lexer. Either lexer stands alone for styling
+-- their respective HTML and CSS files. However, CSS can be embedded inside
+-- HTML.
In this specific case, the CSS lexer is the "child" lexer with the HTML +-- lexer being the "parent". Now consider an HTML lexer and a PHP lexer. This +-- sounds a lot like the case with CSS, but there is a subtle difference: PHP +-- _embeds itself_ into HTML while CSS is _embedded in_ HTML. This fundamental +-- difference results in two types of embedded lexers: a parent lexer that +-- embeds other child lexers in it (like HTML embedding CSS), and a child lexer +-- that embeds itself within a parent lexer (like PHP embedding itself in HTML). +-- +-- #### Parent Lexer +-- +-- Before embedding a child lexer into a parent lexer, the parent lexer needs to +-- load the child lexer. This is done with the [`lexer.load()`]() function. For +-- example, loading the CSS lexer within the HTML lexer looks like: +-- +-- local css = l.load('css') +-- +-- The next part of the embedding process is telling the parent lexer when to +-- switch over to the child lexer and when to switch back. The lexer refers to +-- these indications as the "start rule" and "end rule", respectively, and are +-- just LPeg patterns. Continuing with the HTML/CSS example, the transition from +-- HTML to CSS is when the lexer encounters a "style" tag with a "type" +-- attribute whose value is "text/css": +-- +-- local css_tag = P('<style') * P(function(input, index) +-- if input:find('^[^>]+type="text/css"', index) then +-- return index +-- end +-- end) +-- +-- This pattern looks for the beginning of a "style" tag and searches its +-- attribute list for the text "`type="text/css"`". (In this simplified example, +-- the Lua pattern does not consider whitespace between the '=' nor does it +-- consider that using single quotes is valid.) If there is a match, the +-- functional pattern returns a value instead of `nil`. In this case, the value +-- returned does not matter because we ultimately want to style the "style" tag +-- as an HTML tag, so the actual start rule looks like this: +-- +-- local css_start_rule = #css_tag * tag +-- +-- Now that the parent knows when to switch to the child, it needs to know when +-- to switch back. In the case of HTML/CSS, the switch back occurs when the +-- lexer encounters an ending "style" tag, though the lexer should still style +-- the tag as an HTML tag: +-- +-- local css_end_rule = #P('</style>') * tag +-- +-- Once the parent loads the child lexer and defines the child's start and end +-- rules, it embeds the child with the [`lexer.embed_lexer()`]() function: +-- +-- l.embed_lexer(M, css, css_start_rule, css_end_rule) +-- +-- The first parameter is the parent lexer object to embed the child in, which +-- in this case is `M`. The other three parameters are the child lexer object +-- loaded earlier followed by its start and end rules. +-- +-- #### Child Lexer +-- +-- The process for instructing a child lexer to embed itself into a parent is +-- very similar to embedding a child into a parent: first, load the parent lexer +-- into the child lexer with the [`lexer.load()`]() function and then create +-- start and end rules for the child lexer. However, in this case, swap the +-- lexer object arguments to [`lexer.embed_lexer()`](). For example, in the PHP +-- lexer: +-- +-- local html = l.load('html') +-- local php_start_rule = token('php_tag', '<?php ') +-- local php_end_rule = token('php_tag', '?>') +-- l.embed_lexer(html, M, php_start_rule, php_end_rule) +-- +-- ### Lexers with Complex State +-- +-- A vast majority of lexers are not stateful and can operate on any chunk of +-- text in a document. 
However, there may be rare cases where a lexer does need
+-- to keep track of some sort of persistent state. Rather than using `lpeg.P`
+-- function patterns that set state variables, it is recommended to make use of
+-- Scintilla's built-in, per-line state integers via [`lexer.line_state`](). It
+-- was designed to accommodate up to 32 bit flags for tracking state.
+-- [`lexer.line_from_position()`]() will return the line for any position given
+-- to an `lpeg.P` function pattern. (Any positions derived from that position
+-- argument will also work.)
+--
+-- Writing stateful lexers is beyond the scope of this document.
+--
+-- ## Code Folding
+--
+-- When reading source code, it is occasionally helpful to temporarily hide
+-- blocks of code like functions, classes, comments, etc. This is the concept of
+-- "folding". In the Textadept and SciTE editors for example, little indicators
+-- in the editor margins appear next to code that can be folded at places called
+-- "fold points". When the user clicks an indicator, the editor hides the code
+-- associated with the indicator until the user clicks the indicator again. The
+-- lexer specifies these fold points and what code exactly to fold.
+--
+-- The fold points for most languages occur on keywords or character sequences.
+-- Examples of fold keywords are "if" and "end" in Lua and examples of fold
+-- character sequences are '{', '}', "/\*", and "\*/" in C for code block and
+-- comment delimiters, respectively. However, these fold points cannot occur
+-- just anywhere. For example, lexers should not recognize fold keywords that
+-- appear within strings or comments. The lexer's `_foldsymbols` table allows
+-- you to conveniently define fold points with such granularity. For example,
+-- consider C:
+--
+--     M._foldsymbols = {
+--       [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+--       [l.COMMENT] = {['/*'] = 1, ['*/'] = -1},
+--       _patterns = {'[{}]', '/%*', '%*/'}
+--     }
+--
+-- The first assignment states that any '{' or '}' that the lexer recognizes as
+-- an `lexer.OPERATOR` token is a fold point. The integer `1` indicates the
+-- match is a beginning fold point and `-1` indicates the match is an ending
+-- fold point. Likewise, the second assignment states that any "/\*" or "\*/"
+-- that the lexer recognizes as part of a `lexer.COMMENT` token is a fold point.
+-- The lexer does not consider any occurrences of these characters outside their
+-- defined tokens (such as in a string) as fold points. Finally, every
+-- `_foldsymbols` table must have a `_patterns` field that contains a list of
+-- [Lua patterns][] that match fold points. If the lexer encounters text that
+-- matches one of those patterns, the lexer looks up the matched text in its
+-- token's table in order to determine whether or not the text is a fold point.
+-- In the example above, the first Lua pattern matches any '{' or '}'
+-- characters. When the lexer comes across one of those characters, it checks if
+-- the match is an `lexer.OPERATOR` token. If so, the lexer identifies the match
+-- as a fold point. The same idea applies for the other patterns. (The '%' is in
+-- the other patterns because '\*' is a special character in Lua patterns that
+-- needs escaping.) How do you specify fold keywords?
Here is an example for +-- Lua: +-- +-- M._foldsymbols = { +-- [l.KEYWORD] = { +-- ['if'] = 1, ['do'] = 1, ['function'] = 1, +-- ['end'] = -1, ['repeat'] = 1, ['until'] = -1 +-- }, +-- _patterns = {'%l+'} +-- } +-- +-- Any time the lexer encounters a lower case word, if that word is a +-- `lexer.KEYWORD` token and in the associated list of fold points, the lexer +-- identifies the word as a fold point. +-- +-- If your lexer has case-insensitive keywords as fold points, simply add a +-- `_case_insensitive = true` option to the `_foldsymbols` table and specify +-- keywords in lower case. +-- +-- If your lexer needs to do some additional processing to determine if a match +-- is a fold point, assign a function that returns an integer. Returning `1` or +-- `-1` indicates the match is a fold point. Returning `0` indicates it is not. +-- For example: +-- +-- local function fold_strange_token(text, pos, line, s, match) +-- if ... then +-- return 1 -- beginning fold point +-- elseif ... then +-- return -1 -- ending fold point +-- end +-- return 0 +-- end +-- +-- M._foldsymbols = { +-- ['strange_token'] = {['|'] = fold_strange_token}, +-- _patterns = {'|'} +-- } +-- +-- Any time the lexer encounters a '|' that is a "strange_token", it calls the +-- `fold_strange_token` function to determine if '|' is a fold point. The lexer +-- calls these functions with the following arguments: the text to identify fold +-- points in, the beginning position of the current line in the text to fold, +-- the current line's text, the position in the current line the matched text +-- starts at, and the matched text itself. +-- +-- [Lua patterns]: http://www.lua.org/manual/5.2/manual.html#6.4.1 +-- +-- ### Fold by Indentation +-- +-- Some languages have significant whitespace and/or no delimiters that indicate +-- fold points. If your lexer falls into this category and you would like to +-- mark fold points based on changes in indentation, use the +-- `_FOLDBYINDENTATION` field: +-- +-- M._FOLDBYINDENTATION = true +-- +-- ## Using Lexers +-- +-- ### Textadept +-- +-- Put your lexer in your *~/.textadept/lexers/* directory so you do not +-- overwrite it when upgrading Textadept. Also, lexers in this directory +-- override default lexers. Thus, Textadept loads a user *lua* lexer instead of +-- the default *lua* lexer. This is convenient for tweaking a default lexer to +-- your liking. Then add a [file type][] for your lexer if necessary. +-- +-- [file type]: _M.textadept.file_types.html +-- +-- ### SciTE +-- +-- Create a *.properties* file for your lexer and `import` it in either your +-- *SciTEUser.properties* or *SciTEGlobal.properties*. The contents of the +-- *.properties* file should contain: +-- +-- file.patterns.[lexer_name]=[file_patterns] +-- lexer.$(file.patterns.[lexer_name])=[lexer_name] +-- +-- where `[lexer_name]` is the name of your lexer (minus the *.lua* extension) +-- and `[file_patterns]` is a set of file extensions to use your lexer for. +-- +-- Please note that Lua lexers ignore any styling information in *.properties* +-- files. Your theme file in the *lexers/themes/* directory contains styling +-- information. +-- +-- ## Considerations +-- +-- ### Performance +-- +-- There might be some slight overhead when initializing a lexer, but loading a +-- file from disk into Scintilla is usually more expensive. On modern computer +-- systems, I see no difference in speed between LPeg lexers and Scintilla's C++ +-- ones. 
Optimize lexers for speed by re-arranging rules in the `_rules` table +-- so that the most common rules match first. Do keep in mind that order matters +-- for similar rules. +-- +-- ### Limitations +-- +-- Embedded preprocessor languages like PHP cannot completely embed in their +-- parent languages in that the parent's tokens do not support start and end +-- rules. This mostly goes unnoticed, but code like +-- +-- <div id="<?php echo $id; ?>"> +-- +-- or +-- +-- <div <?php if ($odd) { echo 'class="odd"'; } ?>> +-- +-- will not style correctly. +-- +-- ### Troubleshooting +-- +-- Errors in lexers can be tricky to debug. Lexers print Lua errors to +-- `io.stderr` and `_G.print()` statements to `io.stdout`. Running your editor +-- from a terminal is the easiest way to see errors as they occur. +-- +-- ### Risks +-- +-- Poorly written lexers have the ability to crash Scintilla (and thus its +-- containing application), so unsaved data might be lost. However, I have only +-- observed these crashes in early lexer development, when syntax errors or +-- pattern errors are present. Once the lexer actually starts styling text +-- (either correctly or incorrectly, it does not matter), I have not observed +-- any crashes. +-- +-- ### Acknowledgements +-- +-- Thanks to Peter Odding for his [lexer post][] on the Lua mailing list +-- that inspired me, and thanks to Roberto Ierusalimschy for LPeg. +-- +-- [lexer post]: http://lua-users.org/lists/lua-l/2007-04/msg00116.html +-- @field LEXERPATH (string) +-- The path used to search for a lexer to load. +-- Identical in format to Lua's `package.path` string. +-- The default value is `package.path`. +-- @field DEFAULT (string) +-- The token name for default tokens. +-- @field WHITESPACE (string) +-- The token name for whitespace tokens. +-- @field COMMENT (string) +-- The token name for comment tokens. +-- @field STRING (string) +-- The token name for string tokens. +-- @field NUMBER (string) +-- The token name for number tokens. +-- @field KEYWORD (string) +-- The token name for keyword tokens. +-- @field IDENTIFIER (string) +-- The token name for identifier tokens. +-- @field OPERATOR (string) +-- The token name for operator tokens. +-- @field ERROR (string) +-- The token name for error tokens. +-- @field PREPROCESSOR (string) +-- The token name for preprocessor tokens. +-- @field CONSTANT (string) +-- The token name for constant tokens. +-- @field VARIABLE (string) +-- The token name for variable tokens. +-- @field FUNCTION (string) +-- The token name for function tokens. +-- @field CLASS (string) +-- The token name for class tokens. +-- @field TYPE (string) +-- The token name for type tokens. +-- @field LABEL (string) +-- The token name for label tokens. +-- @field REGEX (string) +-- The token name for regex tokens. +-- @field STYLE_CLASS (string) +-- The style typically used for class definitions. +-- @field STYLE_COMMENT (string) +-- The style typically used for code comments. +-- @field STYLE_CONSTANT (string) +-- The style typically used for constants. +-- @field STYLE_ERROR (string) +-- The style typically used for erroneous syntax. +-- @field STYLE_FUNCTION (string) +-- The style typically used for function definitions. +-- @field STYLE_KEYWORD (string) +-- The style typically used for language keywords. +-- @field STYLE_LABEL (string) +-- The style typically used for labels. +-- @field STYLE_NUMBER (string) +-- The style typically used for numbers. +-- @field STYLE_OPERATOR (string) +-- The style typically used for operators. 
+-- @field STYLE_REGEX (string) +-- The style typically used for regular expression strings. +-- @field STYLE_STRING (string) +-- The style typically used for strings. +-- @field STYLE_PREPROCESSOR (string) +-- The style typically used for preprocessor statements. +-- @field STYLE_TYPE (string) +-- The style typically used for static types. +-- @field STYLE_VARIABLE (string) +-- The style typically used for variables. +-- @field STYLE_WHITESPACE (string) +-- The style typically used for whitespace. +-- @field STYLE_EMBEDDED (string) +-- The style typically used for embedded code. +-- @field STYLE_IDENTIFIER (string) +-- The style typically used for identifier words. +-- @field STYLE_DEFAULT (string) +-- The style all styles are based off of. +-- @field STYLE_LINENUMBER (string) +-- The style used for all margins except fold margins. +-- @field STYLE_BRACELIGHT (string) +-- The style used for highlighted brace characters. +-- @field STYLE_BRACEBAD (string) +-- The style used for unmatched brace characters. +-- @field STYLE_CONTROLCHAR (string) +-- The style used for control characters. +-- Color attributes are ignored. +-- @field STYLE_INDENTGUIDE (string) +-- The style used for indentation guides. +-- @field STYLE_CALLTIP (string) +-- The style used by call tips if [`buffer.call_tip_use_style`]() is set. +-- Only the font name, size, and color attributes are used. +-- @field STYLE_FOLDDISPLAYTEXT (string) +-- The style used for fold display text. +-- @field any (pattern) +-- A pattern that matches any single character. +-- @field ascii (pattern) +-- A pattern that matches any ASCII character (codes 0 to 127). +-- @field extend (pattern) +-- A pattern that matches any ASCII extended character (codes 0 to 255). +-- @field alpha (pattern) +-- A pattern that matches any alphabetic character ('A'-'Z', 'a'-'z'). +-- @field digit (pattern) +-- A pattern that matches any digit ('0'-'9'). +-- @field alnum (pattern) +-- A pattern that matches any alphanumeric character ('A'-'Z', 'a'-'z', +-- '0'-'9'). +-- @field lower (pattern) +-- A pattern that matches any lower case character ('a'-'z'). +-- @field upper (pattern) +-- A pattern that matches any upper case character ('A'-'Z'). +-- @field xdigit (pattern) +-- A pattern that matches any hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f'). +-- @field cntrl (pattern) +-- A pattern that matches any control character (ASCII codes 0 to 31). +-- @field graph (pattern) +-- A pattern that matches any graphical character ('!' to '~'). +-- @field print (pattern) +-- A pattern that matches any printable character (' ' to '~'). +-- @field punct (pattern) +-- A pattern that matches any punctuation character ('!' to '/', ':' to '@', +-- '[' to ''', '{' to '~'). +-- @field space (pattern) +-- A pattern that matches any whitespace character ('\t', '\v', '\f', '\n', +-- '\r', space). +-- @field newline (pattern) +-- A pattern that matches any set of end of line characters. +-- @field nonnewline (pattern) +-- A pattern that matches any single, non-newline character. +-- @field nonnewline_esc (pattern) +-- A pattern that matches any single, non-newline character or any set of end +-- of line characters escaped with '\'. +-- @field dec_num (pattern) +-- A pattern that matches a decimal number. +-- @field hex_num (pattern) +-- A pattern that matches a hexadecimal number. +-- @field oct_num (pattern) +-- A pattern that matches an octal number. +-- @field integer (pattern) +-- A pattern that matches either a decimal, hexadecimal, or octal number. 
+-- @field float (pattern) +-- A pattern that matches a floating point number. +-- @field word (pattern) +-- A pattern that matches a typical word. Words begin with a letter or +-- underscore and consist of alphanumeric and underscore characters. +-- @field FOLD_BASE (number) +-- The initial (root) fold level. +-- @field FOLD_BLANK (number) +-- Flag indicating that the line is blank. +-- @field FOLD_HEADER (number) +-- Flag indicating the line is fold point. +-- @field fold_level (table, Read-only) +-- Table of fold level bit-masks for line numbers starting from zero. +-- Fold level masks are composed of an integer level combined with any of the +-- following bits: +-- +-- * `lexer.FOLD_BASE` +-- The initial fold level. +-- * `lexer.FOLD_BLANK` +-- The line is blank. +-- * `lexer.FOLD_HEADER` +-- The line is a header, or fold point. +-- @field indent_amount (table, Read-only) +-- Table of indentation amounts in character columns, for line numbers +-- starting from zero. +-- @field line_state (table) +-- Table of integer line states for line numbers starting from zero. +-- Line states can be used by lexers for keeping track of persistent states. +-- @field property (table) +-- Map of key-value string pairs. +-- @field property_expanded (table, Read-only) +-- Map of key-value string pairs with `$()` and `%()` variable replacement +-- performed in values. +-- @field property_int (table, Read-only) +-- Map of key-value pairs with values interpreted as numbers, or `0` if not +-- found. +-- @field style_at (table, Read-only) +-- Table of style names at positions in the buffer starting from 1. +module('lexer')]=] + +lpeg = require('lpeg') +local lpeg_P, lpeg_R, lpeg_S, lpeg_V = lpeg.P, lpeg.R, lpeg.S, lpeg.V +local lpeg_Ct, lpeg_Cc, lpeg_Cp = lpeg.Ct, lpeg.Cc, lpeg.Cp +local lpeg_Cmt, lpeg_C, lpeg_Carg = lpeg.Cmt, lpeg.C, lpeg.Carg +local lpeg_match = lpeg.match + +M.LEXERPATH = package.path + +-- Table of loaded lexers. +M.lexers = {} + +-- Keep track of the last parent lexer loaded. This lexer's rules are used for +-- proxy lexers (those that load parent and child lexers to embed) that do not +-- declare a parent lexer. +local parent_lexer + +if not package.searchpath then + -- Searches for the given *name* in the given *path*. + -- This is an implementation of Lua 5.2's `package.searchpath()` function for + -- Lua 5.1. + function package.searchpath(name, path) + local tried = {} + for part in path:gmatch('[^;]+') do + local filename = part:gsub('%?', name) + local f = io.open(filename, 'r') + if f then f:close() return filename end + tried[#tried + 1] = ("no file '%s'"):format(filename) + end + return nil, table.concat(tried, '\n') + end +end + +-- Adds a rule to a lexer's current ordered list of rules. +-- @param lexer The lexer to add the given rule to. +-- @param name The name associated with this rule. It is used for other lexers +-- to access this particular rule from the lexer's `_RULES` table. It does not +-- have to be the same as the name passed to `token`. +-- @param rule The LPeg pattern of the rule. +local function add_rule(lexer, id, rule) + if not lexer._RULES then + lexer._RULES = {} + -- Contains an ordered list (by numerical index) of rule names. This is used + -- in conjunction with lexer._RULES for building _TOKENRULE. + lexer._RULEORDER = {} + end + lexer._RULES[id] = rule + lexer._RULEORDER[#lexer._RULEORDER + 1] = id +end + +-- Adds a new Scintilla style to Scintilla. +-- @param lexer The lexer to add the given style to. 
+-- @param token_name The name of the token associated with this style. +-- @param style A Scintilla style created from `style()`. +-- @see style +local function add_style(lexer, token_name, style) + local num_styles = lexer._numstyles + if num_styles == 32 then num_styles = num_styles + 8 end -- skip predefined + if num_styles >= 255 then print('Too many styles defined (255 MAX)') end + lexer._TOKENSTYLES[token_name], lexer._numstyles = num_styles, num_styles + 1 + lexer._EXTRASTYLES[token_name] = style +end + +-- (Re)constructs `lexer._TOKENRULE`. +-- @param parent The parent lexer. +local function join_tokens(lexer) + local patterns, order = lexer._RULES, lexer._RULEORDER + local token_rule = patterns[order[1]] + for i = 2, #order do token_rule = token_rule + patterns[order[i]] end + lexer._TOKENRULE = token_rule + M.token(M.DEFAULT, M.any) + return lexer._TOKENRULE +end + +-- Adds a given lexer and any of its embedded lexers to a given grammar. +-- @param grammar The grammar to add the lexer to. +-- @param lexer The lexer to add. +local function add_lexer(grammar, lexer, token_rule) + local token_rule = join_tokens(lexer) + local lexer_name = lexer._NAME + for i = 1, #lexer._CHILDREN do + local child = lexer._CHILDREN[i] + if child._CHILDREN then add_lexer(grammar, child) end + local child_name = child._NAME + local rules = child._EMBEDDEDRULES[lexer_name] + local rules_token_rule = grammar['__'..child_name] or rules.token_rule + grammar[child_name] = (-rules.end_rule * rules_token_rule)^0 * + rules.end_rule^-1 * lpeg_V(lexer_name) + local embedded_child = '_'..child_name + grammar[embedded_child] = rules.start_rule * (-rules.end_rule * + rules_token_rule)^0 * rules.end_rule^-1 + token_rule = lpeg_V(embedded_child) + token_rule + end + grammar['__'..lexer_name] = token_rule -- can contain embedded lexer rules + grammar[lexer_name] = token_rule^0 +end + +-- (Re)constructs `lexer._GRAMMAR`. +-- @param lexer The parent lexer. +-- @param initial_rule The name of the rule to start lexing with. The default +-- value is `lexer._NAME`. Multilang lexers use this to start with a child +-- rule if necessary. +local function build_grammar(lexer, initial_rule) + local children = lexer._CHILDREN + if children then + local lexer_name = lexer._NAME + if not initial_rule then initial_rule = lexer_name end + local grammar = {initial_rule} + add_lexer(grammar, lexer) + lexer._INITIALRULE = initial_rule + lexer._GRAMMAR = lpeg_Ct(lpeg_P(grammar)) + else + local function tmout(_, _, t1, redrawtime_max, flag) + if not redrawtime_max or os.clock() - t1 < redrawtime_max then return true end + if flag then flag.timedout = true end + end + local tokens = join_tokens(lexer) + -- every 500 tokens (approx. a screenful), check whether we have exceeded the timeout + lexer._GRAMMAR = lpeg_Ct((tokens * tokens^-500 * lpeg_Cmt(lpeg_Carg(1) * lpeg_Carg(2) * lpeg_Carg(3), tmout))^0) + end +end + +local string_upper = string.upper +-- Default styles. +local default = { + 'nothing', 'whitespace', 'comment', 'string', 'number', 'keyword', + 'identifier', 'operator', 'error', 'preprocessor', 'constant', 'variable', + 'function', 'class', 'type', 'label', 'regex', 'embedded' +} +for i = 1, #default do + local name, upper_name = default[i], string_upper(default[i]) + M[upper_name] = name + if not M['STYLE_'..upper_name] then + M['STYLE_'..upper_name] = '' + end +end +-- Predefined styles. 
+local predefined = { + 'default', 'linenumber', 'bracelight', 'bracebad', 'controlchar', + 'indentguide', 'calltip', 'folddisplaytext' +} +for i = 1, #predefined do + local name, upper_name = predefined[i], string_upper(predefined[i]) + M[upper_name] = name + if not M['STYLE_'..upper_name] then + M['STYLE_'..upper_name] = '' + end +end + +--- +-- Initializes or loads and returns the lexer of string name *name*. +-- Scintilla calls this function in order to load a lexer. Parent lexers also +-- call this function in order to load child lexers and vice-versa. The user +-- calls this function in order to load a lexer when using Scintillua as a Lua +-- library. +-- @param name The name of the lexing language. +-- @param alt_name The alternate name of the lexing language. This is useful for +-- embedding the same child lexer with multiple sets of start and end tokens. +-- @param cache Flag indicating whether or not to load lexers from the cache. +-- This should only be `true` when initially loading a lexer (e.g. not from +-- within another lexer for embedding purposes). +-- The default value is `false`. +-- @return lexer object +-- @name load +function M.load(name, alt_name, cache) + if cache and M.lexers[alt_name or name] then return M.lexers[alt_name or name] end + parent_lexer = nil -- reset + + -- When using Scintillua as a stand-alone module, the `property` and + -- `property_int` tables do not exist (they are not useful). Create them to + -- prevent errors from occurring. + if not M.property then + M.property, M.property_int = {}, setmetatable({}, { + __index = function(t, k) return tonumber(M.property[k]) or 0 end, + __newindex = function() error('read-only property') end + }) + end + + -- Load the language lexer with its rules, styles, etc. + M.WHITESPACE = (alt_name or name)..'_whitespace' + local lexer_file, error = package.searchpath('lexers/'..name, M.LEXERPATH) + local ok, lexer = pcall(dofile, lexer_file or '') + if not ok then + return nil + end + if alt_name then lexer._NAME = alt_name end + + -- Create the initial maps for token names to style numbers and styles. + local token_styles = {} + for i = 1, #default do token_styles[default[i]] = i - 1 end + for i = 1, #predefined do token_styles[predefined[i]] = i + 31 end + lexer._TOKENSTYLES, lexer._numstyles = token_styles, #default + lexer._EXTRASTYLES = {} + + -- If the lexer is a proxy (loads parent and child lexers to embed) and does + -- not declare a parent, try and find one and use its rules. + if not lexer._rules and not lexer._lexer then lexer._lexer = parent_lexer end + + -- If the lexer is a proxy or a child that embedded itself, add its rules and + -- styles to the parent lexer. Then set the parent to be the main lexer. + if lexer._lexer then + local l, _r, _s = lexer._lexer, lexer._rules, lexer._tokenstyles + if not l._tokenstyles then l._tokenstyles = {} end + if _r then + for i = 1, #_r do + -- Prevent rule id clashes. + l._rules[#l._rules + 1] = {lexer._NAME..'_'.._r[i][1], _r[i][2]} + end + end + if _s then + for token, style in pairs(_s) do l._tokenstyles[token] = style end + end + lexer = l + end + + -- Add the lexer's styles and build its grammar. + if lexer._rules then + if lexer._tokenstyles then + for token, style in pairs(lexer._tokenstyles) do + add_style(lexer, token, style) + end + end + for i = 1, #lexer._rules do + add_rule(lexer, lexer._rules[i][1], lexer._rules[i][2]) + end + build_grammar(lexer) + end + -- Add the lexer's unique whitespace style. 
+ add_style(lexer, lexer._NAME..'_whitespace', M.STYLE_WHITESPACE) + + -- Process the lexer's fold symbols. + if lexer._foldsymbols and lexer._foldsymbols._patterns then + local patterns = lexer._foldsymbols._patterns + for i = 1, #patterns do patterns[i] = '()('..patterns[i]..')' end + end + + lexer.lex, lexer.fold = M.lex, M.fold + M.lexers[alt_name or name] = lexer + return lexer +end + +--- +-- Lexes a chunk of text *text* (that has an initial style number of +-- *init_style*) with lexer *lexer*. +-- If *lexer* has a `_LEXBYLINE` flag set, the text is lexed one line at a time. +-- Otherwise the text is lexed as a whole. +-- @param lexer The lexer object to lex with. +-- @param text The text in the buffer to lex. +-- @param init_style The current style. Multiple-language lexers use this to +-- determine which language to start lexing in. +-- @param redrawtime_max Stop lexing after that many seconds and set the second return value (timedout) to true. +-- @param init Start lexing from this offset in *text* (default is 1). +-- @return table of token names and positions. +-- @return whether the lexing timed out. +-- @name lex +function M.lex(lexer, text, init_style, redrawtime_max, init) + if not lexer._GRAMMAR then return {M.DEFAULT, #text + 1} end + if not lexer._LEXBYLINE then + -- For multilang lexers, build a new grammar whose initial_rule is the + -- current language. + if lexer._CHILDREN then + for style, style_num in pairs(lexer._TOKENSTYLES) do + if style_num == init_style then + local lexer_name = style:match('^(.+)_whitespace') or lexer._NAME + if lexer._INITIALRULE ~= lexer_name then + build_grammar(lexer, lexer_name) + end + break + end + end + end + local flag = {} + return lpeg_match(lexer._GRAMMAR, text, init, os.clock(), redrawtime_max, flag), flag.timedout + else + local tokens = {} + local function append(tokens, line_tokens, offset) + for i = 1, #line_tokens, 2 do + tokens[#tokens + 1] = line_tokens[i] + tokens[#tokens + 1] = line_tokens[i + 1] + offset + end + end + local offset = 0 + local grammar = lexer._GRAMMAR + local flag = {} + for line in text:gmatch('[^\r\n]*\r?\n?') do + local line_tokens = lpeg_match(grammar, line, init, os.clock(), redrawtime_max, flag) + if line_tokens then append(tokens, line_tokens, offset) end + offset = offset + #line + -- Use the default style to the end of the line if none was specified. + if tokens[#tokens] ~= offset then + tokens[#tokens + 1], tokens[#tokens + 2] = 'default', offset + 1 + end + end + return tokens, flag.timedout + end +end + +--- +-- Determines fold points in a chunk of text *text* with lexer *lexer*. +-- *text* starts at position *start_pos* on line number *start_line* with a +-- beginning fold level of *start_level* in the buffer. If *lexer* has a `_fold` +-- function or a `_foldsymbols` table, that field is used to perform folding. +-- Otherwise, if *lexer* has a `_FOLDBYINDENTATION` field set, or if a +-- `fold.by.indentation` property is set, folding by indentation is done. +-- @param lexer The lexer object to fold with. +-- @param text The text in the buffer to fold. +-- @param start_pos The position in the buffer *text* starts at, starting at +-- zero. +-- @param start_line The line number *text* starts on. +-- @param start_level The fold level *text* starts on. +-- @return table of fold levels. 
+-- @name fold
+function M.fold(lexer, text, start_pos, start_line, start_level)
+  local folds = {}
+  if text == '' then return folds end
+  local fold = M.property_int['fold'] > 0
+  local FOLD_BASE = M.FOLD_BASE
+  local FOLD_HEADER, FOLD_BLANK = M.FOLD_HEADER, M.FOLD_BLANK
+  if fold and lexer._fold then
+    return lexer._fold(text, start_pos, start_line, start_level)
+  elseif fold and lexer._foldsymbols then
+    local lines = {}
+    for p, l in (text..'\n'):gmatch('()(.-)\r?\n') do
+      lines[#lines + 1] = {p, l}
+    end
+    local fold_zero_sum_lines = M.property_int['fold.on.zero.sum.lines'] > 0
+    local fold_symbols = lexer._foldsymbols
+    local fold_symbols_patterns = fold_symbols._patterns
+    local fold_symbols_case_insensitive = fold_symbols._case_insensitive
+    local style_at, fold_level = M.style_at, M.fold_level
+    local line_num, prev_level = start_line, start_level
+    local current_level = prev_level
+    for i = 1, #lines do
+      local pos, line = lines[i][1], lines[i][2]
+      if line ~= '' then
+        if fold_symbols_case_insensitive then line = line:lower() end
+        local level_decreased = false
+        for j = 1, #fold_symbols_patterns do
+          for s, match in line:gmatch(fold_symbols_patterns[j]) do
+            local symbols = fold_symbols[style_at[start_pos + pos + s - 1]]
+            local l = symbols and symbols[match]
+            if type(l) == 'function' then l = l(text, pos, line, s, match) end
+            if type(l) == 'number' then
+              current_level = current_level + l
+              if l < 0 and current_level < prev_level then
+                -- Potential zero-sum line. If the level were to go back up on
+                -- the same line, the line may be marked as a fold header.
+                level_decreased = true
+              end
+            end
+          end
+        end
+        folds[line_num] = prev_level
+        if current_level > prev_level then
+          folds[line_num] = prev_level + FOLD_HEADER
+        elseif level_decreased and current_level == prev_level and
+               fold_zero_sum_lines then
+          if line_num > start_line then
+            folds[line_num] = prev_level - 1 + FOLD_HEADER
+          else
+            -- Typing within a zero-sum line.
+            local level = fold_level[line_num - 1] - 1
+            if level > FOLD_HEADER then level = level - FOLD_HEADER end
+            if level > FOLD_BLANK then level = level - FOLD_BLANK end
+            folds[line_num] = level + FOLD_HEADER
+            current_level = current_level + 1
+          end
+        end
+        if current_level < FOLD_BASE then current_level = FOLD_BASE end
+        prev_level = current_level
+      else
+        folds[line_num] = prev_level + FOLD_BLANK
+      end
+      line_num = line_num + 1
+    end
+  elseif fold and (lexer._FOLDBYINDENTATION or
+                   M.property_int['fold.by.indentation'] > 0) then
+    -- Indentation-based folding.
+    -- Calculate indentation per line.
+    local indentation = {}
+    for indent, line in (text..'\n'):gmatch('([\t ]*)([^\r\n]*)\r?\n') do
+      indentation[#indentation + 1] = line ~= '' and #indent
+    end
+    -- Find the first non-blank line before start_line. If the current line is
+    -- indented, make that previous line a header and update the levels of any
+    -- blank lines in between. If the current line is blank, match the level of
+    -- the previous non-blank line.
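+    -- Overall effect, illustratively: a chunk whose per-line indents are
+    -- {0, 2, 2} yields FOLD_BASE + FOLD_HEADER for line 1 and FOLD_BASE + 2
+    -- for lines 2-3 (blank lines get the preceding level plus FOLD_BLANK).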
+    local current_level = start_level
+    for i = start_line - 1, 0, -1 do
+      local level = M.fold_level[i]
+      if level >= FOLD_HEADER then level = level - FOLD_HEADER end
+      if level < FOLD_BLANK then
+        local indent = M.indent_amount[i]
+        if indentation[1] and indentation[1] > indent then
+          folds[i] = FOLD_BASE + indent + FOLD_HEADER
+          for j = i + 1, start_line - 1 do
+            folds[j] = start_level + FOLD_BLANK
+          end
+        elseif not indentation[1] then
+          current_level = FOLD_BASE + indent
+        end
+        break
+      end
+    end
+    -- Iterate over lines, setting fold numbers and fold flags.
+    for i = 1, #indentation do
+      if indentation[i] then
+        current_level = FOLD_BASE + indentation[i]
+        folds[start_line + i - 1] = current_level
+        for j = i + 1, #indentation do
+          if indentation[j] then
+            if FOLD_BASE + indentation[j] > current_level then
+              folds[start_line + i - 1] = current_level + FOLD_HEADER
+              current_level = FOLD_BASE + indentation[j] -- for any blanks below
+            end
+            break
+          end
+        end
+      else
+        folds[start_line + i - 1] = current_level + FOLD_BLANK
+      end
+    end
+  else
+    -- No folding, reset fold levels if necessary.
+    local current_line = start_line
+    for _ in text:gmatch('\r?\n') do
+      folds[current_line] = start_level
+      current_line = current_line + 1
+    end
+  end
+  return folds
+end
+
+-- The following are utility functions lexers will have access to.
+
+-- Common patterns.
+M.any = lpeg_P(1)
+M.ascii = lpeg_R('\000\127')
+M.extend = lpeg_R('\000\255')
+M.alpha = lpeg_R('AZ', 'az')
+M.digit = lpeg_R('09')
+M.alnum = lpeg_R('AZ', 'az', '09')
+M.lower = lpeg_R('az')
+M.upper = lpeg_R('AZ')
+M.xdigit = lpeg_R('09', 'AF', 'af')
+M.cntrl = lpeg_R('\000\031')
+M.graph = lpeg_R('!~')
+M.print = lpeg_R(' ~')
+M.punct = lpeg_R('!/', ':@', '[\'', '{~')
+M.space = lpeg_S('\t\v\f\n\r ')
+
+M.newline = lpeg_S('\r\n\f')^1
+M.nonnewline = 1 - M.newline
+M.nonnewline_esc = 1 - (M.newline + '\\') + '\\' * M.any
+
+M.dec_num = M.digit^1
+M.hex_num = '0' * lpeg_S('xX') * M.xdigit^1
+M.oct_num = '0' * lpeg_R('07')^1
+M.integer = lpeg_S('+-')^-1 * (M.hex_num + M.oct_num + M.dec_num)
+M.float = lpeg_S('+-')^-1 *
+          ((M.digit^0 * '.' * M.digit^1 + M.digit^1 * '.' * M.digit^0) *
+           (lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1)^-1 +
+           (M.digit^1 * lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1))
+
+M.word = (M.alpha + '_') * (M.alnum + '_')^0
+
+---
+-- Creates and returns a token pattern with token name *name* and pattern
+-- *patt*.
+-- If *name* is not a predefined token name, its style must be defined in the
+-- lexer's `_tokenstyles` table.
+-- @param name The name of the token. If this name is not a predefined token
+--   name, then a style needs to be associated with it in the lexer's
+--   `_tokenstyles` table.
+-- @param patt The LPeg pattern associated with the token.
+-- @return pattern
+-- @usage local ws = token(l.WHITESPACE, l.space^1)
+-- @usage local annotation = token('annotation', '@' * l.word)
+-- @name token
+function M.token(name, patt)
+  return lpeg_Cc(name) * patt * lpeg_Cp()
+end
+
+---
+-- Creates and returns a pattern that matches a range of text bounded by
+-- *chars* characters.
+-- This is a convenience function for matching more complicated delimited ranges
+-- like strings with escape characters and balanced parentheses. *single_line*
+-- indicates whether or not the range must be on a single line, *no_escape*
+-- indicates whether or not to ignore '\' as an escape character, and *balanced*
+-- indicates whether or not to handle balanced ranges like parentheses and
+-- requires *chars* to be composed of two characters.
+-- @param chars The character(s) that bound the matched range.
+-- @param single_line Optional flag indicating whether or not the range must be
+--   on a single line.
+-- @param no_escape Optional flag indicating whether or not to ignore '\\' as
+--   an escape character (when `true`, the range end character cannot be
+--   escaped).
+-- @param balanced Optional flag indicating whether or not to match a balanced
+--   range, like the "%b" Lua pattern. This flag only applies if *chars*
+--   consists of two different characters (e.g. "()").
+-- @return pattern
+-- @usage local dq_str_escapes = l.delimited_range('"')
+-- @usage local dq_str_noescapes = l.delimited_range('"', false, true)
+-- @usage local unbalanced_parens = l.delimited_range('()')
+-- @usage local balanced_parens = l.delimited_range('()', false, false, true)
+-- @see nested_pair
+-- @name delimited_range
+function M.delimited_range(chars, single_line, no_escape, balanced)
+  local s = chars:sub(1, 1)
+  local e = #chars == 2 and chars:sub(2, 2) or s
+  local range
+  local b = balanced and s or ''
+  local n = single_line and '\n' or ''
+  if no_escape then
+    local invalid = lpeg_S(e..n..b)
+    range = M.any - invalid
+  else
+    local invalid = lpeg_S(e..n..b) + '\\'
+    range = M.any - invalid + '\\' * M.any
+  end
+  if balanced and s ~= e then
+    return lpeg_P{s * (range + lpeg_V(1))^0 * e}
+  else
+    return s * range^0 * lpeg_P(e)^-1
+  end
+end
+
+---
+-- Creates and returns a pattern that matches pattern *patt* only at the
+-- beginning of a line.
+-- @param patt The LPeg pattern to match on the beginning of a line.
+-- @return pattern
+-- @usage local preproc = token(l.PREPROCESSOR, l.starts_line('#') *
+--   l.nonnewline^0)
+-- @name starts_line
+function M.starts_line(patt)
+  return lpeg_Cmt(lpeg_C(patt), function(input, index, match, ...)
+    local pos = index - #match
+    if pos == 1 then return index, ... end
+    local char = input:sub(pos - 1, pos - 1)
+    if char == '\n' or char == '\r' or char == '\f' then return index, ... end
+  end)
+end
+
+---
+-- Creates and returns a pattern that verifies that string set *s* contains the
+-- first non-whitespace character behind the current match position.
+-- @param s String character set like one passed to `lpeg.S()`.
+-- @return pattern
+-- @usage local regex = l.last_char_includes('+-*!%^&|=,([{') *
+--   l.delimited_range('/')
+-- @name last_char_includes
+function M.last_char_includes(s)
+  s = '['..s:gsub('[-%%%[]', '%%%1')..']'
+  return lpeg_P(function(input, index)
+    if index == 1 then return index end
+    local i = index
+    while input:sub(i - 1, i - 1):match('[ \t\r\n\f]') do i = i - 1 end
+    if input:sub(i - 1, i - 1):match(s) then return index end
+  end)
+end
+
+---
+-- Returns a pattern that matches a balanced range of text that starts with
+-- string *start_chars* and ends with string *end_chars*.
+-- With single-character delimiters, this function is identical to
+-- `delimited_range(start_chars..end_chars, false, true, true)`.
+-- @param start_chars The string starting a nested sequence.
+-- @param end_chars The string ending a nested sequence.
+-- @return pattern
+-- @usage local nested_comment = l.nested_pair('/*', '*/')
+-- @see delimited_range
+-- @name nested_pair
+function M.nested_pair(start_chars, end_chars)
+  local s, e = start_chars, lpeg_P(end_chars)^-1
+  return lpeg_P{s * (M.any - s - end_chars + lpeg_V(1))^0 * e}
+end
+
+---
+-- Creates and returns a pattern that matches any single word in list *words*.
+-- Words consist of alphanumeric and underscore characters, as well as the
+-- characters in string set *word_chars*. *case_insensitive* indicates whether
+-- or not to ignore case when matching words.
+-- This is a convenience function for simplifying a set of ordered choice word
+-- patterns.
+-- @param words A table of words.
+-- @param word_chars Optional string of additional characters considered to be
+--   part of a word. By default, word characters are alphanumerics and
+--   underscores ("%w_" in Lua). This parameter may be `nil` or the empty string
+--   in order to indicate no additional word characters.
+-- @param case_insensitive Optional boolean flag indicating whether or not the
+--   word match is case-insensitive. The default is `false`.
+-- @return pattern
+-- @usage local keyword = token(l.KEYWORD, word_match{'foo', 'bar', 'baz'})
+-- @usage local keyword = token(l.KEYWORD, word_match({'foo-bar', 'foo-baz',
+--   'bar-foo', 'bar-baz', 'baz-foo', 'baz-bar'}, '-', true))
+-- @name word_match
+function M.word_match(words, word_chars, case_insensitive)
+  local word_list = {}
+  for i = 1, #words do
+    word_list[case_insensitive and words[i]:lower() or words[i]] = true
+  end
+  local chars = M.alnum + '_'
+  if word_chars then chars = chars + lpeg_S(word_chars) end
+  return lpeg_Cmt(chars^1, function(input, index, word)
+    if case_insensitive then word = word:lower() end
+    return word_list[word] and index or nil
+  end)
+end
+
+---
+-- Embeds child lexer *child* in parent lexer *parent* using patterns
+-- *start_rule* and *end_rule*, which signal the beginning and end of the
+-- embedded lexer, respectively.
+-- @param parent The parent lexer.
+-- @param child The child lexer.
+-- @param start_rule The pattern that signals the beginning of the embedded
+--   lexer.
+-- @param end_rule The pattern that signals the end of the embedded lexer.
+-- @usage l.embed_lexer(M, css, css_start_rule, css_end_rule)
+-- @usage l.embed_lexer(html, M, php_start_rule, php_end_rule)
+-- @usage l.embed_lexer(html, ruby, ruby_start_rule, ruby_end_rule)
+-- @name embed_lexer
+function M.embed_lexer(parent, child, start_rule, end_rule)
+  -- Add child rules.
+  if not child._EMBEDDEDRULES then child._EMBEDDEDRULES = {} end
+  if not child._RULES then -- creating a child lexer to be embedded
+    if not child._rules then error('Cannot embed language with no rules') end
+    for i = 1, #child._rules do
+      add_rule(child, child._rules[i][1], child._rules[i][2])
+    end
+  end
+  child._EMBEDDEDRULES[parent._NAME] = {
+    ['start_rule'] = start_rule,
+    token_rule = join_tokens(child),
+    ['end_rule'] = end_rule
+  }
+  if not parent._CHILDREN then parent._CHILDREN = {} end
+  local children = parent._CHILDREN
+  children[#children + 1] = child
+  -- Add child styles.
+  if not parent._tokenstyles then parent._tokenstyles = {} end
+  local tokenstyles = parent._tokenstyles
+  tokenstyles[child._NAME..'_whitespace'] = M.STYLE_WHITESPACE
+  if child._tokenstyles then
+    for token, style in pairs(child._tokenstyles) do
+      tokenstyles[token] = style
+    end
+  end
+  -- Add child fold symbols.
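+  -- (In the merge below, numeric keys, e.g. entries of a child's `_patterns`
+  -- list, are appended; named fold symbols are copied only if the parent does
+  -- not already define them.)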
+ if not parent._foldsymbols then parent._foldsymbols = {} end + if child._foldsymbols then + for token, symbols in pairs(child._foldsymbols) do + if not parent._foldsymbols[token] then parent._foldsymbols[token] = {} end + for k, v in pairs(symbols) do + if type(k) == 'number' then + parent._foldsymbols[token][#parent._foldsymbols[token] + 1] = v + elseif not parent._foldsymbols[token][k] then + parent._foldsymbols[token][k] = v + end + end + end + end + child._lexer = parent -- use parent's tokens if child is embedding itself + parent_lexer = parent -- use parent's tokens if the calling lexer is a proxy +end + +-- Determines if the previous line is a comment. +-- This is used for determining if the current comment line is a fold point. +-- @param prefix The prefix string defining a comment. +-- @param text The text passed to a fold function. +-- @param pos The pos passed to a fold function. +-- @param line The line passed to a fold function. +-- @param s The s passed to a fold function. +local function prev_line_is_comment(prefix, text, pos, line, s) + local start = line:find('%S') + if start < s and not line:find(prefix, start, true) then return false end + local p = pos - 1 + if text:sub(p, p) == '\n' then + p = p - 1 + if text:sub(p, p) == '\r' then p = p - 1 end + if text:sub(p, p) ~= '\n' then + while p > 1 and text:sub(p - 1, p - 1) ~= '\n' do p = p - 1 end + while text:sub(p, p):find('^[\t ]$') do p = p + 1 end + return text:sub(p, p + #prefix - 1) == prefix + end + end + return false +end + +-- Determines if the next line is a comment. +-- This is used for determining if the current comment line is a fold point. +-- @param prefix The prefix string defining a comment. +-- @param text The text passed to a fold function. +-- @param pos The pos passed to a fold function. +-- @param line The line passed to a fold function. +-- @param s The s passed to a fold function. +local function next_line_is_comment(prefix, text, pos, line, s) + local p = text:find('\n', pos + s) + if p then + p = p + 1 + while text:sub(p, p):find('^[\t ]$') do p = p + 1 end + return text:sub(p, p + #prefix - 1) == prefix + end + return false +end + +--- +-- Returns a fold function (to be used within the lexer's `_foldsymbols` table) +-- that folds consecutive line comments that start with string *prefix*. +-- @param prefix The prefix string defining a line comment. +-- @usage [l.COMMENT] = {['--'] = l.fold_line_comments('--')} +-- @usage [l.COMMENT] = {['//'] = l.fold_line_comments('//')} +-- @name fold_line_comments +function M.fold_line_comments(prefix) + local property_int = M.property_int + return function(text, pos, line, s) + if property_int['fold.line.comments'] == 0 then return 0 end + if s > 1 and line:match('^%s*()') < s then return 0 end + local prev_line_comment = prev_line_is_comment(prefix, text, pos, line, s) + local next_line_comment = next_line_is_comment(prefix, text, pos, line, s) + if not prev_line_comment and next_line_comment then return 1 end + if prev_line_comment and not next_line_comment then return -1 end + return 0 + end +end + +M.property_expanded = setmetatable({}, { + -- Returns the string property value associated with string property *key*, + -- replacing any "$()" and "%()" expressions with the values of their keys. + __index = function(t, key) + return M.property[key]:gsub('[$%%]%b()', function(key) + return t[key:sub(3, -2)] + end) + end, + __newindex = function() error('read-only property') end +}) + +--[[ The functions and fields below were defined in C. 
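+-- (When Scintillua is used as a stand-alone Lua library these are not
+-- available; `M.load()` creates stub `property` tables so lexing still works
+-- without them.)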
+ +--- +-- Returns the line number of the line that contains position *pos*, which +-- starts from 1. +-- @param pos The position to get the line number of. +-- @return number +local function line_from_position(pos) end + +--- +-- Individual fields for a lexer instance. +-- @field _NAME The string name of the lexer. +-- @field _rules An ordered list of rules for a lexer grammar. +-- Each rule is a table containing an arbitrary rule name and the LPeg pattern +-- associated with the rule. The order of rules is important, as rules are +-- matched sequentially. +-- Child lexers should not use this table to access and/or modify their +-- parent's rules and vice-versa. Use the `_RULES` table instead. +-- @field _tokenstyles A map of non-predefined token names to styles. +-- Remember to use token names, not rule names. It is recommended to use +-- predefined styles or color-agnostic styles derived from predefined styles +-- to ensure compatibility with user color themes. +-- @field _foldsymbols A table of recognized fold points for the lexer. +-- Keys are token names with table values defining fold points. Those table +-- values have string keys of keywords or characters that indicate a fold +-- point whose values are integers. A value of `1` indicates a beginning fold +-- point and a value of `-1` indicates an ending fold point. Values can also +-- be functions that return `1`, `-1`, or `0` (indicating no fold point) for +-- keys which need additional processing. +-- There is also a required `_patterns` key whose value is a table containing +-- Lua pattern strings that match all fold points (the string keys contained +-- in token name table values). When the lexer encounters text that matches +-- one of those patterns, the matched text is looked up in its token's table +-- to determine whether or not it is a fold point. +-- There is also an optional `_case_insensitive` option that indicates whether +-- or not fold point keys are case-insensitive. If `true`, fold point keys +-- should be in lower case. +-- @field _fold If this function exists in the lexer, it is called for folding +-- the document instead of using `_foldsymbols` or indentation. +-- @field _lexer The parent lexer object whose rules should be used. This field +-- is only necessary to disambiguate a proxy lexer that loaded parent and +-- child lexers for embedding and ended up having multiple parents loaded. +-- @field _RULES A map of rule name keys with their associated LPeg pattern +-- values for the lexer. +-- This is constructed from the lexer's `_rules` table and accessible to other +-- lexers for embedded lexer applications like modifying parent or child +-- rules. +-- @field _LEXBYLINE Indicates the lexer can only process one whole line of text +-- (instead of an arbitrary chunk of text) at a time. +-- The default value is `false`. Line lexers cannot look ahead to subsequent +-- lines. +-- @field _FOLDBYINDENTATION Declares the lexer does not define fold points and +-- that fold points should be calculated based on changes in indentation. +-- @class table +-- @name lexer +local lexer +]] + +return M diff --git a/lua/lexers/actionscript.lua b/lua/lexers/actionscript.lua @@ -0,0 +1,75 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Actionscript LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'actionscript'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. 
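+-- (An unterminated block comment is highlighted to the end of the buffer:
+-- the closing '*/' is matched optionally via P('*/')^-1.)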
+local line_comment = '//' * l.nonnewline^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local ml_str = '<![CDATA[' * (l.any - ']]>')^0 * ']]>'
+local string = token(l.STRING, sq_str + dq_str + ml_str)
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * S('LlUuFf')^-2)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+  'break', 'continue', 'delete', 'do', 'else', 'for', 'function', 'if', 'in',
+  'new', 'on', 'return', 'this', 'typeof', 'var', 'void', 'while', 'with',
+  'NaN', 'Infinity', 'false', 'null', 'true', 'undefined',
+  -- Reserved for future use.
+  'abstract', 'case', 'catch', 'class', 'const', 'debugger', 'default',
+  'export', 'extends', 'final', 'finally', 'goto', 'implements', 'import',
+  'instanceof', 'interface', 'native', 'package', 'private', 'Void',
+  'protected', 'public', 'dynamic', 'static', 'super', 'switch', 'synchronized',
+  'throw', 'throws', 'transient', 'try', 'volatile'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+  'Array', 'Boolean', 'Color', 'Date', 'Function', 'Key', 'MovieClip', 'Math',
+  'Mouse', 'Number', 'Object', 'Selection', 'Sound', 'String', 'XML', 'XMLNode',
+  'XMLSocket',
+  -- Reserved for future use.
+  'boolean', 'byte', 'char', 'double', 'enum', 'float', 'int', 'long', 'short'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=!<>+-/*%&|^~.,;?()[]{}'))
+
+M._rules = {
+  {'whitespace', ws},
+  {'keyword', keyword},
+  {'type', type},
+  {'identifier', identifier},
+  {'string', string},
+  {'comment', comment},
+  {'number', number},
+  {'operator', operator},
+}
+
+M._foldsymbols = {
+  _patterns = {'[{}]', '/%*', '%*/', '//', '<!%[CDATA%[', '%]%]>'},
+  [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+  [l.COMMENT] = {
+    ['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')
+  },
+  [l.STRING] = {['<![CDATA['] = 1, [']]>'] = -1}
+}
+
+return M
diff --git a/lua/lexers/ada.lua b/lua/lexers/ada.lua
@@ -0,0 +1,68 @@
+-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Ada LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'ada'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '--' * l.nonnewline^0)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"', true, true))
+
+-- Numbers.
+local hex_num = '0' * S('xX') * (l.xdigit + '_')^1
+local integer = l.digit^1 * ('_' * l.digit^1)^0
+local float = integer^1 * ('.' * integer^0)^-1 * S('eE') * S('+-')^-1 * integer
+local number = token(l.NUMBER, hex_num + S('+-')^-1 * (float + integer) *
+                               S('LlUuFf')^-3)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+  'abort', 'abs', 'accept', 'all', 'and', 'begin', 'body', 'case', 'declare',
+  'delay', 'do', 'else', 'elsif', 'end', 'entry', 'exception', 'exit', 'for',
+  'generic', 'goto', 'if', 'in', 'is', 'loop', 'mod', 'new', 'not', 'null',
+  'or', 'others', 'out', 'protected', 'raise', 'record', 'rem', 'renames',
+  'requeue', 'reverse', 'select', 'separate', 'subtype', 'task', 'terminate',
+  'then', 'type', 'until', 'when', 'while', 'xor',
+  -- Preprocessor.
+  'package', 'pragma', 'use', 'with',
+  -- Function.
+  'function', 'procedure', 'return',
+  -- Storage class.
+ 'abstract', 'access', 'aliased', 'array', 'at', 'constant', 'delta', 'digits', + 'interface', 'limited', 'of', 'private', 'range', 'tagged', 'synchronized', + -- Boolean. + 'true', 'false' +}) + +-- Types. +local type = token(l.TYPE, word_match{ + 'boolean', 'character', 'count', 'duration', 'float', 'integer', 'long_float', + 'long_integer', 'priority', 'short_float', 'short_integer', 'string' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S(':;=<>&+-*/.()')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'type', type}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, +} + +return M diff --git a/lua/lexers/ansi_c.lua b/lua/lexers/ansi_c.lua @@ -0,0 +1,154 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- C LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'ansi_c'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '//' * l.nonnewline_esc^0 +local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 +-- local preproc_ifzero = l.starts_line('#if') * S(' \t')^0 * '0' * l.space * +-- (l.starts_line('#endif')) +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local sq_str = P('L')^-1 * l.delimited_range("'", true) +local dq_str = P('L')^-1 * l.delimited_range('"', true) +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local float_suffix = P('f')^-1 +local integer_suffix = (S('uU')^-1 * word_match{ 'l', 'L', 'll', 'LL' }^-1) + + (word_match{ 'l', 'L', 'll', 'LL' }^-1 * S('uU')^-1) +local number = token(l.NUMBER, (l.float * float_suffix) + + (l.integer * integer_suffix)) + +-- Preprocessor. +local preproc_word = word_match{ + 'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef', 'ifndef', 'line', + 'pragma', 'undef', 'warning' +} + +local preproc = #l.starts_line('#') * + (token(l.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) + + token(l.PREPROCESSOR, '#' * S('\t ')^0 * 'include') * + (token(l.WHITESPACE, S('\t ')^0) * + token(l.STRING, l.delimited_range('<>', true, true)))^-1) + +-- Keywords. +local storage_class = word_match{ + -- C11 6.7.1 + 'typedef', 'extern', 'static', '_Thread_local', 'auto', 'register', +} + +local type_qualifier = word_match{ + -- C11 6.7.3 + 'const', 'restrict', 'volatile', '_Atomic', +} + +local function_specifier = word_match{ + -- C11 6.7.4 + 'inline', '_Noreturn', +} + +local extra_keywords = word_match{ + 'asm', '__asm', '__asm__', '__restrict__', '__inline', '__inline__', + '__attribute__', '__declspec' +} + +local keyword = token(l.KEYWORD, word_match{ + 'break', 'case', 'continue', 'default', 'do', 'else', 'enum', 'for', 'goto', + 'if', 'return', 'switch', 'while', + '_Alignas', '_Generic', '_Static_assert', +} + storage_class + type_qualifier + function_specifier + extra_keywords) + +-- Constants. 
+local errno = word_match{ + -- http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html + 'E2BIG', 'EACCES', 'EADDRINUSE', 'EADDRNOTAVAIL', 'EAFNOSUPPORT', + 'EAGAIN', 'EALREADY', 'EBADF', 'EBADMSG', 'EBUSY', 'ECANCELED', 'ECHILD', + 'ECONNABORTED', 'ECONNREFUSED', 'ECONNRESET', 'EDEADLK', 'EDESTADDRREQ', + 'EDOM', 'EDQUOT', 'EEXIST', 'EFAULT', 'EFBIG', 'EHOSTUNREACH', 'EIDRM', + 'EILSEQ', 'EINPROGRESS', 'EINTR', 'EINVAL', 'EIO', 'EISCONN', 'EISDIR', + 'ELOOP', 'EMFILE', 'EMLINK', 'EMSGSIZE', 'EMULTIHOP', 'ENAMETOOLONG', + 'ENETDOWN', 'ENETRESET', 'ENETUNREACH', 'ENFILE', 'ENOBUFS', 'ENODATA', + 'ENODEV', 'ENOENT', 'ENOEXEC', 'ENOLCK', 'ENOLINK', 'ENOMEM', + 'ENOMSG', 'ENOPROTOOPT', 'ENOSPC', 'ENOSR', 'ENOSTR', 'ENOSYS', + 'ENOTCONN', 'ENOTDIR', 'ENOTEMPTY', 'ENOTRECOVERABLE', 'ENOTSOCK', + 'ENOTSUP', 'ENOTTY', 'ENXIO', 'EOPNOTSUPP', 'EOVERFLOW', 'EOWNERDEAD', + 'EPERM', 'EPIPE', 'EPROTO', 'EPROTONOSUPPORT', 'EPROTOTYPE', 'ERANGE', + 'EROFS', 'ESPIPE', 'ESRCH', 'ESTALE', 'ETIME', 'ETIMEDOUT', 'ETXTBSY', + 'EWOULDBLOCK', 'EXDEV', +} + +local preproc_macros = word_match{ + -- C11 6.10.8.1 Mandatory macros + '__DATE__', '__FILE__', '__LINE__', '__TIME__', + -- C11 6.4.2.2 Predefined identifiers + '__func__', +} + +local constant = token(l.CONSTANT, word_match{ + 'true', 'false', + 'NULL', 'CHAR_BIT', 'SIZE_MAX', } + + ((P('WINT') + P('WCHAR') + P('SIG_ATOMIC') + P('PTRDIFF')) * (P('_MIN') + P('_MAX'))) + + ( P('INT') * (((P('_LEAST') + P('_FAST'))^-1 * l.dec_num^1) + P('MAX') + P('PTR')) * (P('_MIN') + P('_MAX'))) + + (P('UINT') * (((P('_LEAST') + P('_FAST'))^-1 * l.dec_num^1) + P('MAX') + P('PTR')) * P('_MAX')) + + errno + preproc_macros +) + +-- Types. +local type = token(l.TYPE, word_match{ + 'bool', 'char', 'double', 'float', 'int', 'long', 'short', + 'signed', 'struct', 'union', 'unsigned', 'void', '_Bool', '_Complex', + '_Imaginary', 'ptrdiff_t', 'size_t', 'max_align_t', 'wchar_t', + 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t'} + + (P('u')^-1 * P('int') * (P('_least') + P('_fast'))^-1 * l.dec_num^1 * P('_t')) + + (S('usif') * l.dec_num^1 * P('_t')) + + (P('__')^-1 * S('usif') * l.dec_num^1) +) + +-- Labels. +-- FIXME: Accept whitespace before label. +local label = token(l.LABEL, l.starts_line(l.word * ':')) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, + S('+-/*%<>~!=^&|?~:;,.()[]{}') + + word_match{ 'sizeof', '_Alignof' } +) + +M._rules = { + {'whitespace', ws}, + {'comment', comment}, + {'keyword', keyword}, + {'type', type}, + {'constant', constant}, + {'operator', operator}, + {'label', label}, + {'identifier', identifier}, + {'string', string}, + {'number', number}, + {'preproc', preproc}, +} + +M._foldsymbols = { + _patterns = {'#?%l+', '[{}]', '/%*', '%*/', '//'}, + [l.PREPROCESSOR] = {['if'] = 1, ifdef = 1, ifndef = 1, endif = -1}, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = { + ['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//'), + ['#if'] = 1, ['#endif'] = -1 + } +} + +return M diff --git a/lua/lexers/antlr.lua b/lua/lexers/antlr.lua @@ -0,0 +1,74 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- ANTLR LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'antlr'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. 
+local line_comment = '//' * l.nonnewline^0 +local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local string = token(l.STRING, l.delimited_range("'", true)) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'abstract', 'break', 'case', 'catch', 'continue', 'default', 'do', 'else', + 'extends', 'final', 'finally', 'for', 'if', 'implements', 'instanceof', + 'native', 'new', 'private', 'protected', 'public', 'return', 'static', + 'switch', 'synchronized', 'throw', 'throws', 'transient', 'try', 'volatile', + 'while', 'package', 'import', 'header', 'options', 'tokens', 'strictfp', + 'false', 'null', 'super', 'this', 'true' +}) + +-- Types. +local type = token(l.TYPE, word_match{ + 'boolean', 'byte', 'char', 'class', 'double', 'float', 'int', 'interface', + 'long', 'short', 'void' +}) + +-- Functions. +local func = token(l.FUNCTION, 'assert') + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('$@:;|.=+*?~!^>-()[]{}')) + +-- Actions. +local action = #P('{') * operator * token('action', (1 - P('}'))^0) * + (#P('}') * operator)^-1 + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'type', type}, + {'function', func}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'action', action}, + {'operator', operator}, +} + +M._tokenstyles = { + action = l.STYLE_NOTHING +} + +M._foldsymbols = { + _patterns = {'[:;%(%){}]', '/%*', '%*/', '//'}, + [l.OPERATOR] = { + [':'] = 1, [';'] = -1, ['('] = 1, [')'] = -1, ['{'] = 1, ['}'] = -1 + }, + [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} +} + +return M diff --git a/lua/lexers/apdl.lua b/lua/lexers/apdl.lua @@ -0,0 +1,102 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- APDL LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'apdl'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '!' * l.nonnewline^0) + +-- Strings. +local string = token(l.STRING, l.delimited_range("'", true, true)) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Keywords. 
+local keyword = token(l.KEYWORD, word_match({ + '*abbr', '*abb', '*afun', '*afu', '*ask', '*cfclos', '*cfc', '*cfopen', + '*cfo', '*cfwrite', '*cfw', '*create', '*cre', '*cycle', '*cyc', '*del', + '*dim', '*do', '*elseif', '*else', '*enddo', '*endif', '*end', '*eval', + '*eva', '*exit', '*exi', '*get', '*go', '*if', '*list', '*lis', '*mfouri', + '*mfo', '*mfun', '*mfu', '*mooney', '*moo', '*moper', '*mop', '*msg', + '*repeat', '*rep', '*set', '*status', '*sta', '*tread', '*tre', '*ulib', + '*uli', '*use', '*vabs', '*vab', '*vcol', '*vco', '*vcum', '*vcu', '*vedit', + '*ved', '*vfact', '*vfa', '*vfill', '*vfi', '*vfun', '*vfu', '*vget', '*vge', + '*vitrp', '*vit', '*vlen', '*vle', '*vmask', '*vma', '*voper', '*vop', + '*vplot', '*vpl', '*vput', '*vpu', '*vread', '*vre', '*vscfun', '*vsc', + '*vstat', '*vst', '*vwrite', '*vwr', '/anfile', '/anf', '/angle', '/ang', + '/annot', '/ann', '/anum', '/anu', '/assign', '/ass', '/auto', '/aut', + '/aux15', '/aux2', '/aux', '/axlab', '/axl', '/batch', '/bat', '/clabel', + '/cla', '/clear', '/cle', '/clog', '/clo', '/cmap', '/cma', '/color', '/col', + '/com', '/config', '/contour', '/con', '/copy', '/cop', '/cplane', '/cpl', + '/ctype', '/cty', '/cval', '/cva', '/delete', '/del', '/devdisp', '/device', + '/dev', '/dist', '/dis', '/dscale', '/dsc', '/dv3d', '/dv3', '/edge', '/edg', + '/efacet', '/efa', '/eof', '/erase', '/era', '/eshape', '/esh', '/exit', + '/exi', '/expand', '/exp', '/facet', '/fac', '/fdele', '/fde', '/filname', + '/fil', '/focus', '/foc', '/format', '/for', '/ftype', '/fty', '/gcmd', + '/gcm', '/gcolumn', '/gco', '/gfile', '/gfi', '/gformat', '/gfo', '/gline', + '/gli', '/gmarker', '/gma', '/golist', '/gol', '/gopr', '/gop', '/go', + '/graphics', '/gra', '/gresume', '/gre', '/grid', '/gri', '/gropt', '/gro', + '/grtyp', '/grt', '/gsave', '/gsa', '/gst', '/gthk', '/gth', '/gtype', '/gty', + '/header', '/hea', '/input', '/inp', '/larc', '/lar', '/light', '/lig', + '/line', '/lin', '/lspec', '/lsp', '/lsymbol', '/lsy', '/menu', '/men', + '/mplib', '/mpl', '/mrep', '/mre', '/mstart', '/mst', '/nerr', '/ner', + '/noerase', '/noe', '/nolist', '/nol', '/nopr', '/nop', '/normal', '/nor', + '/number', '/num', '/opt', '/output', '/out', '/page', '/pag', '/pbc', '/pbf', + '/pcircle', '/pci', '/pcopy', '/pco', '/plopts', '/plo', '/pmacro', '/pma', + '/pmeth', '/pme', '/pmore', '/pmo', '/pnum', '/pnu', '/polygon', '/pol', + '/post26', '/post1', '/pos', '/prep7', '/pre', '/psearch', '/pse', '/psf', + '/pspec', '/psp', '/pstatus', '/pst', '/psymb', '/psy', '/pwedge', '/pwe', + '/quit', '/qui', '/ratio', '/rat', '/rename', '/ren', '/replot', '/rep', + '/reset', '/res', '/rgb', '/runst', '/run', '/seclib', '/sec', '/seg', + '/shade', '/sha', '/showdisp', '/show', '/sho', '/shrink', '/shr', '/solu', + '/sol', '/sscale', '/ssc', '/status', '/sta', '/stitle', '/sti', '/syp', + '/sys', '/title', '/tit', '/tlabel', '/tla', '/triad', '/tri', '/trlcy', + '/trl', '/tspec', '/tsp', '/type', '/typ', '/ucmd', '/ucm', '/uis', '/ui', + '/units', '/uni', '/user', '/use', '/vcone', '/vco', '/view', '/vie', + '/vscale', '/vsc', '/vup', '/wait', '/wai', '/window', '/win', '/xrange', + '/xra', '/yrange', '/yra', '/zoom', '/zoo' +}, '*/', true)) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Functions. +local func = token(l.FUNCTION, l.delimited_range('%', true, true)) + +-- Operators. +local operator = token(l.OPERATOR, S('+-*/$=,;()')) + +-- Labels. 
+local label = token(l.LABEL, l.starts_line(':') * l.word) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'identifier', identifier}, + {'string', string}, + {'number', number}, + {'function', func}, + {'label', label}, + {'comment', comment}, + {'operator', operator}, +} + +M._foldsymbols = { + _patterns = {'%*[A-Za-z]+', '!'}, + [l.KEYWORD] = { + ['*if'] = 1, ['*IF'] = 1, ['*do'] = 1, ['*DO'] = 1, ['*dowhile'] = 1, + ['*DOWHILE'] = 1, + ['*endif'] = -1, ['*ENDIF'] = -1, ['*enddo'] = -1, ['*ENDDO'] = -1 + }, + [l.COMMENT] = {['!'] = l.fold_line_comments('!')} +} + +return M diff --git a/lua/lexers/apl.lua b/lua/lexers/apl.lua @@ -0,0 +1,69 @@ +-- Copyright 2015-2017 David B. Lamkins <david@lamkins.net>. See LICENSE. +-- APL LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'apl'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, (P('⍝') + P('#')) * l.nonnewline^0) + +-- Strings. +local sq_str = l.delimited_range("'", false, true) +local dq_str = l.delimited_range('"') + +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local dig = R('09') +local rad = P('.') +local exp = S('eE') +local img = S('jJ') +local sgn = P('¯')^-1 +local float = sgn * (dig^0 * rad * dig^1 + dig^1 * rad * dig^0 + dig^1) * + (exp * sgn *dig^1)^-1 +local number = token(l.NUMBER, float * img * float + float) + +-- Keywords. +local keyword = token(l.KEYWORD, P('⍞') + P('χ') + P('⍺') + P('⍶') + P('⍵') + + P('⍹') + P('⎕') * R('AZ', 'az')^0) + +-- Names. +local n1l = R('AZ', 'az') +local n1b = P('_') + P('∆') + P('⍙') +local n2l = n1l + R('09') +local n2b = n1b + P('¯') +local n1 = n1l + n1b +local n2 = n2l + n2b +local name = n1 * n2^0 + +-- Labels. +local label = token(l.LABEL, name * P(':')) + +-- Variables. +local variable = token(l.VARIABLE, name) + +-- Special. +local special = token(l.TYPE, S('{}[]();') + P('←') + P('→') + P('◊')) + +-- Nabla. +local nabla = token(l.PREPROCESSOR, P('∇') + P('⍫')) + +M._rules = { + {'whitespace', ws}, + {'comment', comment}, + {'string', string}, + {'number', number}, + {'keyword', keyword}, + {'label', label}, + {'variable', variable}, + {'special', special}, + {'nabla', nabla}, +} + +return M diff --git a/lua/lexers/applescript.lua b/lua/lexers/applescript.lua @@ -0,0 +1,82 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Applescript LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'applescript'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '--' * l.nonnewline^0 +local block_comment = '(*' * (l.any - '*)')^0 * P('*)')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local string = token(l.STRING, l.delimited_range('"', true)) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match({ + 'script', 'property', 'prop', 'end', 'copy', 'to', 'set', 'global', 'local', + 'on', 'to', 'of', 'in', 'given', 'with', 'without', 'return', 'continue', + 'tell', 'if', 'then', 'else', 'repeat', 'times', 'while', 'until', 'from', + 'exit', 'try', 'error', 'considering', 'ignoring', 'timeout', 'transaction', + 'my', 'get', 'put', 'into', 'is', + -- References. 
+ 'each', 'some', 'every', 'whose', 'where', 'id', 'index', 'first', 'second', + 'third', 'fourth', 'fifth', 'sixth', 'seventh', 'eighth', 'ninth', 'tenth', + 'last', 'front', 'back', 'st', 'nd', 'rd', 'th', 'middle', 'named', 'through', + 'thru', 'before', 'after', 'beginning', 'the', + -- Commands. + 'close', 'copy', 'count', 'delete', 'duplicate', 'exists', 'launch', 'make', + 'move', 'open', 'print', 'quit', 'reopen', 'run', 'save', 'saving', + -- Operators. + 'div', 'mod', 'and', 'not', 'or', 'as', 'contains', 'equal', 'equals', + 'isn\'t', +}, "'", true)) + +-- Constants. +local constant = token(l.CONSTANT, word_match({ + 'case', 'diacriticals', 'expansion', 'hyphens', 'punctuation', + -- Predefined variables. + 'it', 'me', 'version', 'pi', 'result', 'space', 'tab', 'anything', + -- Text styles. + 'bold', 'condensed', 'expanded', 'hidden', 'italic', 'outline', 'plain', + 'shadow', 'strikethrough', 'subscript', 'superscript', 'underline', + -- Save options. + 'ask', 'no', 'yes', + -- Booleans. + 'false', 'true', + -- Date and time. + 'weekday', 'monday', 'mon', 'tuesday', 'tue', 'wednesday', 'wed', 'thursday', + 'thu', 'friday', 'fri', 'saturday', 'sat', 'sunday', 'sun', 'month', + 'january', 'jan', 'february', 'feb', 'march', 'mar', 'april', 'apr', 'may', + 'june', 'jun', 'july', 'jul', 'august', 'aug', 'september', 'sep', 'october', + 'oct', 'november', 'nov', 'december', 'dec', 'minutes', 'hours', 'days', + 'weeks' +}, nil, true)) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, (l.alpha + '_') * l.alnum^0) + +-- Operators. +local operator = token(l.OPERATOR, S('+-^*/&<>=:,(){}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'constant', constant}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, +} + +return M diff --git a/lua/lexers/asm.lua b/lua/lexers/asm.lua @@ -0,0 +1,486 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- NASM Assembly LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'asm'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, ';' * l.nonnewline^0) + +-- Strings. +local sq_str = l.delimited_range("'", true) +local dq_str = l.delimited_range('"', true) +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer * S('hqb')^-1) + +-- Preprocessor. 
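+-- (These NASM directive names are matched after a leading '%' by the
+-- `preproc` token defined below.)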
+local preproc_word = word_match{ + 'arg', 'assign', 'clear', 'define', 'defstr', 'deftok', 'depend', 'elif', + 'elifctx', 'elifdef', 'elifempty', 'elifenv', 'elifid', 'elifidn', 'elifidni', + 'elifmacro', 'elifn', 'elifnctx', 'elifndef', 'elifnempty', 'elifnenv', + 'elifnid', 'elifnidn', 'elifnidni', 'elifnmacro', 'elifnnum', 'elifnstr', + 'elifntoken', 'elifnum', 'elifstr', 'eliftoken', 'else', 'endif', 'endmacro', + 'endrep', 'endwhile', 'error', 'exitmacro', 'exitrep', 'exitwhile', 'fatal', + 'final', 'idefine', 'idefstr', 'ideftok', 'if', 'ifctx', 'ifdef', 'ifempty', + 'ifenv', 'ifid', 'ifidn', 'ifidni', 'ifmacro', 'ifn', 'ifnctx', 'ifndef', + 'ifnempty', 'ifnenv', 'ifnid', 'ifnidn', 'ifnidni', 'ifnmacro', 'ifnnum', + 'ifnstr', 'ifntoken', 'ifnum', 'ifstr', 'iftoken', 'imacro', 'include', + 'ixdefine', 'line', 'local', 'macro', 'pathsearch', 'pop', 'push', 'rep', + 'repl', 'rmacro', 'rotate', 'stacksize', 'strcat', 'strlen', 'substr', + 'undef', 'unmacro', 'use', 'warning', 'while', 'xdefine', +} +local preproc_symbol = '??' + S('!$+?') + '%' * -l.space + R('09')^1 +local preproc = token(l.PREPROCESSOR, '%' * (preproc_word + preproc_symbol)) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match({ + -- Preprocessor macros. + 'struc', 'endstruc', 'istruc', 'at', 'iend', 'align', 'alignb', 'sectalign', + '.nolist', + -- Preprocessor Packages. + --'altreg', 'smartalign', 'fp', 'ifunc' + -- Directives. + 'absolute', 'bits', 'class', 'common', 'common', 'cpu', 'default', 'export', + 'extern', 'float', 'global', 'group', 'import', 'osabi', 'overlay', 'private', + 'public', '__SECT__', 'section', 'segment', 'stack', 'use16', 'use32', + 'use64', + -- Section Names. + '.bss', '.comment', '.data', '.lbss', '.ldata', '.lrodata', '.rdata', + '.rodata', '.tbss', '.tdata', '.text', + -- Section Qualifiers. + 'alloc', 'bss', 'code', 'exec', 'data', 'noalloc', 'nobits', 'noexec', + 'nowrite', 'progbits', 'rdata', 'tls', 'write', + -- Operators. + 'abs', 'rel', 'seg', 'wrt', 'strict', + '__utf16__', '__utf16be__', '__utf16le__', '__utf32__', '__utf32be__', + '__utf32le__', +}, '.')) + +-- Instructions. +-- awk '{print $1}'|uniq|tr '[:upper:]' '[:lower:]'| +-- lua -e "for l in io.lines() do print(\"'\"..l..\"',\") end"|fmt -w 78 +local instruction = token('instruction', word_match{ + -- Special Instructions. + 'db', 'dd', 'do', 'dq', 'dt', 'dw', 'dy', 'resb', 'resd', 'reso', 'resq', + 'rest', 'resw', 'resy', + -- Conventional Instructions. 
+ 'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bb0_reset', + 'bb1_reset', 'bound', 'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', + 'call', 'cbw', 'cdq', 'cdqe', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmp', + 'cmpsb', 'cmpsd', 'cmpsq', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', + 'cmpxchg16b', 'cpuid', 'cpu_read', 'cpu_write', 'cqo', 'cwd', 'cwde', 'daa', + 'das', 'dec', 'div', 'dmint', 'emms', 'enter', 'equ', 'f2xm1', 'fabs', + 'fadd', 'faddp', 'fbld', 'fbstp', 'fchs', 'fclex', 'fcmovb', 'fcmovbe', + 'fcmove', 'fcmovnb', 'fcmovnbe', 'fcmovne', 'fcmovnu', 'fcmovu', 'fcom', + 'fcomi', 'fcomip', 'fcomp', 'fcompp', 'fcos', 'fdecstp', 'fdisi', 'fdiv', + 'fdivp', 'fdivr', 'fdivrp', 'femms', 'feni', 'ffree', 'ffreep', 'fiadd', + 'ficom', 'ficomp', 'fidiv', 'fidivr', 'fild', 'fimul', 'fincstp', 'finit', + 'fist', 'fistp', 'fisttp', 'fisub', 'fisubr', 'fld', 'fld1', 'fldcw', + 'fldenv', 'fldl2e', 'fldl2t', 'fldlg2', 'fldln2', 'fldpi', 'fldz', 'fmul', + 'fmulp', 'fnclex', 'fndisi', 'fneni', 'fninit', 'fnop', 'fnsave', 'fnstcw', + 'fnstenv', 'fnstsw', 'fpatan', 'fprem', 'fprem1', 'fptan', 'frndint', + 'frstor', 'fsave', 'fscale', 'fsetpm', 'fsin', 'fsincos', 'fsqrt', + 'fst', 'fstcw', 'fstenv', 'fstp', 'fstsw', 'fsub', 'fsubp', 'fsubr', + 'fsubrp', 'ftst', 'fucom', 'fucomi', 'fucomip', 'fucomp', 'fucompp', + 'fxam', 'fxch', 'fxtract', 'fyl2x', 'fyl2xp1', 'hlt', 'ibts', 'icebp', + 'idiv', 'imul', 'in', 'inc', 'incbin', 'insb', 'insd', 'insw', 'int', + 'int01', 'int1', 'int03', 'int3', 'into', 'invd', 'invpcid', 'invlpg', + 'invlpga', 'iret', 'iretd', 'iretq', 'iretw', 'jcxz', 'jecxz', 'jrcxz', + 'jmp', 'jmpe', 'lahf', 'lar', 'lds', 'lea', 'leave', 'les', 'lfence', + 'lfs', 'lgdt', 'lgs', 'lidt', 'lldt', 'lmsw', 'loadall', 'loadall286', + 'lodsb', 'lodsd', 'lodsq', 'lodsw', 'loop', 'loope', 'loopne', 'loopnz', + 'loopz', 'lsl', 'lss', 'ltr', 'mfence', 'monitor', 'mov', 'movd', 'movq', + 'movsb', 'movsd', 'movsq', 'movsw', 'movsx', 'movsxd', 'movsx', 'movzx', + 'mul', 'mwait', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd', 'outsw', + 'packssdw', 'packsswb', 'packuswb', 'paddb', 'paddd', 'paddsb', 'paddsiw', + 'paddsw', 'paddusb', 'paddusw', 'paddw', 'pand', 'pandn', 'pause', 'paveb', + 'pavgusb', 'pcmpeqb', 'pcmpeqd', 'pcmpeqw', 'pcmpgtb', 'pcmpgtd', 'pcmpgtw', + 'pdistib', 'pf2id', 'pfacc', 'pfadd', 'pfcmpeq', 'pfcmpge', 'pfcmpgt', + 'pfmax', 'pfmin', 'pfmul', 'pfrcp', 'pfrcpit1', 'pfrcpit2', 'pfrsqit1', + 'pfrsqrt', 'pfsub', 'pfsubr', 'pi2fd', 'pmachriw', 'pmaddwd', 'pmagw', + 'pmulhriw', 'pmulhrwa', 'pmulhrwc', 'pmulhw', 'pmullw', 'pmvgezb', 'pmvlzb', + 'pmvnzb', 'pmvzb', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', + 'popfq', 'popfw', 'por', 'prefetch', 'prefetchw', 'pslld', 'psllq', + 'psllw', 'psrad', 'psraw', 'psrld', 'psrlq', 'psrlw', 'psubb', 'psubd', + 'psubsb', 'psubsiw', 'psubsw', 'psubusb', 'psubusw', 'psubw', 'punpckhbw', + 'punpckhdq', 'punpckhwd', 'punpcklbw', 'punpckldq', 'punpcklwd', 'push', + 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfq', 'pushfw', 'pxor', + 'rcl', 'rcr', 'rdshr', 'rdmsr', 'rdpmc', 'rdtsc', 'rdtscp', 'ret', 'retf', + 'retn', 'rol', 'ror', 'rdm', 'rsdc', 'rsldt', 'rsm', 'rsts', 'sahf', 'sal', + 'salc', 'sar', 'sbb', 'scasb', 'scasd', 'scasq', 'scasw', 'sfence', 'sgdt', + 'shl', 'shld', 'shr', 'shrd', 'sidt', 'sldt', 'skinit', 'smi', 'smint', + 'smintold', 'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosq', 'stosw', + 'str', 'sub', 'svdc', 'svldt', 'svts', 'swapgs', 'syscall', 'sysenter', + 'sysexit', 'sysret', 'test', 'ud0', 
'ud1', 'ud2b', 'ud2', 'ud2a', 'umov', + 'verr', 'verw', 'fwait', 'wbinvd', 'wrshr', 'wrmsr', 'xadd', 'xbts', + 'xchg', 'xlatb', 'xlat', 'xor', 'cmova', 'cmovae', 'cmovb', 'cmovbe', + 'cmovc', 'cmove', 'cmovg', 'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', + 'cmovnb', 'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl', + 'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo', 'cmovp', + 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmovcc', 'ja', 'jae', 'jb', 'jbe', + 'jc', 'je', 'jg', 'jge', 'jl', 'jle', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', + 'jne', 'jng', 'jnge', 'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', + 'jpe', 'jpo', 'js', 'jz', 'seta', 'setae', 'setb', 'setbe', 'setc', 'sete', + 'setg', 'setge', 'setl', 'setle', 'setna', 'setnae', 'setnb', 'setnbe', + 'setnc', 'setne', 'setng', 'setnge', 'setnl', 'setnle', 'setno', 'setnp', + 'setns', 'setnz', 'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', + --" Katmai Streaming SIMD instructions (SSE -- a.k.a. KNI, XMM, MMX2). + 'addps', 'addss', 'andnps', 'andps', 'cmpeqps', 'cmpeqss', 'cmpleps', + 'cmpless', 'cmpltps', 'cmpltss', 'cmpneqps', 'cmpneqss', 'cmpnleps', + 'cmpnless', 'cmpnltps', 'cmpnltss', 'cmpordps', 'cmpordss', 'cmpunordps', + 'cmpunordss', 'cmpps', 'cmpss', 'comiss', 'cvtpi2ps', 'cvtps2pi', 'cvtsi2ss', + 'cvtss2si', 'cvttps2pi', 'cvttss2si', 'divps', 'divss', 'ldmxcsr', 'maxps', + 'maxss', 'minps', 'minss', 'movaps', 'movhps', 'movlhps', 'movlps', + 'movhlps', 'movmskps', 'movntps', 'movss', 'movups', 'mulps', 'mulss', + 'orps', 'rcpps', 'rcpss', 'rsqrtps', 'rsqrtss', 'shufps', 'sqrtps', 'sqrtss', + 'stmxcsr', 'subps', 'subss', 'ucomiss', 'unpckhps', 'unpcklps', 'xorps', + -- Introduced in Deschutes but necessary for SSE support. + 'fxrstor', 'fxrstor64', 'fxsave', 'fxsave64', + -- XSAVE group (AVX and extended state). + 'xgetbv', 'xsetbv', 'xsave', 'xsave64', 'xsaveopt', 'xsaveopt64', 'xrstor', + 'xrstor64', + -- Generic memory operations. + 'prefetchnta', 'prefetcht0', 'prefetcht1', 'prefetcht2', 'sfence', + -- New MMX instructions introduced in Katmai. + 'maskmovq', 'movntq', 'pavgb', 'pavgw', 'pextrw', 'pinsrw', 'pmaxsw', + 'pmaxub', 'pminsw', 'pminub', 'pmovmskb', 'pmulhuw', 'psadbw', 'pshufw', + -- AMD Enhanced 3DNow! (Athlon) instructions. + 'pf2iw', 'pfnacc', 'pfpnacc', 'pi2fw', 'pswapd', + -- Willamette SSE2 Cacheability Instructions. + 'maskmovdqu', 'clflush', 'movntdq', 'movnti', 'movntpd', 'lfence', 'mfence', + -- Willamette MMX instructions (SSE2 SIMD Integer Instructions). + 'movd', 'movdqa', 'movdqu', 'movdq2q', 'movq', 'movq2dq', 'packsswb', + 'packssdw', 'packuswb', 'paddb', 'paddw', 'paddd', 'paddq', 'paddsb', + 'paddsw', 'paddusb', 'paddusw', 'pand', 'pandn', 'pavgb', 'pavgw', 'pcmpeqb', + 'pcmpeqw', 'pcmpeqd', 'pcmpgtb', 'pcmpgtw', 'pcmpgtd', 'pextrw', 'pinsrw', + 'pmaddwd', 'pmaxsw', 'pmaxub', 'pminsw', 'pminub', 'pmovmskb', 'pmulhuw', + 'pmulhw', 'pmullw', 'pmuludq', 'por', 'psadbw', 'pshufd', 'pshufhw', + 'pshuflw', 'pslldq', 'psllw', 'pslld', 'psllq', 'psraw', 'psrad', 'psrldq', + 'psrlw', 'psrld', 'psrlq', 'psubb', 'psubw', 'psubd', 'psubq', 'psubsb', + 'psubsw', 'psubusb', 'psubusw', 'punpckhbw', 'punpckhwd', 'punpckhdq', + 'punpckhqdq', 'punpcklbw', 'punpcklwd', 'punpckldq', 'punpcklqdq', 'pxor', + -- Willamette Streaming SIMD instructions (SSE2). 
+ 'addpd', 'addsd', 'andnpd', 'andpd', 'cmpeqpd', 'cmpeqsd', 'cmplepd', + 'cmplesd', 'cmpltpd', 'cmpltsd', 'cmpneqpd', 'cmpneqsd', 'cmpnlepd', + 'cmpnlesd', 'cmpnltpd', 'cmpnltsd', 'cmpordpd', 'cmpordsd', 'cmpunordpd', + 'cmpunordsd', 'cmppd', 'cmpsd', 'comisd', 'cvtdq2pd', 'cvtdq2ps', + 'cvtpd2dq', 'cvtpd2pi', 'cvtpd2ps', 'cvtpi2pd', 'cvtps2dq', 'cvtps2pd', + 'cvtsd2si', 'cvtsd2ss', 'cvtsi2sd', 'cvtss2sd', 'cvttpd2pi', 'cvttpd2dq', + 'cvttps2dq', 'cvttsd2si', 'divpd', 'divsd', 'maxpd', 'maxsd', 'minpd', + 'minsd', 'movapd', 'movhpd', 'movlpd', 'movmskpd', 'movsd', 'movupd', + 'mulpd', 'mulsd', 'orpd', 'shufpd', 'sqrtpd', 'sqrtsd', 'subpd', 'subsd', + 'ucomisd', 'unpckhpd', 'unpcklpd', 'xorpd', + -- Prescott New Instructions (SSE3). + 'addsubpd', 'addsubps', 'haddpd', 'haddps', 'hsubpd', 'hsubps', 'lddqu', + 'movddup', 'movshdup', 'movsldup', + -- VMX/SVM Instructions. + 'clgi', 'stgi', 'vmcall', 'vmclear', 'vmfunc', 'vmlaunch', 'vmload', + 'vmmcall', 'vmptrld', 'vmptrst', 'vmread', 'vmresume', 'vmrun', 'vmsave', + 'vmwrite', 'vmxoff', 'vmxon', + -- Extended Page Tables VMX instructions. + 'invept', 'invvpid', + -- Tejas New Instructions (SSSE3). + 'pabsb', 'pabsw', 'pabsd', 'palignr', 'phaddw', 'phaddd', 'phaddsw', + 'phsubw', 'phsubd', 'phsubsw', 'pmaddubsw', 'pmulhrsw', 'pshufb', 'psignb', + 'psignw', 'psignd', + -- AMD SSE4A. + 'extrq', 'insertq', 'movntsd', 'movntss', + -- New instructions in Barcelona. + 'lzcnt', + -- Penryn New Instructions (SSE4.1). + 'blendpd', 'blendps', 'blendvpd', 'blendvps', 'dppd', 'dpps', 'extractps', + 'insertps', 'movntdqa', 'mpsadbw', 'packusdw', 'pblendvb', 'pblendw', + 'pcmpeqq', 'pextrb', 'pextrd', 'pextrq', 'pextrw', 'phminposuw', 'pinsrb', + 'pinsrd', 'pinsrq', 'pmaxsb', 'pmaxsd', 'pmaxud', 'pmaxuw', 'pminsb', + 'pminsd', 'pminud', 'pminuw', 'pmovsxbw', 'pmovsxbd', 'pmovsxbq', 'pmovsxwd', + 'pmovsxwq', 'pmovsxdq', 'pmovzxbw', 'pmovzxbd', 'pmovzxbq', 'pmovzxwd', + 'pmovzxwq', 'pmovzxdq', 'pmuldq', 'pmulld', 'ptest', 'roundpd', 'roundps', + 'roundsd', 'roundss', + -- Nehalem New Instructions (SSE4.2). + 'crc32', 'pcmpestri', 'pcmpestrm', 'pcmpistri', 'pcmpistrm', 'pcmpgtq', + 'popcnt', + -- Intel SMX. + 'getsec', + -- Geode (Cyrix) 3DNow! additions. + 'pfrcpv', 'pfrsqrtv', + -- Intel new instructions in ???. + 'movbe', + -- Intel AES instructions. + 'aesenc', 'aesenclast', 'aesdec', 'aesdeclast', 'aesimc', 'aeskeygenassist', + -- Intel AVX AES instructions. + 'vaesenc', 'vaesenclast', 'vaesdec', 'vaesdeclast', 'vaesimc', + 'vaeskeygenassist', + -- Intel AVX instructions. 
+ 'vaddpd', 'vaddps', 'vaddsd', 'vaddss', 'vaddsubpd', 'vaddsubps', + 'vandpd', 'vandps', 'vandnpd', 'vandnps', 'vblendpd', 'vblendps', + 'vblendvpd', 'vblendvps', 'vbroadcastss', 'vbroadcastsd', 'vbroadcastf128', + 'vcmpeq_ospd', 'vcmpeqpd', 'vcmplt_ospd', 'vcmpltpd', 'vcmple_ospd', + 'vcmplepd', 'vcmpunord_qpd', 'vcmpunordpd', 'vcmpneq_uqpd', 'vcmpneqpd', + 'vcmpnlt_uspd', 'vcmpnltpd', 'vcmpnle_uspd', 'vcmpnlepd', 'vcmpord_qpd', + 'vcmpordpd', 'vcmpeq_uqpd', 'vcmpnge_uspd', 'vcmpngepd', 'vcmpngt_uspd', + 'vcmpngtpd', 'vcmpfalse_oqpd', 'vcmpfalsepd', 'vcmpneq_oqpd', 'vcmpge_ospd', + 'vcmpgepd', 'vcmpgt_ospd', 'vcmpgtpd', 'vcmptrue_uqpd', 'vcmptruepd', + 'vcmpeq_ospd', 'vcmplt_oqpd', 'vcmple_oqpd', 'vcmpunord_spd', 'vcmpneq_uspd', + 'vcmpnlt_uqpd', 'vcmpnle_uqpd', 'vcmpord_spd', 'vcmpeq_uspd', 'vcmpnge_uqpd', + 'vcmpngt_uqpd', 'vcmpfalse_ospd', 'vcmpneq_ospd', 'vcmpge_oqpd', + 'vcmpgt_oqpd', 'vcmptrue_uspd', 'vcmppd', 'vcmpeq_osps', 'vcmpeqps', + 'vcmplt_osps', 'vcmpltps', 'vcmple_osps', 'vcmpleps', 'vcmpunord_qps', + 'vcmpunordps', 'vcmpneq_uqps', 'vcmpneqps', 'vcmpnlt_usps', 'vcmpnltps', + 'vcmpnle_usps', 'vcmpnleps', 'vcmpord_qps', 'vcmpordps', 'vcmpeq_uqps', + 'vcmpnge_usps', 'vcmpngeps', 'vcmpngt_usps', 'vcmpngtps', 'vcmpfalse_oqps', + 'vcmpfalseps', 'vcmpneq_oqps', 'vcmpge_osps', 'vcmpgeps', 'vcmpgt_osps', + 'vcmpgtps', 'vcmptrue_uqps', 'vcmptrueps', 'vcmpeq_osps', 'vcmplt_oqps', + 'vcmple_oqps', 'vcmpunord_sps', 'vcmpneq_usps', 'vcmpnlt_uqps', + 'vcmpnle_uqps', 'vcmpord_sps', 'vcmpeq_usps', 'vcmpnge_uqps', + 'vcmpngt_uqps', 'vcmpfalse_osps', 'vcmpneq_osps', 'vcmpge_oqps', + 'vcmpgt_oqps', 'vcmptrue_usps', 'vcmpps', 'vcmpeq_ossd', 'vcmpeqsd', + 'vcmplt_ossd', 'vcmpltsd', 'vcmple_ossd', 'vcmplesd', 'vcmpunord_qsd', + 'vcmpunordsd', 'vcmpneq_uqsd', 'vcmpneqsd', 'vcmpnlt_ussd', 'vcmpnltsd', + 'vcmpnle_ussd', 'vcmpnlesd', 'vcmpord_qsd', 'vcmpordsd', 'vcmpeq_uqsd', + 'vcmpnge_ussd', 'vcmpngesd', 'vcmpngt_ussd', 'vcmpngtsd', 'vcmpfalse_oqsd', + 'vcmpfalsesd', 'vcmpneq_oqsd', 'vcmpge_ossd', 'vcmpgesd', 'vcmpgt_ossd', + 'vcmpgtsd', 'vcmptrue_uqsd', 'vcmptruesd', 'vcmpeq_ossd', 'vcmplt_oqsd', + 'vcmple_oqsd', 'vcmpunord_ssd', 'vcmpneq_ussd', 'vcmpnlt_uqsd', + 'vcmpnle_uqsd', 'vcmpord_ssd', 'vcmpeq_ussd', 'vcmpnge_uqsd', + 'vcmpngt_uqsd', 'vcmpfalse_ossd', 'vcmpneq_ossd', 'vcmpge_oqsd', + 'vcmpgt_oqsd', 'vcmptrue_ussd', 'vcmpsd', 'vcmpeq_osss', 'vcmpeqss', + 'vcmplt_osss', 'vcmpltss', 'vcmple_osss', 'vcmpless', 'vcmpunord_qss', + 'vcmpunordss', 'vcmpneq_uqss', 'vcmpneqss', 'vcmpnlt_usss', 'vcmpnltss', + 'vcmpnle_usss', 'vcmpnless', 'vcmpord_qss', 'vcmpordss', 'vcmpeq_uqss', + 'vcmpnge_usss', 'vcmpngess', 'vcmpngt_usss', 'vcmpngtss', 'vcmpfalse_oqss', + 'vcmpfalsess', 'vcmpneq_oqss', 'vcmpge_osss', 'vcmpgess', 'vcmpgt_osss', + 'vcmpgtss', 'vcmptrue_uqss', 'vcmptruess', 'vcmpeq_osss', 'vcmplt_oqss', + 'vcmple_oqss', 'vcmpunord_sss', 'vcmpneq_usss', 'vcmpnlt_uqss', + 'vcmpnle_uqss', 'vcmpord_sss', 'vcmpeq_usss', 'vcmpnge_uqss', + 'vcmpngt_uqss', 'vcmpfalse_osss', 'vcmpneq_osss', 'vcmpge_oqss', + 'vcmpgt_oqss', 'vcmptrue_usss', 'vcmpss', 'vcomisd', 'vcomiss', + 'vcvtdq2pd', 'vcvtdq2ps', 'vcvtpd2dq', 'vcvtpd2ps', 'vcvtps2dq', + 'vcvtps2pd', 'vcvtsd2si', 'vcvtsd2ss', 'vcvtsi2sd', 'vcvtsi2ss', + 'vcvtss2sd', 'vcvtss2si', 'vcvttpd2dq', 'vcvttps2dq', 'vcvttsd2si', + 'vcvttss2si', 'vdivpd', 'vdivps', 'vdivsd', 'vdivss', 'vdppd', 'vdpps', + 'vextractf128', 'vextractps', 'vhaddpd', 'vhaddps', 'vhsubpd', 'vhsubps', + 'vinsertf128', 'vinsertps', 'vlddqu', 'vldqqu', 'vlddqu', 'vldmxcsr', + 'vmaskmovdqu', 
'vmaskmovps', 'vmaskmovpd', 'vmaxpd', 'vmaxps', 'vmaxsd', + 'vmaxss', 'vminpd', 'vminps', 'vminsd', 'vminss', 'vmovapd', 'vmovaps', + 'vmovd', 'vmovq', 'vmovddup', 'vmovdqa', 'vmovqqa', 'vmovdqa', 'vmovdqu', + 'vmovqqu', 'vmovdqu', 'vmovhlps', 'vmovhpd', 'vmovhps', 'vmovlhps', + 'vmovlpd', 'vmovlps', 'vmovmskpd', 'vmovmskps', 'vmovntdq', 'vmovntqq', + 'vmovntdq', 'vmovntdqa', 'vmovntpd', 'vmovntps', 'vmovsd', 'vmovshdup', + 'vmovsldup', 'vmovss', 'vmovupd', 'vmovups', 'vmpsadbw', 'vmulpd', + 'vmulps', 'vmulsd', 'vmulss', 'vorpd', 'vorps', 'vpabsb', 'vpabsw', + 'vpabsd', 'vpacksswb', 'vpackssdw', 'vpackuswb', 'vpackusdw', 'vpaddb', + 'vpaddw', 'vpaddd', 'vpaddq', 'vpaddsb', 'vpaddsw', 'vpaddusb', 'vpaddusw', + 'vpalignr', 'vpand', 'vpandn', 'vpavgb', 'vpavgw', 'vpblendvb', 'vpblendw', + 'vpcmpestri', 'vpcmpestrm', 'vpcmpistri', 'vpcmpistrm', 'vpcmpeqb', + 'vpcmpeqw', 'vpcmpeqd', 'vpcmpeqq', 'vpcmpgtb', 'vpcmpgtw', 'vpcmpgtd', + 'vpcmpgtq', 'vpermilpd', 'vpermilps', 'vperm2f128', 'vpextrb', 'vpextrw', + 'vpextrd', 'vpextrq', 'vphaddw', 'vphaddd', 'vphaddsw', 'vphminposuw', + 'vphsubw', 'vphsubd', 'vphsubsw', 'vpinsrb', 'vpinsrw', 'vpinsrd', + 'vpinsrq', 'vpmaddwd', 'vpmaddubsw', 'vpmaxsb', 'vpmaxsw', 'vpmaxsd', + 'vpmaxub', 'vpmaxuw', 'vpmaxud', 'vpminsb', 'vpminsw', 'vpminsd', 'vpminub', + 'vpminuw', 'vpminud', 'vpmovmskb', 'vpmovsxbw', 'vpmovsxbd', 'vpmovsxbq', + 'vpmovsxwd', 'vpmovsxwq', 'vpmovsxdq', 'vpmovzxbw', 'vpmovzxbd', 'vpmovzxbq', + 'vpmovzxwd', 'vpmovzxwq', 'vpmovzxdq', 'vpmulhuw', 'vpmulhrsw', 'vpmulhw', + 'vpmullw', 'vpmulld', 'vpmuludq', 'vpmuldq', 'vpor', 'vpsadbw', 'vpshufb', + 'vpshufd', 'vpshufhw', 'vpshuflw', 'vpsignb', 'vpsignw', 'vpsignd', + 'vpslldq', 'vpsrldq', 'vpsllw', 'vpslld', 'vpsllq', 'vpsraw', 'vpsrad', + 'vpsrlw', 'vpsrld', 'vpsrlq', 'vptest', 'vpsubb', 'vpsubw', 'vpsubd', + 'vpsubq', 'vpsubsb', 'vpsubsw', 'vpsubusb', 'vpsubusw', 'vpunpckhbw', + 'vpunpckhwd', 'vpunpckhdq', 'vpunpckhqdq', 'vpunpcklbw', 'vpunpcklwd', + 'vpunpckldq', 'vpunpcklqdq', 'vpxor', 'vrcpps', 'vrcpss', 'vrsqrtps', + 'vrsqrtss', 'vroundpd', 'vroundps', 'vroundsd', 'vroundss', 'vshufpd', + 'vshufps', 'vsqrtpd', 'vsqrtps', 'vsqrtsd', 'vsqrtss', 'vstmxcsr', 'vsubpd', + 'vsubps', 'vsubsd', 'vsubss', 'vtestps', 'vtestpd', 'vucomisd', 'vucomiss', + 'vunpckhpd', 'vunpckhps', 'vunpcklpd', 'vunpcklps', 'vxorpd', 'vxorps', + 'vzeroall', 'vzeroupper', + -- Intel Carry-Less Multiplication instructions (CLMUL). + 'pclmullqlqdq', 'pclmulhqlqdq', 'pclmullqhqdq', 'pclmulhqhqdq', 'pclmulqdq', + -- Intel AVX Carry-Less Multiplication instructions (CLMUL). + 'vpclmullqlqdq', 'vpclmulhqlqdq', 'vpclmullqhqdq', 'vpclmulhqhqdq', + 'vpclmulqdq', + -- Intel Fused Multiply-Add instructions (FMA). 
+ 'vfmadd132ps', 'vfmadd132pd', 'vfmadd312ps', 'vfmadd312pd', 'vfmadd213ps', + 'vfmadd213pd', 'vfmadd123ps', 'vfmadd123pd', 'vfmadd231ps', 'vfmadd231pd', + 'vfmadd321ps', 'vfmadd321pd', 'vfmaddsub132ps', 'vfmaddsub132pd', + 'vfmaddsub312ps', 'vfmaddsub312pd', 'vfmaddsub213ps', 'vfmaddsub213pd', + 'vfmaddsub123ps', 'vfmaddsub123pd', 'vfmaddsub231ps', 'vfmaddsub231pd', + 'vfmaddsub321ps', 'vfmaddsub321pd', 'vfmsub132ps', 'vfmsub132pd', + 'vfmsub312ps', 'vfmsub312pd', 'vfmsub213ps', 'vfmsub213pd', 'vfmsub123ps', + 'vfmsub123pd', 'vfmsub231ps', 'vfmsub231pd', 'vfmsub321ps', 'vfmsub321pd', + 'vfmsubadd132ps', 'vfmsubadd132pd', 'vfmsubadd312ps', 'vfmsubadd312pd', + 'vfmsubadd213ps', 'vfmsubadd213pd', 'vfmsubadd123ps', 'vfmsubadd123pd', + 'vfmsubadd231ps', 'vfmsubadd231pd', 'vfmsubadd321ps', 'vfmsubadd321pd', + 'vfnmadd132ps', 'vfnmadd132pd', 'vfnmadd312ps', 'vfnmadd312pd', + 'vfnmadd213ps', 'vfnmadd213pd', 'vfnmadd123ps', 'vfnmadd123pd', + 'vfnmadd231ps', 'vfnmadd231pd', 'vfnmadd321ps', 'vfnmadd321pd', + 'vfnmsub132ps', 'vfnmsub132pd', 'vfnmsub312ps', 'vfnmsub312pd', + 'vfnmsub213ps', 'vfnmsub213pd', 'vfnmsub123ps', 'vfnmsub123pd', + 'vfnmsub231ps', 'vfnmsub231pd', 'vfnmsub321ps', 'vfnmsub321pd', + 'vfmadd132ss', 'vfmadd132sd', 'vfmadd312ss', 'vfmadd312sd', 'vfmadd213ss', + 'vfmadd213sd', 'vfmadd123ss', 'vfmadd123sd', 'vfmadd231ss', 'vfmadd231sd', + 'vfmadd321ss', 'vfmadd321sd', 'vfmsub132ss', 'vfmsub132sd', 'vfmsub312ss', + 'vfmsub312sd', 'vfmsub213ss', 'vfmsub213sd', 'vfmsub123ss', 'vfmsub123sd', + 'vfmsub231ss', 'vfmsub231sd', 'vfmsub321ss', 'vfmsub321sd', 'vfnmadd132ss', + 'vfnmadd132sd', 'vfnmadd312ss', 'vfnmadd312sd', 'vfnmadd213ss', + 'vfnmadd213sd', 'vfnmadd123ss', 'vfnmadd123sd', 'vfnmadd231ss', + 'vfnmadd231sd', 'vfnmadd321ss', 'vfnmadd321sd', 'vfnmsub132ss', + 'vfnmsub132sd', 'vfnmsub312ss', 'vfnmsub312sd', 'vfnmsub213ss', + 'vfnmsub213sd', 'vfnmsub123ss', 'vfnmsub123sd', 'vfnmsub231ss', + 'vfnmsub231sd', 'vfnmsub321ss', 'vfnmsub321sd', + -- Intel post-32 nm processor instructions. + 'rdfsbase', 'rdgsbase', 'rdrand', 'wrfsbase', 'wrgsbase', 'vcvtph2ps', + 'vcvtps2ph', 'adcx', 'adox', 'rdseed', 'clac', 'stac', + -- VIA (Centaur) security instructions. + 'xstore', 'xcryptecb', 'xcryptcbc', 'xcryptctr', 'xcryptcfb', 'xcryptofb', + 'montmul', 'xsha1', 'xsha256', + -- AMD Lightweight Profiling (LWP) instructions. + 'llwpcb', 'slwpcb', 'lwpval', 'lwpins', + -- AMD XOP and FMA4 instructions (SSE5). + 'vfmaddpd', 'vfmaddps', 'vfmaddsd', 'vfmaddss', 'vfmaddsubpd', + 'vfmaddsubps', 'vfmsubaddpd', 'vfmsubaddps', 'vfmsubpd', 'vfmsubps', + 'vfmsubsd', 'vfmsubss', 'vfnmaddpd', 'vfnmaddps', 'vfnmaddsd', 'vfnmaddss', + 'vfnmsubpd', 'vfnmsubps', 'vfnmsubsd', 'vfnmsubss', 'vfrczpd', 'vfrczps', + 'vfrczsd', 'vfrczss', 'vpcmov', 'vpcomb', 'vpcomd', 'vpcomq', 'vpcomub', + 'vpcomud', 'vpcomuq', 'vpcomuw', 'vpcomw', 'vphaddbd', 'vphaddbq', + 'vphaddbw', 'vphadddq', 'vphaddubd', 'vphaddubq', 'vphaddubw', 'vphaddudq', + 'vphadduwd', 'vphadduwq', 'vphaddwd', 'vphaddwq', 'vphsubbw', 'vphsubdq', + 'vphsubwd', 'vpmacsdd', 'vpmacsdqh', 'vpmacsdql', 'vpmacssdd', 'vpmacssdqh', + 'vpmacssdql', 'vpmacsswd', 'vpmacssww', 'vpmacswd', 'vpmacsww', 'vpmadcsswd', + 'vpmadcswd', 'vpperm', 'vprotb', 'vprotd', 'vprotq', 'vprotw', 'vpshab', + 'vpshad', 'vpshaq', 'vpshaw', 'vpshlb', 'vpshld', 'vpshlq', 'vpshlw', + -- Intel AVX2 instructions. 
+ 'vmpsadbw', 'vpabsb', 'vpabsw', 'vpabsd', 'vpacksswb', 'vpackssdw', + 'vpackusdw', 'vpackuswb', 'vpaddb', 'vpaddw', 'vpaddd', 'vpaddq', + 'vpaddsb', 'vpaddsw', 'vpaddusb', 'vpaddusw', 'vpalignr', 'vpand', + 'vpandn', 'vpavgb', 'vpavgw', 'vpblendvb', 'vpblendw', 'vpcmpeqb', + 'vpcmpeqw', 'vpcmpeqd', 'vpcmpeqq', 'vpcmpgtb', 'vpcmpgtw', 'vpcmpgtd', + 'vpcmpgtq', 'vphaddw', 'vphaddd', 'vphaddsw', 'vphsubw', 'vphsubd', + 'vphsubsw', 'vpmaddubsw', 'vpmaddwd', 'vpmaxsb', 'vpmaxsw', 'vpmaxsd', + 'vpmaxub', 'vpmaxuw', 'vpmaxud', 'vpminsb', 'vpminsw', 'vpminsd', 'vpminub', + 'vpminuw', 'vpminud', 'vpmovmskb', 'vpmovsxbw', 'vpmovsxbd', 'vpmovsxbq', + 'vpmovsxwd', 'vpmovsxwq', 'vpmovsxdq', 'vpmovzxbw', 'vpmovzxbd', 'vpmovzxbq', + 'vpmovzxwd', 'vpmovzxwq', 'vpmovzxdq', 'vpmuldq', 'vpmulhrsw', 'vpmulhuw', + 'vpmulhw', 'vpmullw', 'vpmulld', 'vpmuludq', 'vpor', 'vpsadbw', 'vpshufb', + 'vpshufd', 'vpshufhw', 'vpshuflw', 'vpsignb', 'vpsignw', 'vpsignd', + 'vpslldq', 'vpsllw', 'vpslld', 'vpsllq', 'vpsraw', 'vpsrad', 'vpsrldq', + 'vpsrlw', 'vpsrld', 'vpsrlq', 'vpsubb', 'vpsubw', 'vpsubd', 'vpsubq', + 'vpsubsb', 'vpsubsw', 'vpsubusb', 'vpsubusw', 'vpunpckhbw', 'vpunpckhwd', + 'vpunpckhdq', 'vpunpckhqdq', 'vpunpcklbw', 'vpunpcklwd', 'vpunpckldq', + 'vpunpcklqdq', 'vpxor', 'vmovntdqa', 'vbroadcastss', 'vbroadcastsd', + 'vbroadcasti128', 'vpblendd', 'vpbroadcastb', 'vpbroadcastw', 'vpbroadcastd', + 'vpbroadcastq', 'vpermd', 'vpermpd', 'vpermps', 'vpermq', 'vperm2i128', + 'vextracti128', 'vinserti128', 'vpmaskmovd', 'vpmaskmovq', 'vpmaskmovd', + 'vpmaskmovq', 'vpsllvd', 'vpsllvq', 'vpsllvd', 'vpsllvq', 'vpsravd', + 'vpsrlvd', 'vpsrlvq', 'vpsrlvd', 'vpsrlvq', 'vgatherdpd', 'vgatherqpd', + 'vgatherdpd', 'vgatherqpd', 'vgatherdps', 'vgatherqps', 'vgatherdps', + 'vgatherqps', 'vpgatherdd', 'vpgatherqd', 'vpgatherdd', 'vpgatherqd', + 'vpgatherdq', 'vpgatherqq', 'vpgatherdq', 'vpgatherqq', + -- Transactional Synchronization Extensions (TSX). + 'xabort', 'xbegin', 'xend', 'xtest', + -- Intel BMI1 and BMI2 instructions, AMD TBM instructions. + 'andn', 'bextr', 'blci', 'blcic', 'blsi', 'blsic', 'blcfill', 'blsfill', + 'blcmsk', 'blsmsk', 'blsr', 'blcs', 'bzhi', 'mulx', 'pdep', 'pext', 'rorx', + 'sarx', 'shlx', 'shrx', 'tzcnt', 'tzmsk', 't1mskc', + -- Systematic names for the hinting nop instructions. + 'hint_nop0', 'hint_nop1', 'hint_nop2', 'hint_nop3', 'hint_nop4', + 'hint_nop5', 'hint_nop6', 'hint_nop7', 'hint_nop8', 'hint_nop9', + 'hint_nop10', 'hint_nop11', 'hint_nop12', 'hint_nop13', 'hint_nop14', + 'hint_nop15', 'hint_nop16', 'hint_nop17', 'hint_nop18', 'hint_nop19', + 'hint_nop20', 'hint_nop21', 'hint_nop22', 'hint_nop23', 'hint_nop24', + 'hint_nop25', 'hint_nop26', 'hint_nop27', 'hint_nop28', 'hint_nop29', + 'hint_nop30', 'hint_nop31', 'hint_nop32', 'hint_nop33', 'hint_nop34', + 'hint_nop35', 'hint_nop36', 'hint_nop37', 'hint_nop38', 'hint_nop39', + 'hint_nop40', 'hint_nop41', 'hint_nop42', 'hint_nop43', 'hint_nop44', + 'hint_nop45', 'hint_nop46', 'hint_nop47', 'hint_nop48', 'hint_nop49', + 'hint_nop50', 'hint_nop51', 'hint_nop52', 'hint_nop53', 'hint_nop54', + 'hint_nop55', 'hint_nop56', 'hint_nop57', 'hint_nop58', 'hint_nop59', + 'hint_nop60', 'hint_nop61', 'hint_nop62', 'hint_nop63', +}) + +-- Types. +local sizes = word_match{ + 'byte', 'word', 'dword', 'qword', 'tword', 'oword', 'yword', + 'a16', 'a32', 'a64', 'o16', 'o32', 'o64' -- instructions +} +local wrt_types = '..' 
* word_match{
+  'start', 'gotpc', 'gotoff', 'gottpoff', 'got', 'plt', 'sym', 'tlsie'
+}
+local type = token(l.TYPE, sizes + wrt_types)
+
+-- Registers.
+local register = token('register', word_match{
+  -- 32-bit registers.
+  'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cx', 'dh', 'di', 'dl',
+  'dx', 'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'esi', 'esp', 'fs', 'mm0',
+  'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6', 'mm7', 'si', 'st0', 'st1', 'st2',
+  'st3', 'st4', 'st5', 'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4',
+  'xmm5', 'xmm6', 'xmm7', 'ymm0', 'ymm1', 'ymm2', 'ymm3', 'ymm4', 'ymm5',
+  'ymm6', 'ymm7',
+  -- 64-bit registers.
+  'bpl', 'dil', 'gs', 'r8', 'r8b', 'r8w', 'r9', 'r9b', 'r9w', 'r10', 'r10b',
+  'r10w', 'r11', 'r11b', 'r11w', 'r12', 'r12b', 'r12w', 'r13', 'r13b', 'r13w',
+  'r14', 'r14b', 'r14w', 'r15', 'r15b', 'r15w', 'rax', 'rbp', 'rbx', 'rcx',
+  'rdi', 'rdx', 'rsi', 'rsp', 'sil', 'xmm8', 'xmm9', 'xmm10', 'xmm11', 'xmm12',
+  'xmm13', 'xmm14', 'xmm15', 'ymm8', 'ymm9', 'ymm10', 'ymm11', 'ymm12', 'ymm13',
+  'ymm14', 'ymm15'
+})
+
+local word = (l.alpha + S('$._?')) * (l.alnum + S('$._?#@~'))^0
+
+-- Labels.
+local label = token(l.LABEL, word * ':')
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, word)
+
+-- Constants.
+local constants = word_match{
+  '__float8__', '__float16__', '__float32__', '__float64__', '__float80m__',
+  '__float80e__', '__float128l__', '__float128h__', '__Infinity__', '__QNaN__',
+  '__NaN__', '__SNaN__'
+}
+local constant = token(l.CONSTANT, constants + '$' * P('$')^-1 * -identifier)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>!=^&|~:,()[]'))
+
+M._rules = {
+  {'whitespace', ws},
+  {'keyword', keyword},
+  {'instruction', instruction},
+  {'register', register},
+  {'type', type},
+  {'constant', constant},
+  {'label', label},
+  {'identifier', identifier},
+  {'string', string},
+  {'comment', comment},
+  {'number', number},
+  {'preproc', preproc},
+  {'operator', operator},
+}
+
+M._tokenstyles = {
+  instruction = l.STYLE_FUNCTION,
+  register = l.STYLE_CONSTANT,
+}
+
+M._foldsymbols = {
+  _patterns = {'%l+', '//'},
+  [l.PREPROCESSOR] = {
+    ['if'] = 1, endif = -1, macro = 1, endmacro = -1, rep = 1, endrep = -1,
+    ['while'] = 1, endwhile = -1,
+  },
+  [l.KEYWORD] = {struc = 1, endstruc = -1},
+  [l.COMMENT] = {['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lua/lexers/asp.lua b/lua/lexers/asp.lua
@@ -0,0 +1,42 @@
+-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- ASP LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'asp'}
+
+-- Embedded in HTML.
+local html = l.load('html')
+
+-- Embedded VB.
+local vb = l.load('vb')
+local vb_start_rule = token('asp_tag', '<%' * P('=')^-1)
+local vb_end_rule = token('asp_tag', '%>')
+l.embed_lexer(html, vb, vb_start_rule, vb_end_rule)
+
+-- Embedded VBScript.
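+-- As with VB above, l.embed_lexer() hands control from the parent HTML
+-- lexer to the child lexer whenever the start rule matches, and back on
+-- the end rule. The '#' lookaheads below keep the <script> tag itself
+-- styled as HTML; only the element body is lexed as VBScript.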
+local vbs = l.load('vbscript') +local script_element = word_match({'script'}, nil, html.case_insensitive_tags) +local vbs_start_rule = #(P('<') * script_element * (P(function(input, index) + if input:find('^%s+language%s*=%s*(["\'])vbscript%1', index) or + input:find('^%s+type%s*=%s*(["\'])text/vbscript%1', index) then + return index + end +end) + '>')) * html.embed_start_tag -- <script language="vbscript"> +local vbs_end_rule = #('</' * script_element * l.space^0 * '>') * + html.embed_end_tag -- </script> +l.embed_lexer(html, vbs, vbs_start_rule, vbs_end_rule) + +M._tokenstyles = { + asp_tag = l.STYLE_EMBEDDED +} + +local _foldsymbols = html._foldsymbols +_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '<%%' +_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '%%>' +_foldsymbols.asp_tag = {['<%'] = 1, ['%>'] = -1} +M._foldsymbols = _foldsymbols + +return M diff --git a/lua/lexers/autoit.lua b/lua/lexers/autoit.lua @@ -0,0 +1,168 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- AutoIt LPeg lexer. +-- Contributed by Jeff Stone. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'autoit'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = ';' * l.nonnewline_esc^0 +local block_comment1 = '#comments-start' * (l.any - '#comments-end')^0 * + P('#comments-end')^-1 +local block_comment2 = '#cs' * (l.any - '#ce')^0 * P('#ce')^-1 +local comment = token(l.COMMENT, line_comment + block_comment1 + block_comment2) + +-- Keywords. +local kw = token(l.KEYWORD, word_match({ + 'False', 'True', 'And', 'Or', 'Not', 'ContinueCase', 'ContinueLoop', + 'Default', 'Dim', 'Global', 'Local', 'Const', 'Do', 'Until', 'Enum', 'Exit', + 'ExitLoop', 'For', 'To', 'Step', 'Next', 'In', 'Func', 'Return', 'EndFunc', + 'If', 'Then', 'ElseIf', 'Else', 'EndIf', 'Null', 'ReDim', 'Select', 'Case', + 'EndSelect', 'Static', 'Switch', 'EndSwitch', 'Volatile', 'While', 'WEnd', + 'With', 'EndWith' +}, nil, true)) + +-- Functions. 
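+-- Built-in function names, matched case-insensitively (the third 'true'
+-- argument to word_match below), so MsgBox, msgbox and MSGBOX all match.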
+local func = token(l.FUNCTION, word_match({ + 'Abs', 'ACos', 'AdlibRegister', 'AdlibUnRegister', 'Asc', 'AscW', 'ASin', + 'Assign', 'ATan', 'AutoItSetOption', 'AutoItWinGetTitle', 'AutoItWinSetTitle', + 'Beep', 'Binary', 'BinaryLen', 'BinaryMid', 'BinaryToString', 'BitAND', + 'BitNOT', 'BitOR', 'BitRotate', 'BitShift', 'BitXOR', 'BlockInput', 'Break', + 'Call', 'CDTray', 'Ceiling', 'Chr', 'ChrW', 'ClipGet', 'ClipPut', + 'ConsoleRead', 'ConsoleWrite', 'ConsoleWriteError', 'ControlClick', + 'ControlCommand', 'ControlDisable', 'ControlEnable', 'ControlFocus', + 'ControlGetFocus', 'ControlGetHandle', 'ControlGetPos', 'ControlGetText', + 'ControlHide', 'ControlListView', 'ControlMove', 'ControlSend', + 'ControlSetText', 'ControlShow', 'ControlTreeView', 'Cos', 'Dec', 'DirCopy', + 'DirCreate', 'DirGetSize', 'DirMove', 'DirRemove', 'DllCall', + 'DllCallAddress', 'DllCallbackFree', 'DllCallbackGetPtr', + 'DllCallbackRegister', 'DllClose', 'DllOpen', 'DllStructCreate', + 'DllStructGetData', 'DllStructGetPtr', 'DllStructGetSize', 'DllStructSetData', + 'DriveGetDrive', 'DriveGetFileSystem', 'DriveGetLabel', 'DriveGetSerial', + 'DriveGetType', 'DriveMapAdd', 'DriveMapDel', 'DriveMapGet', 'DriveSetLabel', + 'DriveSpaceFree', 'DriveSpaceTotal', 'DriveStatus', 'EnvGet', 'EnvSet', + 'EnvUpdate', 'Eval', 'Execute', 'Exp', 'FileChangeDir', 'FileClose', + 'FileCopy', 'FileCreateNTFSLink', 'FileCreateShortcut', 'FileDelete', + 'FileExists', 'FileFindFirstFile', 'FileFindNextFile', 'FileFlush', + 'FileGetAttrib', 'FileGetEncoding', 'FileGetLongName', 'FileGetPos', + 'FileGetShortcut', 'FileGetShortName', 'FileGetSize', 'FileGetTime', + 'FileGetVersion', 'FileInstall', 'FileMove', 'FileOpen', 'FileOpenDialog', + 'FileRead', 'FileReadLine', 'FileReadToArray', 'FileRecycle', + 'FileRecycleEmpty', 'FileSaveDialog', 'FileSelectFolder', 'FileSetAttrib', + 'FileSetEnd', 'FileSetPos', 'FileSetTime', 'FileWrite', 'FileWriteLine', + 'Floor', 'FtpSetProxy', 'FuncName', 'GUICreate', 'GUICtrlCreateAvi', + 'GUICtrlCreateButton', 'GUICtrlCreateCheckbox', 'GUICtrlCreateCombo', + 'GUICtrlCreateContextMenu', 'GUICtrlCreateDate', 'GUICtrlCreateDummy', + 'GUICtrlCreateEdit', 'GUICtrlCreateGraphic', 'GUICtrlCreateGroup', + 'GUICtrlCreateIcon', 'GUICtrlCreateInput', 'GUICtrlCreateLabel', + 'GUICtrlCreateList', 'GUICtrlCreateListView', 'GUICtrlCreateListViewItem', + 'GUICtrlCreateMenu', 'GUICtrlCreateMenuItem', 'GUICtrlCreateMonthCal', + 'GUICtrlCreateObj', 'GUICtrlCreatePic', 'GUICtrlCreateProgress', + 'GUICtrlCreateRadio', 'GUICtrlCreateSlider', 'GUICtrlCreateTab', + 'GUICtrlCreateTabItem', 'GUICtrlCreateTreeView', 'GUICtrlCreateTreeViewItem', + 'GUICtrlCreateUpdown', 'GUICtrlDelete', 'GUICtrlGetHandle', 'GUICtrlGetState', + 'GUICtrlRead', 'GUICtrlRecvMsg', 'GUICtrlRegisterListViewSort', + 'GUICtrlSendMsg', 'GUICtrlSendToDummy', 'GUICtrlSetBkColor', + 'GUICtrlSetColor', 'GUICtrlSetCursor', 'GUICtrlSetData', + 'GUICtrlSetDefBkColor', 'GUICtrlSetDefColor', 'GUICtrlSetFont', + 'GUICtrlSetGraphic', 'GUICtrlSetImage', 'GUICtrlSetLimit', + 'GUICtrlSetOnEvent', 'GUICtrlSetPos', 'GUICtrlSetResizing', 'GUICtrlSetState', + 'GUICtrlSetStyle', 'GUICtrlSetTip', 'GUIDelete', 'GUIGetCursorInfo', + 'GUIGetMsg', 'GUIGetStyle', 'GUIRegisterMsg', 'GUISetAccelerators', + 'GUISetBkColor', 'GUISetCoord', 'GUISetCursor', 'GUISetFont', 'GUISetHelp', + 'GUISetIcon', 'GUISetOnEvent', 'GUISetState', 'GUISetStyle', 'GUIStartGroup', + 'GUISwitch', 'Hex', 'HotKeySet', 'HttpSetProxy', 'HttpSetUserAgent', 'HWnd', + 'InetClose', 'InetGet', 'InetGetInfo', 
'InetGetSize', 'InetRead', 'IniDelete',
+  'IniRead', 'IniReadSection', 'IniReadSectionNames', 'IniRenameSection',
+  'IniWrite', 'IniWriteSection', 'InputBox', 'Int', 'IsAdmin', 'IsArray',
+  'IsBinary', 'IsBool', 'IsDeclared', 'IsDllStruct', 'IsFloat', 'IsFunc',
+  'IsHWnd', 'IsInt', 'IsKeyword', 'IsNumber', 'IsObj', 'IsPtr', 'IsString',
+  'Log', 'MemGetStats', 'Mod', 'MouseClick', 'MouseClickDrag', 'MouseDown',
+  'MouseGetCursor', 'MouseGetPos', 'MouseMove', 'MouseUp', 'MouseWheel',
+  'MsgBox', 'Number', 'ObjCreate', 'ObjCreateInterface', 'ObjEvent', 'ObjGet',
+  'ObjName', 'OnAutoItExitRegister', 'OnAutoItExitUnRegister', 'Ping',
+  'PixelChecksum', 'PixelGetColor', 'PixelSearch', 'ProcessClose',
+  'ProcessExists', 'ProcessGetStats', 'ProcessList', 'ProcessSetPriority',
+  'ProcessWait', 'ProcessWaitClose', 'ProgressOff', 'ProgressOn', 'ProgressSet',
+  'Ptr', 'Random', 'RegDelete', 'RegEnumKey', 'RegEnumVal', 'RegRead',
+  'RegWrite', 'Round', 'Run', 'RunAs', 'RunAsWait', 'RunWait', 'Send',
+  'SendKeepActive', 'SetError', 'SetExtended', 'ShellExecute',
+  'ShellExecuteWait', 'Shutdown', 'Sin', 'Sleep', 'SoundPlay',
+  'SoundSetWaveVolume', 'SplashImageOn', 'SplashOff', 'SplashTextOn', 'Sqrt',
+  'SRandom', 'StatusbarGetText', 'StderrRead', 'StdinWrite', 'StdioClose',
+  'StdoutRead', 'String', 'StringAddCR', 'StringCompare', 'StringFormat',
+  'StringFromASCIIArray', 'StringInStr', 'StringIsAlNum', 'StringIsAlpha',
+  'StringIsASCII', 'StringIsDigit', 'StringIsFloat', 'StringIsInt',
+  'StringIsLower', 'StringIsSpace', 'StringIsUpper', 'StringIsXDigit',
+  'StringLeft', 'StringLen', 'StringLower', 'StringMid', 'StringRegExp',
+  'StringRegExpReplace', 'StringReplace', 'StringReverse', 'StringRight',
+  'StringSplit', 'StringStripCR', 'StringStripWS', 'StringToASCIIArray',
+  'StringToBinary', 'StringTrimLeft', 'StringTrimRight', 'StringUpper', 'Tan',
+  'TCPAccept', 'TCPCloseSocket', 'TCPConnect', 'TCPListen', 'TCPNameToIP',
+  'TCPRecv', 'TCPSend', 'TCPShutdown', 'TCPStartup', 'TimerDiff', 'TimerInit',
+  'ToolTip', 'TrayCreateItem', 'TrayCreateMenu', 'TrayGetMsg',
+  'TrayItemDelete', 'TrayItemGetHandle', 'TrayItemGetState',
+  'TrayItemGetText', 'TrayItemSetOnEvent', 'TrayItemSetState',
+  'TrayItemSetText', 'TraySetClick', 'TraySetIcon', 'TraySetOnEvent',
+  'TraySetPauseIcon', 'TraySetState', 'TraySetToolTip', 'TrayTip', 'UBound',
+  'UDPBind', 'UDPCloseSocket', 'UDPOpen', 'UDPRecv', 'UDPSend', 'UDPShutdown',
+  'UDPStartup', 'VarGetType', 'WinActivate', 'WinActive', 'WinClose',
+  'WinExists', 'WinFlash', 'WinGetCaretPos', 'WinGetClassList',
+  'WinGetClientSize', 'WinGetHandle', 'WinGetPos', 'WinGetProcess',
+  'WinGetState', 'WinGetText', 'WinGetTitle', 'WinKill', 'WinList',
+  'WinMenuSelectItem', 'WinMinimizeAll', 'WinMinimizeAllUndo', 'WinMove',
+  'WinSetOnTop', 'WinSetState', 'WinSetTitle', 'WinSetTrans', 'WinWait',
+  'WinWaitActive', 'WinWaitClose', 'WinWaitNotActive'
+}, nil, true))
+
+-- Preprocessor.
+local preproc = token(l.PREPROCESSOR, '#' * word_match({
+  'include-once', 'include', 'pragma', 'forceref', 'RequireAdmin', 'NoTrayIcon',
+  'OnAutoItStartRegister'
+}, '-', true))
+
+-- Strings.
+local dq_str = l.delimited_range('"', true, true)
+local sq_str = l.delimited_range("'", true, true)
+local inc = l.delimited_range('<>', true, true, true)
+local str = token(l.STRING, dq_str + sq_str + inc)
+
+-- Macros.
+local macro = token('macro', '@' * (l.alnum + '_')^1)
+
+-- Variables.
+local var = token(l.VARIABLE, '$' * (l.alnum + '_')^1)
+
+-- Identifiers.
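+-- Whatever the earlier rules ('@' macros, '$' variables, keywords,
+-- built-ins) do not claim falls through to a plain identifier token.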
+local ident = token(l.IDENTIFIER, (l.alnum + '_')^1) + +-- Numbers. +local nbr = token(l.NUMBER, l.float + l.integer) + +-- Operators. +local oper = token(l.OPERATOR, S('+-^*/&<>=?:()[]')) + +M._rules = { + {'whitespace', ws}, + {'comment', comment}, + {'keyword', kw}, + {'function', func}, + {'preproc', preproc}, + {'string', str}, + {'macro', macro}, + {'variable', var}, + {'number', nbr}, + {'identifier', ident}, + {'operator', oper} +} + +M._tokenstyles = { + macro = l.STYLE_PREPROCESSOR +} + +return M diff --git a/lua/lexers/awk.lua b/lua/lexers/awk.lua @@ -0,0 +1,334 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- AWK LPeg lexer. +-- Modified by Wolfgang Seeberg 2012, 2013. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'awk'} + +local LEFTBRACKET = '[' +local RIGHTBRACKET = ']' +local SLASH = '/' +local BACKSLASH = '\\' +local CARET = '^' +local CR = '\r' +local LF = '\n' +local CRLF = CR .. LF +local DQUOTE = '"' +local DELIMITER_MATCHES = {['('] = ')', ['['] = ']'} +local COMPANION = {['('] = '[', ['['] = '('} +local CC = { + alnum = 1, alpha = 1, blank = 1, cntrl = 1, digit = 1, graph = 1, lower = 1, + print = 1, punct = 1, space = 1, upper = 1, xdigit = 1 +} +local LastRegexEnd = 0 +local BackslashAtCommentEnd = 0 +local KW_BEFORE_RX = { + case = 1, ['do'] = 1, ['else'] = 1, exit = 1, print = 1, printf = 1, + ['return'] = 1 +} + +local function findKeyword(input, e) + local i = e + while i > 0 and input:find("^[%l]", i) do i = i - 1 end + local w = input:sub(i + 1, e) + if i == 0 then + return KW_BEFORE_RX[w] == 1 + elseif input:find("^[%u%d_]", i) then + return false + else + return KW_BEFORE_RX[w] == 1 + end +end + +local function isRegex(input, i) + while i >= 1 and input:find('^[ \t]', i) do i = i - 1 end + if i < 1 then return true end + if input:find("^[-!%%&(*+,:;<=>?[^{|}~\f]", i) or findKeyword(input, i) then + return true + elseif input:sub(i, i) == SLASH then + return i ~= LastRegexEnd -- deals with /xx/ / /yy/. 
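+  -- A ']', ')', '.', '"' or alphanumeric ends an expression, so a slash
+  -- that follows is division rather than the start of a regex.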
+ elseif input:find('^[]%w)."]', i) then + return false + elseif input:sub(i, i) == LF then + if i == 1 then return true end + i = i - 1 + if input:sub(i, i) == CR then + if i == 1 then return true end + i = i - 1 + end + elseif input:sub(i, i) == CR then + if i == 1 then return true end + i = i - 1 + else + return false + end + if input:sub(i, i) == BACKSLASH and i ~= BackslashAtCommentEnd then + return isRegex(input, i - 1) + else + return true + end +end + +local function eatCharacterClass(input, s, e) + local i = s + while i <= e do + if input:find('^[\r\n]', i) then + return false + elseif input:sub(i, i + 1) == ':]' then + local str = input:sub(s, i - 1) + return CC[str] == 1 and i + 1 + end + i = i + 1 + end + return false +end + +local function eatBrackets(input, i, e) + if input:sub(i, i) == CARET then i = i + 1 end + if input:sub(i, i) == RIGHTBRACKET then i = i + 1 end + while i <= e do + if input:find('^[\r\n]', i) then + return false + elseif input:sub(i, i) == RIGHTBRACKET then + return i + elseif input:sub(i, i + 1) == '[:' then + i = eatCharacterClass(input, i + 2, e) + if not i then return false end + elseif input:sub(i, i) == BACKSLASH then + i = i + 1 + if input:sub(i, i + 1) == CRLF then i = i + 1 end + end + i = i + 1 + end + return false +end + +local function eatRegex(input, i) + local e = #input + while i <= e do + if input:find('^[\r\n]', i) then + return false + elseif input:sub(i, i) == SLASH then + LastRegexEnd = i + return i + elseif input:sub(i, i) == LEFTBRACKET then + i = eatBrackets(input, i + 1, e) + if not i then return false end + elseif input:sub(i, i) == BACKSLASH then + i = i + 1 + if input:sub(i, i + 1) == CRLF then i = i + 1 end + end + i = i + 1 + end + return false +end + +local ScanRegexResult +local function scanGawkRegex(input, index) + if isRegex(input, index - 2) then + local i = eatRegex(input, index) + if not i then + ScanRegexResult = false + return false + end + local rx = input:sub(index - 1, i) + for bs in rx:gmatch("[^\\](\\+)[BSsWwy<>`']") do + -- /\S/ is special, but /\\S/ is not. + if #bs % 2 == 1 then return i + 1 end + end + ScanRegexResult = i + 1 + else + ScanRegexResult = false + end + return false +end +-- Is only called immediately after scanGawkRegex(). +local function scanRegex() + return ScanRegexResult +end + +local function scanString(input, index) + local i = index + local e = #input + while i <= e do + if input:find('^[\r\n]', i) then + return false + elseif input:sub(i, i) == DQUOTE then + return i + 1 + elseif input:sub(i, i) == BACKSLASH then + i = i + 1 + -- l.delimited_range() doesn't handle CRLF. + if input:sub(i, i + 1) == CRLF then i = i + 1 end + end + i = i + 1 + end + return false +end + +-- purpose: prevent isRegex() from entering a comment line that ends with a +-- backslash. 
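+-- For example, after
+--   # comment ending in \
+-- the trailing backslash must not be mistaken for an escape when
+-- isRegex() scans backwards across the newline.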
+local function scanComment(input, index) + local _, i = input:find('[^\r\n]*', index) + if input:sub(i, i) == BACKSLASH then BackslashAtCommentEnd = i end + return i + 1 +end + +local function scanFieldDelimiters(input, index) + local i = index + local e = #input + local left = input:sub(i - 1, i - 1) + local count = 1 + local right = DELIMITER_MATCHES[left] + local left2 = COMPANION[left] + local count2 = 0 + local right2 = DELIMITER_MATCHES[left2] + while i <= e do + if input:find('^[#\r\n]', i) then + return false + elseif input:sub(i, i) == right then + count = count - 1 + if count == 0 then return count2 == 0 and i + 1 end + elseif input:sub(i, i) == left then + count = count + 1 + elseif input:sub(i, i) == right2 then + count2 = count2 - 1 + if count2 < 0 then return false end + elseif input:sub(i, i) == left2 then + count2 = count2 + 1 + elseif input:sub(i, i) == DQUOTE then + i = scanString(input, i + 1) + if not i then return false end + i = i - 1 + elseif input:sub(i, i) == SLASH then + if isRegex(input, i - 1) then + i = eatRegex(input, i + 1) + if not i then return false end + end + elseif input:sub(i, i) == BACKSLASH then + if input:sub(i + 1, i + 2) == CRLF then + i = i + 2 + elseif input:find('^[\r\n]', i + 1) then + i = i + 1 + end + end + i = i + 1 + end + return false +end + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '#' * P(scanComment)) + +-- Strings. +local string = token(l.STRING, DQUOTE * P(scanString)) + +-- Regular expressions. +-- Slash delimited regular expressions are preceded by most operators or +-- the keywords 'print' and 'case', possibly on a preceding line. They +-- can contain unescaped slashes and brackets in brackets. Some escape +-- sequences like '\S', '\s' have special meanings with Gawk. Tokens that +-- contain them are displayed differently. +local regex = token(l.REGEX, SLASH * P(scanRegex)) +local gawkRegex = token('gawkRegex', SLASH * P(scanGawkRegex)) + +-- no leading sign because it might be binary. +local float = ((l.digit ^ 1 * ('.' * l.digit ^ 0) ^ -1) + + ('.' * l.digit ^ 1)) * (S('eE') * S('+-') ^ -1 * l.digit ^ 1) ^ -1 +-- Numbers. +local number = token(l.NUMBER, float) +local gawkNumber = token('gawkNumber', l.hex_num + l.oct_num) + +-- Operators. +local operator = token(l.OPERATOR, S('!%&()*+,-/:;<=>?[\\]^{|}~')) +local gawkOperator = token('gawkOperator', P("|&") + "@" + "**=" + "**") + +-- Fields. E.g. $1, $a, $(x), $a(x), $a[x], $"1", $$a, etc. +local field = token('field', P('$') * S('$+-') ^ 0 * + (float + (l.word ^ 0 * '(' * P(scanFieldDelimiters)) + + (l.word ^ 1 * ('[' * P(scanFieldDelimiters)) ^ -1) + + ('"' * P(scanString)) + ('/' * P(eatRegex) * '/'))) + +-- Functions. +local func = token(l.FUNCTION, l.word * #P('(')) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Keywords. 
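+-- Standard awk keywords and built-in functions; gawk-specific names get
+-- the separate gawkKeyword token below so they can be styled differently.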
+local keyword = token(l.KEYWORD, word_match{ + 'BEGIN', 'END', 'atan2', 'break', 'close', 'continue', 'cos', 'delete', 'do', + 'else', 'exit', 'exp', 'fflush', 'for', 'function', 'getline', 'gsub', 'if', + 'in', 'index', 'int', 'length', 'log', 'match', 'next', 'nextfile', 'print', + 'printf', 'rand', 'return', 'sin', 'split', 'sprintf', 'sqrt', 'srand', 'sub', + 'substr', 'system', 'tolower', 'toupper', 'while' +}) + +local gawkKeyword = token('gawkKeyword', word_match{ + 'BEGINFILE', 'ENDFILE', 'adump', 'and', 'asort', 'asorti', 'bindtextdomain', + 'case', 'compl', 'dcgettext', 'dcngettext', 'default', 'extension', 'func', + 'gensub', 'include', 'isarray', 'load', 'lshift', 'mktime', 'or', 'patsplit', + 'rshift', 'stopme', 'strftime', 'strtonum', 'switch', 'systime', 'xor' +}) + +local builtInVariable = token('builtInVariable', word_match{ + 'ARGC', 'ARGV', 'CONVFMT', 'ENVIRON', 'FILENAME', 'FNR', 'FS', 'NF', 'NR', + 'OFMT', 'OFS', 'ORS', 'RLENGTH', 'RS', 'RSTART', 'SUBSEP' +}) + +local gawkBuiltInVariable = token('gawkBuiltInVariable', word_match { + 'ARGIND', 'BINMODE', 'ERRNO', 'FIELDWIDTHS', 'FPAT', 'FUNCTAB', 'IGNORECASE', + 'LINT', 'PREC', 'PROCINFO', 'ROUNDMODE', 'RT', 'SYMTAB', 'TEXTDOMAIN' +}) + +-- Within each group order matters, but the groups themselves (except the +-- last) can be in any order. +M._rules = { + {'whitespace', ws}, + + {'comment', comment}, + + {'string', string}, + + {'field', field}, + + {'gawkRegex', gawkRegex}, + {'regex', regex}, + {'gawkOperator', gawkOperator}, + {'operator', operator}, + + {'gawkNumber', gawkNumber}, + {'number', number}, + + {'keyword', keyword}, + {'builtInVariable', builtInVariable}, + {'gawkKeyword', gawkKeyword}, + {'gawkBuiltInVariable', gawkBuiltInVariable}, + {'function', func}, + {'identifier', identifier}, +} + +M._tokenstyles = { + builtInVariable = l.STYLE_CONSTANT, + default = l.STYLE_ERROR, + field = l.STYLE_LABEL, + gawkBuiltInVariable = l.STYLE_CONSTANT..',underlined', + gawkKeyword = l.STYLE_KEYWORD..',underlined', + gawkNumber = l.STYLE_NUMBER..',underlined', + gawkOperator = l.STYLE_OPERATOR..',underlined', + gawkRegex = l.STYLE_PREPROCESSOR..',underlined', + regex = l.STYLE_PREPROCESSOR +} + +M._foldsymbols = { + _patterns = {'[{}]', '#'}, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['#'] = l.fold_line_comments('#')} +} + +return M diff --git a/lua/lexers/bash.lua b/lua/lexers/bash.lua @@ -0,0 +1,82 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Shell LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'bash'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '#' * l.nonnewline^0) + +-- Strings. +local sq_str = l.delimited_range("'", false, true) +local dq_str = l.delimited_range('"') +local ex_str = l.delimited_range('`') +local heredoc = '<<' * P(function(input, index) + local s, e, minus, _, delimiter = + input:find('(-?)(["\']?)([%a_][%w_]*)%2[\n\r\f;]+', index) + if s == index and delimiter then + -- If the starting delimiter of a here-doc begins with "-", then + -- spaces are allowed to come before the closing delimiter. 
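+    -- (e.g. a 'cat <<-EOF' here-doc may be closed by an indented EOF).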
+ local close_pattern + if minus == '-' then + close_pattern = '[\n\r\f%s]+'..delimiter..'\n' + else + close_pattern = '[\n\r\f]+'..delimiter..'\n' + end + local _, e = input:find(close_pattern, e) + return e and e + 1 or #input + 1 + end +end) +local string = token(l.STRING, sq_str + dq_str + ex_str + heredoc) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match({ + 'if', 'then', 'elif', 'else', 'fi', 'case', 'in', 'esac', 'while', 'for', + 'do', 'done', 'continue', 'local', 'return', 'select', + -- Operators. + '-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t', + '-u', '-w', '-x', '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o', + '-z', '-n', '-eq', '-ne', '-lt', '-le', '-gt', '-ge' +}, '-')) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Variables. +local variable = token(l.VARIABLE, + '$' * (S('!#?*@$') + l.digit^1 + l.word + + l.delimited_range('{}', true, true, true))) + +-- Operators. +local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'variable', variable}, + {'operator', operator}, +} + +M._foldsymbols = { + _patterns = {'[a-z]+', '[{}]', '#'}, + [l.KEYWORD] = { + ['if'] = 1, fi = -1, case = 1, esac = -1, ['do'] = 1, done = -1 + }, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['#'] = l.fold_line_comments('#')} +} + +return M diff --git a/lua/lexers/batch.lua b/lua/lexers/batch.lua @@ -0,0 +1,71 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Batch LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'batch'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local rem = (P('REM') + 'rem') * l.space +local comment = token(l.COMMENT, (rem + '::') * l.nonnewline^0) + +-- Strings. +local string = token(l.STRING, l.delimited_range('"', true)) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match({ + 'cd', 'chdir', 'md', 'mkdir', 'cls', 'for', 'if', 'echo', 'echo.', 'move', + 'copy', 'ren', 'del', 'set', 'call', 'exit', 'setlocal', 'shift', + 'endlocal', 'pause', 'defined', 'exist', 'errorlevel', 'else', 'in', 'do', + 'NUL', 'AUX', 'PRN', 'not', 'goto', 'pushd', 'popd' +}, nil, true)) + +-- Functions. +local func = token(l.FUNCTION, word_match({ + 'APPEND', 'ATTRIB', 'CHKDSK', 'CHOICE', 'DEBUG', 'DEFRAG', 'DELTREE', + 'DISKCOMP', 'DISKCOPY', 'DOSKEY', 'DRVSPACE', 'EMM386', 'EXPAND', 'FASTOPEN', + 'FC', 'FDISK', 'FIND', 'FORMAT', 'GRAPHICS', 'KEYB', 'LABEL', 'LOADFIX', + 'MEM', 'MODE', 'MORE', 'MOVE', 'MSCDEX', 'NLSFUNC', 'POWER', 'PRINT', 'RD', + 'REPLACE', 'RESTORE', 'SETVER', 'SHARE', 'SORT', 'SUBST', 'SYS', 'TREE', + 'UNDELETE', 'UNFORMAT', 'VSAFE', 'XCOPY' +}, nil, true)) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Variables. +local variable = token(l.VARIABLE, + '%' * (l.digit + '%' * l.alpha) + + l.delimited_range('%', true, true)) + +-- Operators. +local operator = token(l.OPERATOR, S('+|&!<>=')) + +-- Labels. 
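+-- e.g. ':loop', the target of 'goto loop'.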
+local label = token(l.LABEL, ':' * l.word) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'function', func}, + {'comment', comment}, + {'identifier', identifier}, + {'string', string}, + {'variable', variable}, + {'label', label}, + {'operator', operator}, +} + +M._LEXBYLINE = true + +M._foldsymbols = { + _patterns = {'[A-Za-z]+'}, + [l.KEYWORD] = {setlocal = 1, endlocal = -1, SETLOCAL = 1, ENDLOCAL = -1} +} + +return M diff --git a/lua/lexers/bibtex.lua b/lua/lexers/bibtex.lua @@ -0,0 +1,58 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Bibtex LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'bibtex'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Strings. +local string = token(l.STRING, l.delimited_range('"') + + l.delimited_range('{}', false, true, true)) + +-- Fields. +local field = token('field', word_match{ + 'author', 'title', 'journal', 'year', 'volume', 'number', 'pages', 'month', + 'note', 'key', 'publisher', 'editor', 'series', 'address', 'edition', + 'howpublished', 'booktitle', 'organization', 'chapter', 'school', + 'institution', 'type', 'isbn', 'issn', 'affiliation', 'issue', 'keyword', + 'url' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S(',=')) + +M._rules = { + {'whitespace', ws}, + {'field', field}, + {'identifier', identifier}, + {'string', string}, + {'operator', operator}, +} + +-- Embedded in Latex. +local latex = l.load('latex') + +-- Embedded Bibtex. +local entry = token('entry', P('@') * word_match({ + 'book', 'article', 'booklet', 'conference', 'inbook', 'incollection', + 'inproceedings', 'manual', 'mastersthesis', 'lambda', 'misc', 'phdthesis', + 'proceedings', 'techreport', 'unpublished' +}, nil, true)) +local bibtex_start_rule = entry * ws^0 * token(l.OPERATOR, P('{')) +local bibtex_end_rule = token(l.OPERATOR, P('}')) +l.embed_lexer(latex, M, bibtex_start_rule, bibtex_end_rule) + +M._tokenstyles = { + field = l.STYLE_CONSTANT, + entry = l.STYLE_PREPROCESSOR +} + +return M diff --git a/lua/lexers/boo.lua b/lua/lexers/boo.lua @@ -0,0 +1,81 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Boo LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'boo'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '#' * l.nonnewline_esc^0 +local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local sq_str = l.delimited_range("'", true) +local dq_str = l.delimited_range('"', true) +local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1 +local regex_str = #('/') * l.last_char_includes('!%^&*([{-=+|:;,?<>~') * + l.delimited_range('/', true) +local string = token(l.STRING, triple_dq_str + sq_str + dq_str) + + token(l.REGEX, regex_str) + + +-- Numbers. +local number = token(l.NUMBER, (l.float + l.integer) * + (S('msdhsfFlL') + 'ms')^-1) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'and', 'break', 'cast', 'continue', 'elif', 'else', 'ensure', 'except', 'for', + 'given', 'goto', 'if', 'in', 'isa', 'is', 'not', 'or', 'otherwise', 'pass', + 'raise', 'ref', 'try', 'unless', 'when', 'while', + -- Definitions. 
+  'abstract', 'callable', 'class', 'constructor', 'def', 'destructor', 'do',
+  'enum', 'event', 'final', 'get', 'interface', 'internal', 'of', 'override',
+  'partial', 'private', 'protected', 'public', 'return', 'set', 'static',
+  'struct', 'transient', 'virtual', 'yield',
+  -- Namespaces.
+  'as', 'from', 'import', 'namespace',
+  -- Other.
+  'self', 'super', 'null', 'true', 'false'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+  'bool', 'byte', 'char', 'date', 'decimal', 'double', 'duck', 'float', 'int',
+  'long', 'object', 'operator', 'regex', 'sbyte', 'short', 'single', 'string',
+  'timespan', 'uint', 'ulong', 'ushort'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+  'array', 'assert', 'checked', 'enumerate', '__eval__', 'filter', 'getter',
+  'len', 'lock', 'map', 'matrix', 'max', 'min', 'normalArrayIndexing', 'print',
+  'property', 'range', 'rawArrayIndexing', 'required', '__switch__', 'typeof',
+  'unchecked', 'using', 'yieldAll', 'zip'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`'))
+
+M._rules = {
+  {'whitespace', ws},
+  {'keyword', keyword},
+  {'type', type},
+  {'function', func},
+  {'identifier', identifier},
+  {'string', string},
+  {'comment', comment},
+  {'number', number},
+  {'operator', operator},
+}
+
+return M
diff --git a/lua/lexers/caml.lua b/lua/lexers/caml.lua
@@ -0,0 +1,83 @@
+-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- OCaml LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'caml'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, l.nested_pair('(*', '*)'))
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+  'and', 'as', 'asr', 'begin', 'class', 'closed', 'constraint', 'do', 'done',
+  'downto', 'else', 'end', 'exception', 'external', 'failwith', 'false',
+  'flush', 'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
+  'inherit', 'incr', 'land', 'let', 'load', 'lor', 'lsl', 'lsr', 'lxor',
+  'match', 'method', 'mod', 'module', 'mutable', 'new', 'not', 'of', 'open',
+  'option', 'or', 'parser', 'private', 'ref', 'rec', 'raise', 'regexp', 'sig',
+  'struct', 'stdout', 'stdin', 'stderr', 'then', 'to', 'true', 'try', 'type',
+  'val', 'virtual', 'when', 'while', 'with'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+  'int', 'float', 'bool', 'char', 'string', 'unit'
+})
+
+-- Functions.
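+-- Mostly functions from Pervasives, the module OCaml opens by default.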
+local func = token(l.FUNCTION, word_match{ + 'raise', 'invalid_arg', 'failwith', 'compare', 'min', 'max', 'succ', 'pred', + 'mod', 'abs', 'max_int', 'min_int', 'sqrt', 'exp', 'log', 'log10', 'cos', + 'sin', 'tan', 'acos', 'asin', 'atan', 'atan2', 'cosh', 'sinh', 'tanh', 'ceil', + 'floor', 'abs_float', 'mod_float', 'frexp', 'ldexp', 'modf', 'float', + 'float_of_int', 'truncate', 'int_of_float', 'infinity', 'nan', 'max_float', + 'min_float', 'epsilon_float', 'classify_float', 'int_of_char', 'char_of_int', + 'ignore', 'string_of_bool', 'bool_of_string', 'string_of_int', + 'int_of_string', 'string_of_float', 'float_of_string', 'fst', 'snd', 'stdin', + 'stdout', 'stderr', 'print_char', 'print_string', 'print_int', 'print_float', + 'print_endline', 'print_newline', 'prerr_char', 'prerr_string', 'prerr_int', + 'prerr_float', 'prerr_endline', 'prerr_newline', 'read_line', 'read_int', + 'read_float', 'open_out', 'open_out_bin', 'open_out_gen', 'flush', + 'flush_all', 'output_char', 'output_string', 'output', 'output_byte', + 'output_binary_int', 'output_value', 'seek_out', 'pos_out', + 'out_channel_length', 'close_out', 'close_out_noerr', 'set_binary_mode_out', + 'open_in', 'open_in_bin', 'open_in_gen', 'input_char', 'input_line', 'input', + 'really_input', 'input_byte', 'input_binary_int', 'input_value', 'seek_in', + 'pos_in', 'in_channel_length', 'close_in', 'close_in_noerr', + 'set_binary_mode_in', 'incr', 'decr', 'string_of_format', 'format_of_string', + 'exit', 'at_exit' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('=<>+-*/.,:;~!#%^&|?[](){}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'type', type}, + {'function', func}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, +} + +return M diff --git a/lua/lexers/chuck.lua b/lua/lexers/chuck.lua @@ -0,0 +1,92 @@ +-- Copyright 2010-2017 Martin Morawetz. See LICENSE. +-- ChucK LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'chuck'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '//' * l.nonnewline_esc^0 +local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local sq_str = P('L')^-1 * l.delimited_range("'", true) +local dq_str = P('L')^-1 * l.delimited_range('"', true) +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Constants. +local constant = token(l.CONSTANT, word_match{ + -- special values + 'false', 'maybe', 'me', 'null', 'NULL', 'pi', 'true' +}) + +-- Special special value. +local now = token('now', P('now')) + +-- Times. +local time = token('time', word_match{ + 'samp', 'ms', 'second', 'minute', 'hour', 'day', 'week' +}) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + -- Control structures. + 'break', 'continue', 'else', 'for', 'if', 'repeat', 'return', 'switch', + 'until', 'while', + -- Other chuck keywords. + 'function', 'fun', 'spork', 'const', 'new' +}) + +-- Classes. +local class = token(l.CLASS, word_match{ + -- Class keywords. + 'class', 'extends', 'implements', 'interface', 'private', 'protected', + 'public', 'pure', 'super', 'static', 'this' +}) + +-- Types. 
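+-- ChucK's primitive types; 'dur' (a duration) and 'time' (a point in
+-- time) underpin the language's strongly-timed model.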
+local types = token(l.TYPE, word_match{
+  'float', 'int', 'time', 'dur', 'void', 'same'
+})
+
+-- Global ugens.
+local ugen = token('ugen', word_match{'dac', 'adc', 'blackhole'})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}@'))
+
+M._rules = {
+  {'whitespace', ws},
+  {'string', string},
+  {'keyword', keyword},
+  {'constant', constant},
+  {'type', types},
+  {'class', class},
+  {'ugen', ugen},
+  {'time', time},
+  {'now', now},
+  {'identifier', identifier},
+  {'comment', comment},
+  {'number', number},
+  {'operator', operator},
+}
+
+M._tokenstyles = {
+  ugen = l.STYLE_CONSTANT,
+  time = l.STYLE_NUMBER,
+  now = l.STYLE_CONSTANT..',bold'
+}
+
+return M
diff --git a/lua/lexers/clojure.lua b/lua/lexers/clojure.lua
@@ -0,0 +1,193 @@
+-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Clojure LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'clojure'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = ';' * l.nonnewline^0
+local block_comment = '#_(' * (l.any - ')')^0 * P(')')
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"'))
+
+-- Numbers.
+local number = token(l.NUMBER, P('-')^-1 * l.digit^1 * (S('./') * l.digit^1)^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+  'fn', 'try', 'catch', 'finally', 'defonce',
+  'and', 'case', 'cond', 'def', 'defn', 'defmacro',
+  'do', 'else', 'when', 'when-let', 'if-let', 'if', 'let', 'loop',
+  'or', 'recur', 'quote',
+}, '-*!'))

-- Functions.
+local func = token(l.FUNCTION, word_match({
+  '*', '+', '-', '->ArrayChunk', '->Eduction', '->Vec',
+  '->VecNode', '->VecSeq', '/', '<', '<=', '=', '==', '>', '>=',
+  'StackTraceElement->vec', 'Throwable->map', 'accessor', 'aclone',
+  'add-classpath', 'add-watch', 'agent', 'agent-error', 'agent-errors', 'aget',
+  'alength', 'alias', 'all-ns', 'alter', 'alter-meta!', 'alter-var-root',
+  'ancestors', 'any?', 'apply', 'array-map', 'aset', 'aset-boolean',
+  'aset-byte', 'aset-char', 'aset-double', 'aset-float', 'aset-int',
+  'aset-long', 'aset-short', 'assoc', 'assoc!', 'assoc-in', 'associative?',
+  'atom', 'await', 'await-for', 'bases', 'bean', 'bigdec', 'bigint',
+  'biginteger', 'bit-and', 'bit-and-not', 'bit-clear', 'bit-flip', 'bit-not',
+  'bit-or', 'bit-set', 'bit-shift-left', 'bit-shift-right', 'bit-test',
+  'bit-xor', 'boolean', 'boolean-array', 'boolean?', 'booleans', 'bound-fn*',
+  'bound?', 'bounded-count', 'butlast', 'byte', 'byte-array', 'bytes', 'bytes?',
+  'cast', 'cat', 'char', 'char-array', 'char?', 'chars', 'class', 'class?',
+  'clear-agent-errors', 'clojure-version', 'coll?', 'commute', 'comp',
+  'comparator', 'compare', 'compare-and-set!', 'compile', 'complement',
+  'completing', 'concat', 'conj', 'conj!', 'cons', 'constantly',
+  'construct-proxy', 'contains?', 'count', 'counted?', 'create-ns',
+  'create-struct', 'cycle', 'dec', 'decimal?', 'dedupe', 'delay?',
+  'deliver', 'denominator', 'deref', 'derive', 'descendants', 'disj', 'disj!',
+  'dissoc', 'dissoc!', 'distinct', 'distinct?', 'doall', 'dorun', 'double',
+  'double-array', 'double?', 'doubles', 'drop', 'drop-last', 'drop-while',
+  'eduction', 'empty', 'empty?', 'ensure', 'ensure-reduced', 'enumeration-seq',
+  'error-handler', 'error-mode', 'eval', 'even?', 'every-pred',
'every?', + 'ex-data', 'ex-info', 'extend', 'extenders', 'extends?', 'false?', 'ffirst', + 'file-seq', 'filter', 'filterv', 'find', 'find-keyword', 'find-ns', + 'find-var', 'first', 'flatten', 'float', 'float-array', 'float?', 'floats', + 'flush', 'fn?', 'fnext', 'fnil', 'force', 'format', 'frequencies', + 'future-call', 'future-cancel', 'future-cancelled?', 'future-done?', + 'future?', 'gensym', 'get', 'get-in', 'get-method', 'get-proxy-class', + 'get-thread-bindings', 'get-validator', 'group-by', 'halt-when', 'hash', + 'hash-map', 'hash-ordered-coll', 'hash-set', 'hash-unordered-coll', 'ident?', + 'identical?', 'identity', 'ifn?', 'in-ns', 'inc', 'inc', 'indexed?', + 'init-proxy', 'inst-ms', 'inst?', 'instance?', 'int', 'int-array', 'int?', + 'integer?', 'interleave', 'intern', 'interpose', 'into', 'into-array', 'ints', + 'isa?', 'iterate', 'iterator-seq', 'juxt', 'keep', 'keep-indexed', 'key', + 'keys', 'keyword', 'keyword?', 'last', 'line-seq', 'list', 'list*', 'list?', + 'load', 'load-file', 'load-reader', 'load-string', 'loaded-libs', 'long', + 'long-array', 'longs', 'macroexpand', 'macroexpand-1', 'make-array', + 'make-hierarchy', 'map', 'map-entry?', 'map-indexed', 'map?', 'mapcat', + 'mapv', 'max', 'max-key', 'memoize', 'merge', 'merge-with', 'meta', 'methods', + 'min', 'min-key', 'mix-collection-hash', 'mod', 'name', 'namespace', + 'namespace-munge', 'nat-int?', 'neg-int?', 'neg?', 'newline', 'next', + 'nfirst', 'nil?', 'nnext', 'not', 'not-any?', 'not-empty', 'not-every?', + 'not=', 'ns-aliases', 'ns-imports', 'ns-interns', 'ns-map', 'ns-name', + 'ns-publics', 'ns-refers', 'ns-resolve', 'ns-unalias', 'ns-unmap', 'nth', + 'nthnext', 'nthrest', 'num', 'number?', 'numerator', 'object-array', 'odd?', + 'parents', 'partial', 'partition', 'partition-all', 'partition-by', 'pcalls', + 'peek', 'persistent!', 'pmap', 'pop', 'pop!', 'pop-thread-bindings', + 'pos-int?', 'pos?', 'pr-str', 'prefer-method', 'prefers', 'print', + 'print-str', 'printf', 'println', 'println-str', 'prn', 'prn-str', 'promise', + 'proxy-mappings', 'push-thread-bindings', 'qualified-ident?', + 'qualified-keyword?', 'qualified-symbol?', 'quot', 'rand', 'rand-int', + 'rand-nth', 'random-sample', 'range', 'ratio?', 'rational?', 'rationalize', + 're-find', 're-groups', 're-matcher', 're-matches', 're-pattern', 're-seq', + 'read', 'read-line', 'read-string', 'reader-conditional', + 'reader-conditional?', 'realized?', 'record?', 'reduce', 'reduce-kv', + 'reduced', 'reduced?', 'reductions', 'ref', 'ref-history-count', + 'ref-max-history', 'ref-min-history', 'ref-set', 'refer', + 'release-pending-sends', 'rem', 'remove', 'remove-all-methods', + 'remove-method', 'remove-ns', 'remove-watch', 'repeat', 'repeatedly', + 'replace', 'replicate', 'require', 'reset!', 'reset-meta!', 'reset-vals!', + 'resolve', 'rest', 'restart-agent', 'resultset-seq', 'reverse', 'reversible?', + 'rseq', 'rsubseq', 'run!', 'satisfies?', 'second', 'select-keys', 'send', + 'send-off', 'send-via', 'seq', 'seq?', 'seqable?', 'seque', 'sequence', + 'sequential?', 'set', 'set-agent-send-executor!', + 'set-agent-send-off-executor!', 'set-error-handler!', 'set-error-mode!', + 'set-validator!', 'set?', 'short', 'short-array', 'shorts', 'shuffle', + 'shutdown-agents', 'simple-ident?', 'simple-keyword?', 'simple-symbol?', + 'slurp', 'some', 'some-fn', 'some?', 'sort', 'sort-by', 'sorted-map', + 'sorted-map-by', 'sorted-set', 'sorted-set-by', 'sorted?', 'special-symbol?', + 'spit', 'split-at', 'split-with', 'str', 'string?', 'struct', 'struct-map', + 'subs', 
'subseq', 'subvec', 'supers', 'swap!', 'swap-vals!', 'symbol',
+  'symbol?', 'tagged-literal', 'tagged-literal?', 'take', 'take-last',
+  'take-nth', 'take-while', 'test', 'the-ns', 'thread-bound?', 'to-array',
+  'to-array-2d', 'trampoline', 'transduce', 'transient', 'tree-seq', 'true?',
+  'type', 'unchecked-add', 'unchecked-add-int', 'unchecked-byte',
+  'unchecked-char', 'unchecked-dec', 'unchecked-dec-int',
+  'unchecked-divide-int', 'unchecked-double', 'unchecked-float',
+  'unchecked-inc', 'unchecked-inc-int', 'unchecked-int', 'unchecked-long',
+  'unchecked-multiply', 'unchecked-multiply-int', 'unchecked-negate',
+  'unchecked-negate-int', 'unchecked-remainder-int', 'unchecked-short',
+  'unchecked-subtract', 'unchecked-subtract-int', 'underive', 'unreduced',
+  'unsigned-bit-shift-right', 'update', 'update-in', 'update-proxy', 'uri?',
+  'use', 'uuid?', 'val', 'vals', 'var-get', 'var-set', 'var?', 'vary-meta',
+  'vec', 'vector', 'vector-of', 'vector?', 'volatile!', 'volatile?', 'vreset!',
+  'with-bindings*', 'with-meta', 'with-redefs-fn', 'xml-seq', 'zero?', 'zipmap',
+  'diff-similar', 'equality-partition', 'diff', 'inspect', 'inspect-table',
+  'inspect-tree', 'validated', 'browse-url', 'as-file', 'as-url',
+  'make-input-stream', 'make-output-stream', 'make-reader', 'make-writer',
+  'as-relative-path', 'copy', 'delete-file', 'file', 'input-stream',
+  'make-parents', 'output-stream', 'reader', 'resource', 'writer',
+  'add-local-javadoc', 'add-remote-javadoc', 'javadoc', 'sh', 'demunge',
+  'load-script', 'main', 'repl', 'repl-caught', 'repl-exception', 'repl-prompt',
+  'repl-read', 'root-cause', 'skip-if-eol', 'skip-whitespace',
+  'stack-element-str', 'cl-format', 'fresh-line', 'get-pretty-writer', 'pprint',
+  'pprint-indent', 'pprint-newline', 'pprint-tab', 'print-table',
+  'set-pprint-dispatch', 'write', 'write-out', 'resolve-class', 'do-reflect',
+  'typename', '->AsmReflector', '->Constructor', '->Field', '->JavaReflector',
+  '->Method', 'map->Constructor', 'map->Field', 'map->Method', 'reflect',
+  'type-reflect', 'apropos', 'dir-fn', 'find-doc', 'pst', 'set-break-handler!',
+  'source-fn', 'thread-stopper', 'difference', 'index', 'intersection', 'join',
+  'map-invert', 'project', 'rename', 'rename-keys', 'select', 'subset?',
+  'superset?', 'union', 'e', 'print-cause-trace', 'print-stack-trace',
+  'print-throwable', 'print-trace-element', 'blank?', 'capitalize',
+  'ends-with?', 'escape', 'includes?', 'index-of', 'last-index-of',
+  'lower-case', 're-quote-replacement', 'replace-first', 'split', 'split-lines',
+  'starts-with?', 'trim', 'trim-newline', 'triml', 'trimr', 'upper-case',
+  'apply-template', 'assert-any', 'assert-predicate', 'compose-fixtures',
+  'do-report', 'file-position', 'function?', 'get-possibly-unbound-var',
+  'inc-report-counter', 'join-fixtures', 'run-all-tests', 'run-tests',
+  'successful?', 'test-all-vars', 'test-ns', 'test-vars',
+  'testing-contexts-str', 'testing-vars-str', 'keywordize-keys',
+  'macroexpand-all', 'postwalk', 'postwalk-demo', 'postwalk-replace', 'prewalk',
+  'prewalk-demo', 'prewalk-replace', 'stringify-keys', 'walk', 'append-child',
+  'branch?', 'children', 'down', 'edit', 'end?', 'insert-child', 'insert-left',
+  'insert-right', 'left', 'leftmost', 'lefts', 'make-node', 'node', 'path',
+  'prev', 'right', 'rightmost', 'rights', 'root', 'seq-zip', 'up', 'vector-zip',
+  'xml-zip', 'zipper'
+}, '-/<>!?=#\''))
+
+-- Identifiers.
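+-- Clojure symbols may contain characters such as '-', '!', '?' and '*',
+-- so the word pattern below is broader than l.word.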
+local word = (l.alpha + S('-!?*$=-')) * (l.alnum + S('.-!?*$+-'))^0 +local identifier = token(l.IDENTIFIER, word) + +-- Operators. +local operator = token(l.OPERATOR, S('`@()')) + +-- Clojure keywords +local clojure_keyword = token('clojure_keyword', ':' * S(':')^-1 * word * ('/' * word )^-1) +local clojure_symbol = token('clojure_symbol', "\'" * word * ('/' * word )^-1 ) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'func', func}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, + {"clojure_keyword", clojure_keyword}, + {"clojure_symbol", clojure_symbol} +} + + +M._tokenstyles = { + clojure_keyword = l.STYLE_TYPE, + clojure_symbol = l.STYLE_TYPE..',bold', +} + +M._foldsymbols = { + _patterns = {'[%(%)%[%]{}]', ';'}, + [l.OPERATOR] = { + ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1 + }, + [l.COMMENT] = {['#_('] = 1, [';'] = l.fold_line_comments(';')} +} + +return M diff --git a/lua/lexers/cmake.lua b/lua/lexers/cmake.lua @@ -0,0 +1,173 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- CMake LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'cmake'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '#' * l.nonnewline^0) + +-- Strings. +local string = token(l.STRING, l.delimited_range('"')) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match({ + 'IF', 'ENDIF', 'FOREACH', 'ENDFOREACH', 'WHILE', 'ENDWHILE', 'ELSE', 'ELSEIF' +}, nil, true)) + +-- Commands. +local command = token(l.FUNCTION, word_match({ + 'ADD_CUSTOM_COMMAND', 'ADD_CUSTOM_TARGET', 'ADD_DEFINITIONS', + 'ADD_DEPENDENCIES', 'ADD_EXECUTABLE', 'ADD_LIBRARY', 'ADD_SUBDIRECTORY', + 'ADD_TEST', 'AUX_SOURCE_DIRECTORY', 'BUILD_COMMAND', 'BUILD_NAME', + 'CMAKE_MINIMUM_REQUIRED', 'CONFIGURE_FILE', 'CREATE_TEST_SOURCELIST', + 'ENABLE_LANGUAGE', 'ENABLE_TESTING', 'ENDMACRO', 'EXECUTE_PROCESS', + 'EXEC_PROGRAM', 'EXPORT_LIBRARY_DEPENDENCIES', 'FILE', 'FIND_FILE', + 'FIND_LIBRARY', 'FIND_PACKAGE', 'FIND_PATH', 'FIND_PROGRAM', 'FLTK_WRAP_UI', + 'GET_CMAKE_PROPERTY', 'GET_DIRECTORY_PROPERTY', 'GET_FILENAME_COMPONENT', + 'GET_SOURCE_FILE_PROPERTY', 'GET_TARGET_PROPERTY', 'GET_TEST_PROPERTY', + 'INCLUDE', 'INCLUDE_DIRECTORIES', 'INCLUDE_EXTERNAL_MSPROJECT', + 'INCLUDE_REGULAR_EXPRESSION', 'INSTALL', 'INSTALL_FILES', 'INSTALL_PROGRAMS', + 'INSTALL_TARGETS', 'LINK_DIRECTORIES', 'LINK_LIBRARIES', 'LIST', 'LOAD_CACHE', + 'LOAD_COMMAND', 'MACRO', 'MAKE_DIRECTORY', 'MARK_AS_ADVANCED', 'MATH', + 'MESSAGE', 'OPTION', 'OUTPUT_REQUIRED_FILES', 'PROJECT', 'QT_WRAP_CPP', + 'QT_WRAP_UI', 'REMOVE', 'REMOVE_DEFINITIONS', 'SEPARATE_ARGUMENTS', 'SET', + 'SET_DIRECTORY_PROPERTIES', 'SET_SOURCE_FILES_PROPERTIES', + 'SET_TARGET_PROPERTIES', 'SET_TESTS_PROPERTIES', 'SITE_NAME', 'SOURCE_GROUP', + 'STRING', 'SUBDIRS', 'SUBDIR_DEPENDS', 'TARGET_LINK_LIBRARIES', 'TRY_COMPILE', + 'TRY_RUN', 'USE_MANGLED_MESA', 'UTILITY_SOURCE', 'VARIABLE_REQUIRES', + 'VTK_MAKE_INSTANTIATOR', 'VTK_WRAP_JAVA', 'VTK_WRAP_PYTHON', 'VTK_WRAP_TCL', + 'WRITE_FILE', +}, nil, true)) + +-- Constants. +local constant = token(l.CONSTANT, word_match({ + 'BOOL', 'CACHE', 'FALSE', 'N', 'NO', 'ON', 'OFF', 'NOTFOUND', 'TRUE' +}, nil, true)) + +-- Variables. 
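+-- Well-known built-in variables, plus the '${...}' dereference form
+-- matched at the end of the pattern.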
+local variable = token(l.VARIABLE, word_match{ + 'APPLE', 'BORLAND', 'CMAKE_AR', 'CMAKE_BACKWARDS_COMPATIBILITY', + 'CMAKE_BASE_NAME', 'CMAKE_BINARY_DIR', 'CMAKE_BUILD_TOOL', 'CMAKE_BUILD_TYPE', + 'CMAKE_CACHEFILE_DIR', 'CMAKE_CACHE_MAJOR_VERSION', + 'CMAKE_CACHE_MINOR_VERSION', 'CMAKE_CACHE_RELEASE_VERSION', + 'CMAKE_CFG_INTDIR', 'CMAKE_COLOR_MAKEFILE', 'CMAKE_COMMAND', + 'CMAKE_COMPILER_IS_GNUCC', 'CMAKE_COMPILER_IS_GNUCC_RUN', + 'CMAKE_COMPILER_IS_GNUCXX', 'CMAKE_COMPILER_IS_GNUCXX_RUN', + 'CMAKE_CTEST_COMMAND', 'CMAKE_CURRENT_BINARY_DIR', 'CMAKE_CURRENT_SOURCE_DIR', + 'CMAKE_CXX_COMPILER', 'CMAKE_CXX_COMPILER_ARG1', 'CMAKE_CXX_COMPILER_ENV_VAR', + 'CMAKE_CXX_COMPILER_FULLPATH', 'CMAKE_CXX_COMPILER_LOADED', + 'CMAKE_CXX_COMPILER_WORKS', 'CMAKE_CXX_COMPILE_OBJECT', + 'CMAKE_CXX_CREATE_SHARED_LIBRARY', + 'CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS', + 'CMAKE_CXX_CREATE_SHARED_MODULE', 'CMAKE_CXX_CREATE_STATIC_LIBRARY', + 'CMAKE_CXX_FLAGS', 'CMAKE_CXX_FLAGS_DEBUG', 'CMAKE_CXX_FLAGS_DEBUG_INIT', + 'CMAKE_CXX_FLAGS_INIT', 'CMAKE_CXX_FLAGS_MINSIZEREL', + 'CMAKE_CXX_FLAGS_MINSIZEREL_INIT', 'CMAKE_CXX_FLAGS_RELEASE', + 'CMAKE_CXX_FLAGS_RELEASE_INIT', 'CMAKE_CXX_FLAGS_RELWITHDEBINFO', + 'CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT', 'CMAKE_CXX_IGNORE_EXTENSIONS', + 'CMAKE_CXX_INFORMATION_LOADED', 'CMAKE_CXX_LINKER_PREFERENCE', + 'CMAKE_CXX_LINK_EXECUTABLE', 'CMAKE_CXX_LINK_FLAGS', + 'CMAKE_CXX_OUTPUT_EXTENSION', 'CMAKE_CXX_SOURCE_FILE_EXTENSIONS', + 'CMAKE_C_COMPILER', 'CMAKE_C_COMPILER_ARG1', 'CMAKE_C_COMPILER_ENV_VAR', + 'CMAKE_C_COMPILER_FULLPATH', 'CMAKE_C_COMPILER_LOADED', + 'CMAKE_C_COMPILER_WORKS', 'CMAKE_C_COMPILE_OBJECT', + 'CMAKE_C_CREATE_SHARED_LIBRARY', + 'CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS', + 'CMAKE_C_CREATE_SHARED_MODULE', 'CMAKE_C_CREATE_STATIC_LIBRARY', + 'CMAKE_C_FLAGS', 'CMAKE_C_FLAGS_DEBUG', 'CMAKE_C_FLAGS_DEBUG_INIT', + 'CMAKE_C_FLAGS_INIT', 'CMAKE_C_FLAGS_MINSIZEREL', + 'CMAKE_C_FLAGS_MINSIZEREL_INIT', 'CMAKE_C_FLAGS_RELEASE', + 'CMAKE_C_FLAGS_RELEASE_INIT', 'CMAKE_C_FLAGS_RELWITHDEBINFO', + 'CMAKE_C_FLAGS_RELWITHDEBINFO_INIT', 'CMAKE_C_IGNORE_EXTENSIONS', + 'CMAKE_C_INFORMATION_LOADED', 'CMAKE_C_LINKER_PREFERENCE', + 'CMAKE_C_LINK_EXECUTABLE', 'CMAKE_C_LINK_FLAGS', 'CMAKE_C_OUTPUT_EXTENSION', + 'CMAKE_C_SOURCE_FILE_EXTENSIONS', 'CMAKE_DL_LIBS', 'CMAKE_EDIT_COMMAND', + 'CMAKE_EXECUTABLE_SUFFIX', 'CMAKE_EXE_LINKER_FLAGS', + 'CMAKE_EXE_LINKER_FLAGS_DEBUG', 'CMAKE_EXE_LINKER_FLAGS_MINSIZEREL', + 'CMAKE_EXE_LINKER_FLAGS_RELEASE', 'CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO', + 'CMAKE_FILES_DIRECTORY', 'CMAKE_FIND_APPBUNDLE', 'CMAKE_FIND_FRAMEWORK', + 'CMAKE_FIND_LIBRARY_PREFIXES', 'CMAKE_FIND_LIBRARY_SUFFIXES', + 'CMAKE_GENERATOR', 'CMAKE_HOME_DIRECTORY', 'CMAKE_INCLUDE_FLAG_C', + 'CMAKE_INCLUDE_FLAG_CXX', 'CMAKE_INCLUDE_FLAG_C_SEP', 'CMAKE_INIT_VALUE', + 'CMAKE_INSTALL_PREFIX', 'CMAKE_LIBRARY_PATH_FLAG', 'CMAKE_LINK_LIBRARY_FLAG', + 'CMAKE_LINK_LIBRARY_SUFFIX', 'CMAKE_MAJOR_VERSION', 'CMAKE_MAKE_PROGRAM', + 'CMAKE_MINOR_VERSION', 'CMAKE_MODULE_EXISTS', 'CMAKE_MODULE_LINKER_FLAGS', + 'CMAKE_MODULE_LINKER_FLAGS_DEBUG', 'CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL', + 'CMAKE_MODULE_LINKER_FLAGS_RELEASE', + 'CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO', + 'CMAKE_MacOSX_Content_COMPILE_OBJECT', 'CMAKE_NUMBER_OF_LOCAL_GENERATORS', + 'CMAKE_OSX_ARCHITECTURES', 'CMAKE_OSX_SYSROOT', 'CMAKE_PARENT_LIST_FILE', + 'CMAKE_PATCH_VERSION', 'CMAKE_PLATFORM_HAS_INSTALLNAME', + 'CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES', 'CMAKE_PLATFORM_ROOT_BIN', + 'CMAKE_PROJECT_NAME', 'CMAKE_RANLIB', 
'CMAKE_ROOT', + 'CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS', + 'CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS', + 'CMAKE_SHARED_LIBRARY_CXX_FLAGS', 'CMAKE_SHARED_LIBRARY_C_FLAGS', + 'CMAKE_SHARED_LIBRARY_LINK_C_FLAGS', 'CMAKE_SHARED_LIBRARY_PREFIX', + 'CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG', + 'CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG_SEP', + 'CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG', 'CMAKE_SHARED_LIBRARY_SONAME_C_FLAG', + 'CMAKE_SHARED_LIBRARY_SUFFIX', 'CMAKE_SHARED_LINKER_FLAGS', + 'CMAKE_SHARED_LINKER_FLAGS_DEBUG', 'CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL', + 'CMAKE_SHARED_LINKER_FLAGS_RELEASE', + 'CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO', + 'CMAKE_SHARED_MODULE_CREATE_CXX_FLAGS', 'CMAKE_SHARED_MODULE_CREATE_C_FLAGS', + 'CMAKE_SHARED_MODULE_PREFIX', 'CMAKE_SHARED_MODULE_SUFFIX', + 'CMAKE_SIZEOF_VOID_P', 'CMAKE_SKIP_RPATH', 'CMAKE_SOURCE_DIR', + 'CMAKE_STATIC_LIBRARY_PREFIX', 'CMAKE_STATIC_LIBRARY_SUFFIX', 'CMAKE_SYSTEM', + 'CMAKE_SYSTEM_AND_CXX_COMPILER_INFO_FILE', + 'CMAKE_SYSTEM_AND_C_COMPILER_INFO_FILE', 'CMAKE_SYSTEM_APPBUNDLE_PATH', + 'CMAKE_SYSTEM_FRAMEWORK_PATH', 'CMAKE_SYSTEM_INCLUDE_PATH', + 'CMAKE_SYSTEM_INFO_FILE', 'CMAKE_SYSTEM_LIBRARY_PATH', 'CMAKE_SYSTEM_LOADED', + 'CMAKE_SYSTEM_NAME', 'CMAKE_SYSTEM_PROCESSOR', 'CMAKE_SYSTEM_PROGRAM_PATH', + 'CMAKE_SYSTEM_SPECIFIC_INFORMATION_LOADED', 'CMAKE_SYSTEM_VERSION', + 'CMAKE_UNAME', 'CMAKE_USE_RELATIVE_PATHS', 'CMAKE_VERBOSE_MAKEFILE', 'CYGWIN', + 'EXECUTABLE_OUTPUT_PATH', 'FORCE', 'HAVE_CMAKE_SIZEOF_VOID_P', + 'LIBRARY_OUTPUT_PATH', 'MACOSX_BUNDLE', 'MINGW', 'MSVC60', 'MSVC70', 'MSVC71', + 'MSVC80', 'MSVC', 'MSVC_IDE', 'PROJECT_BINARY_DIR', 'PROJECT_NAME', + 'PROJECT_SOURCE_DIR', 'PROJECT_BINARY_DIR', 'PROJECT_SOURCE_DIR', + 'RUN_CONFIGURE', 'UNIX', 'WIN32', '_CMAKE_OSX_MACHINE', + -- More variables. + 'LOCATION', 'TARGET', 'POST_BUILD', 'PRE_BUILD', 'ARGS' +} + P('$') * l.delimited_range('{}', false, true)) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, word_match({ + 'AND', 'COMMAND', 'DEFINED', 'DOC', 'EQUAL', 'EXISTS', 'GREATER', 'INTERNAL', + 'LESS', 'MATCHES', 'NAME', 'NAMES', 'NAME_WE', 'NOT', 'OR', 'PATH', 'PATHS', + 'PROGRAM', 'STREQUAL', 'STRGREATER', 'STRINGS', 'STRLESS' +}) + S('=(){}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'command', command}, + {'constant', constant}, + {'variable', variable}, + {'operator', operator}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, +} + +M._foldsymbols = { + _patterns = {'[A-Z]+', '[%(%){}]', '#'}, + [l.KEYWORD] = { + IF = 1, ENDIF = -1, FOREACH = 1, ENDFOREACH = -1, WHILE = 1, ENDWHILE = -1 + }, + [l.FUNCTION] = {MACRO = 1, ENDMACRO = -1}, + [l.OPERATOR] = {['('] = 1, [')'] = -1, ['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['#'] = l.fold_line_comments('#')} +} + +return M diff --git a/lua/lexers/coffeescript.lua b/lua/lexers/coffeescript.lua @@ -0,0 +1,62 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- CoffeeScript LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, S = lpeg.P, lpeg.S + +local M = {_NAME = 'coffeescript'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local block_comment = '###' * (l.any - '###')^0 * P('###')^-1 +local line_comment = '#' * l.nonnewline_esc^0 +local comment = token(l.COMMENT, block_comment + line_comment) + +-- Strings. 
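+-- Besides '...' and "..." strings, a /.../ literal is lexed as a regex only
+-- when the preceding non-blank character puts it in expression position
+-- (l.last_char_includes below), so division such as a / b stays an operator.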
+local sq_str = l.delimited_range("'") +local dq_str = l.delimited_range('"') +local regex_str = #P('/') * l.last_char_includes('+-*%<>!=^&|?~:;,([{') * + l.delimited_range('/', true) * S('igm')^0 +local string = token(l.STRING, sq_str + dq_str) + token(l.REGEX, regex_str) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'all', 'and', 'bind', 'break', 'by', 'case', 'catch', 'class', 'const', + 'continue', 'default', 'delete', 'do', 'each', 'else', 'enum', 'export', + 'extends', 'false', 'for', 'finally', 'function', 'if', 'import', 'in', + 'instanceof', 'is', 'isnt', 'let', 'loop', 'native', 'new', 'no', 'not', 'of', + 'off', 'on', 'or', 'return', 'super', 'switch', 'then', 'this', 'throw', + 'true', 'try', 'typeof', 'unless', 'until', 'var', 'void', 'with', 'when', + 'while', 'yes' +}) + +-- Fields: object properties and methods. +local field = token(l.FUNCTION, '.' * (S('_$') + l.alpha) * + (S('_$') + l.alnum)^0) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'field', field}, + {'identifier', identifier}, + {'comment', comment}, + {'number', number}, + {'string', string}, + {'operator', operator}, +} + +M._FOLDBYINDENTATION = true + +return M diff --git a/lua/lexers/container.lua b/lua/lexers/container.lua @@ -0,0 +1,7 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Container LPeg lexer. +-- This is SciTE's plain text lexer. + +local M = {_NAME = 'container'} + +return M diff --git a/lua/lexers/context.lua b/lua/lexers/context.lua @@ -0,0 +1,59 @@ +-- Copyright 2006-2017 Robert Gieseke. See LICENSE. +-- ConTeXt LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'context'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '%' * l.nonnewline^0) + +-- Commands. +local command = token(l.KEYWORD, '\\' * (l.alpha^1 + S('#$&~_^%{}'))) + +-- Sections. +local section = token('section', '\\' * word_match{ + 'part', 'chapter', 'section', 'subsection', 'subsubsection', 'title', + 'subject', 'subsubject', 'subsubsubject' +}) + +-- ConTeXt environments. +local environment = token('environment', '\\' * (P('start') + 'stop') * l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('$&#{}[]')) + +M._rules = { + {'whitespace', ws}, + {'comment', comment}, + {'environment', environment}, + {'section', section}, + {'keyword', command}, + {'operator', operator}, +} + +M._tokenstyles = { + environment = l.STYLE_KEYWORD, + section = l.STYLE_CLASS +} + +M._foldsymbols = { + _patterns = {'\\start', '\\stop', '[{}]', '%%'}, + ['environment'] = {['\\start'] = 1, ['\\stop'] = -1}, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['%'] = l.fold_line_comments('%')} +} + +-- Embedded Lua. +local luatex = l.load('lua') +local luatex_start_rule = #P('\\startluacode') * environment +local luatex_end_rule = #P('\\stopluacode') * environment +l.embed_lexer(M, luatex, luatex_start_rule, luatex_end_rule) + + +return M diff --git a/lua/lexers/cpp.lua b/lua/lexers/cpp.lua @@ -0,0 +1,90 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- C++ LPeg lexer. 
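+-- Like the rest of lua/lexers/, this file returns a table consumed by
+-- lua/lexer.lua: _rules lists token patterns in match order, _tokenstyles
+-- maps custom token names to styles, and _foldsymbols drives code folding
+-- (container.lua above is the minimal case: a _NAME and nothing else).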
+ +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'cpp'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '//' * l.nonnewline_esc^0 +local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local sq_str = P('L')^-1 * l.delimited_range("'", true) +local dq_str = P('L')^-1 * l.delimited_range('"', true) +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Preprocessor. +local preproc_word = word_match{ + 'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef', 'ifndef', 'import', + 'line', 'pragma', 'undef', 'using', 'warning' +} +local preproc = #l.starts_line('#') * + (token(l.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) + + token(l.PREPROCESSOR, '#' * S('\t ')^0 * 'include') * + (token(l.WHITESPACE, S('\t ')^1) * + token(l.STRING, l.delimited_range('<>', true, true)))^-1) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'asm', 'auto', 'break', 'case', 'catch', 'class', 'const', 'const_cast', + 'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else', 'explicit', + 'export', 'extern', 'false', 'for', 'friend', 'goto', 'if', 'inline', + 'mutable', 'namespace', 'new', 'operator', 'private', 'protected', 'public', + 'register', 'reinterpret_cast', 'return', 'sizeof', 'static', 'static_cast', + 'switch', 'template', 'this', 'throw', 'true', 'try', 'typedef', 'typeid', + 'typename', 'using', 'virtual', 'volatile', 'while', + -- Operators + 'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not', 'not_eq', 'or', 'or_eq', + 'xor', 'xor_eq', + -- C++11 + 'alignas', 'alignof', 'constexpr', 'decltype', 'final', 'noexcept', + 'override', 'static_assert', 'thread_local' +}) + +-- Types. +local type = token(l.TYPE, word_match{ + 'bool', 'char', 'double', 'enum', 'float', 'int', 'long', 'short', 'signed', + 'struct', 'union', 'unsigned', 'void', 'wchar_t', + -- C++11 + 'char16_t', 'char32_t', 'nullptr' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'type', type}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'preproc', preproc}, + {'operator', operator}, +} + +M._foldsymbols = { + _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'}, + [l.PREPROCESSOR] = { + region = 1, endregion = -1, + ['if'] = 1, ifdef = 1, ifndef = 1, endif = -1 + }, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} +} + +return M diff --git a/lua/lexers/crystal.lua b/lua/lexers/crystal.lua @@ -0,0 +1,141 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Copyright 2017 Michel Martens. +-- Crystal LPeg lexer (based on Ruby). + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'crystal'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. 
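+-- Crystal has line comments only: '#' to end of line. The match-time
+-- function just below generalizes delimited literals, accepting any
+-- non-alphanumeric, non-space delimiter and balancing nested (), [] and {}.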
+local line_comment = '#' * l.nonnewline_esc^0 +local comment = token(l.COMMENT, line_comment) + +local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}'} +local literal_delimitted = P(function(input, index) + local delimiter = input:sub(index, index) + if not delimiter:find('[%w\r\n\f\t ]') then -- only non alpha-numerics + local match_pos, patt + if delimiter_matches[delimiter] then + -- Handle nested delimiter/matches in strings. + local s, e = delimiter, delimiter_matches[delimiter] + patt = l.delimited_range(s..e, false, false, true) + else + patt = l.delimited_range(delimiter) + end + match_pos = lpeg.match(patt, input, index) + return match_pos or #input + 1 + end +end) + +-- Strings. +local cmd_str = l.delimited_range('`') +local sq_str = l.delimited_range("'") +local dq_str = l.delimited_range('"') +local heredoc = '<<' * P(function(input, index) + local s, e, indented, _, delimiter = + input:find('(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index) + if s == index and delimiter then + local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+') + local _, e = input:find(end_heredoc..delimiter, e) + return e and e + 1 or #input + 1 + end +end) +-- TODO: regex_str fails with `obj.method /patt/` syntax. +local regex_str = #P('/') * l.last_char_includes('!%^&*([{-=+|:;,?<>~') * + l.delimited_range('/', true, false) * S('iomx')^0 +local string = token(l.STRING, (sq_str + dq_str + heredoc + cmd_str) * + S('f')^-1) + + token(l.REGEX, regex_str) + +local word_char = l.alnum + S('_!?') + +-- Numbers. +local dec = l.digit^1 * ('_' * l.digit^1)^0 * S('ri')^-1 +local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0 +local integer = S('+-')^-1 * (bin + l.hex_num + l.oct_num + dec) +-- TODO: meta, control, etc. for numeric_literal. +local numeric_literal = '?' * (l.any - l.space) * -word_char +local number = token(l.NUMBER, l.float * S('ri')^-1 + integer + numeric_literal) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match({ + 'alias', 'begin', 'break', 'case', 'class', 'def', 'defined?', 'do', 'else', + 'elsif', 'end', 'ensure', 'false', 'for', 'if', 'in', 'module', 'next', 'nil', + 'not', 'redo', 'rescue', 'retry', 'return', 'self', 'super', 'then', 'true', + 'undef', 'unless', 'until', 'when', 'while', 'yield', '__FILE__', '__LINE__' +}, '?!')) + +-- Functions. +local func = token(l.FUNCTION, word_match({ + 'abort', 'at_exit', 'caller', 'delay', 'exit', 'fork', 'future', + 'get_stack_top', 'gets', 'lazy', 'loop', 'main', 'p', 'print', 'printf', + 'puts', 'raise', 'rand', 'read_line', 'require', 'sleep', 'spawn', 'sprintf', + 'system', 'with_color', + -- Macros + 'assert_responds_to', 'debugger', 'parallel', 'pp', 'record', 'redefine_main' +}, '?!')) * -S('.:|') + +-- Identifiers. +local word = (l.alpha + '_') * word_char^0 +local identifier = token(l.IDENTIFIER, word) + +-- Variables. +local global_var = '$' * (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + l.digit + '-' * + S('0FadiIKlpvw')) +local class_var = '@@' * word +local inst_var = '@' * word +local variable = token(l.VARIABLE, global_var + class_var + inst_var) + +-- Symbols. +local symbol = token('symbol', ':' * P(function(input, index) + if input:sub(index - 2, index - 2) ~= ':' then return index end +end) * (word_char^1 + sq_str + dq_str)) + +-- Operators. 
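+-- In the fold table below, 'if', 'while', 'unless' and 'until' only open a
+-- fold when they start a line; modifier forms such as `puts x if y` make
+-- disambiguate() return 0, so they do not unbalance folding.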
+local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'function', func}, + {'identifier', identifier}, + {'comment', comment}, + {'string', string}, + {'number', number}, + {'variable', variable}, + {'symbol', symbol}, + {'operator', operator}, +} + +M._tokenstyles = { + symbol = l.STYLE_CONSTANT +} + +local function disambiguate(text, pos, line, s) + return line:sub(1, s - 1):match('^%s*$') and + not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0 +end + +M._foldsymbols = { + _patterns = {'%l+', '[%(%)%[%]{}]', '#'}, + [l.KEYWORD] = { + begin = 1, class = 1, def = 1, ['do'] = 1, ['for'] = 1, ['module'] = 1, + case = 1, + ['if'] = disambiguate, ['while'] = disambiguate, + ['unless'] = disambiguate, ['until'] = disambiguate, + ['end'] = -1 + }, + [l.OPERATOR] = { + ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1 + }, + [l.COMMENT] = { + ['#'] = l.fold_line_comments('#') + } +} + +return M diff --git a/lua/lexers/csharp.lua b/lua/lexers/csharp.lua @@ -0,0 +1,84 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- C# LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'csharp'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '//' * l.nonnewline_esc^0 +local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local sq_str = l.delimited_range("'", true) +local dq_str = l.delimited_range('"', true) +local ml_str = P('@')^-1 * l.delimited_range('"', false, true) +local string = token(l.STRING, sq_str + dq_str + ml_str) + +-- Numbers. +local number = token(l.NUMBER, (l.float + l.integer) * S('lLdDfFMm')^-1) + +-- Preprocessor. +local preproc_word = word_match{ + 'define', 'elif', 'else', 'endif', 'error', 'if', 'line', 'undef', 'warning', + 'region', 'endregion' +} +local preproc = token(l.PREPROCESSOR, + l.starts_line('#') * S('\t ')^0 * preproc_word * + (l.nonnewline_esc^1 + l.space * l.nonnewline_esc^0)) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'class', 'delegate', 'enum', 'event', 'interface', 'namespace', 'struct', + 'using', 'abstract', 'const', 'explicit', 'extern', 'fixed', 'implicit', + 'internal', 'lock', 'out', 'override', 'params', 'partial', 'private', + 'protected', 'public', 'ref', 'sealed', 'static', 'readonly', 'unsafe', + 'virtual', 'volatile', 'add', 'as', 'assembly', 'base', 'break', 'case', + 'catch', 'checked', 'continue', 'default', 'do', 'else', 'finally', 'for', + 'foreach', 'get', 'goto', 'if', 'in', 'is', 'new', 'remove', 'return', 'set', + 'sizeof', 'stackalloc', 'super', 'switch', 'this', 'throw', 'try', 'typeof', + 'unchecked', 'value', 'void', 'while', 'yield', + 'null', 'true', 'false' +}) + +-- Types. +local type = token(l.TYPE, word_match{ + 'bool', 'byte', 'char', 'decimal', 'double', 'float', 'int', 'long', 'object', + 'operator', 'sbyte', 'short', 'string', 'uint', 'ulong', 'ushort' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. 
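+-- Folding below pairs '#region' with '#endregion' (+1/-1) in addition to
+-- braces and block comments, mirroring the Visual Studio convention.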
+local operator = token(l.OPERATOR, S('~!.,:;+-*/<>=\\^|&%?()[]{}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'type', type}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'preproc', preproc}, + {'operator', operator}, +} + +M._foldsymbols = { + _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'}, + [l.PREPROCESSOR] = { + region = 1, endregion = -1, + ['if'] = 1, ifdef = 1, ifndef = 1, endif = -1 + }, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} +} + +return M diff --git a/lua/lexers/css.lua b/lua/lexers/css.lua @@ -0,0 +1,166 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- CSS LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V + +local M = {_NAME = 'css'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '/*' * (l.any - '*/')^0 * P('*/')^-1) + +-- Strings. +local sq_str = l.delimited_range("'") +local dq_str = l.delimited_range('"') +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, l.digit^1) + +-- Keywords. +local css1_property = word_match({ + 'color', 'background-color', 'background-image', 'background-repeat', + 'background-attachment', 'background-position', 'background', 'font-family', + 'font-style', 'font-variant', 'font-weight', 'font-size', 'font', + 'word-spacing', 'letter-spacing', 'text-decoration', 'vertical-align', + 'text-transform', 'text-align', 'text-indent', 'line-height', 'margin-top', + 'margin-right', 'margin-bottom', 'margin-left', 'margin', 'padding-top', + 'padding-right', 'padding-bottom', 'padding-left', 'padding', + 'border-top-width', 'border-right-width', 'border-bottom-width', + 'border-left-width', 'border-width', 'border-top', 'border-right', + 'border-bottom', 'border-left', 'border', 'border-color', 'border-style', + 'width', 'height', 'float', 'clear', 'display', 'white-space', + 'list-style-type', 'list-style-image', 'list-style-position', 'list-style' +}, '-') +local css1_value = word_match({ + 'auto', 'none', 'normal', 'italic', 'oblique', 'small-caps', 'bold', 'bolder', + 'lighter', 'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', + 'xx-large', 'larger', 'smaller', 'transparent', 'repeat', 'repeat-x', + 'repeat-y', 'no-repeat', 'scroll', 'fixed', 'top', 'bottom', 'left', 'center', + 'right', 'justify', 'both', 'underline', 'overline', 'line-through', 'blink', + 'baseline', 'sub', 'super', 'text-top', 'middle', 'text-bottom', 'capitalize', + 'uppercase', 'lowercase', 'thin', 'medium', 'thick', 'dotted', 'dashed', + 'solid', 'double', 'groove', 'ridge', 'inset', 'outset', 'block', 'inline', + 'list-item', 'pre', 'no-wrap', 'inside', 'outside', 'disc', 'circle', + 'square', 'decimal', 'lower-roman', 'upper-roman', 'lower-alpha', + 'upper-alpha', 'aqua', 'black', 'blue', 'fuchsia', 'gray', 'green', 'lime', + 'maroon', 'navy', 'olive', 'purple', 'red', 'silver', 'teal', 'white', + 'yellow' +}, '-') +local css2_property = word_match({ + 'border-top-color', 'border-right-color', 'border-bottom-color', + 'border-left-color', 'border-color', 'border-top-style', 'border-right-style', + 'border-bottom-style', 'border-left-style', 'border-style', 'top', 'right', + 'bottom', 'left', 'position', 'z-index', 'direction', 'unicode-bidi', + 'min-width', 'max-width', 'min-height', 
'max-height', 'overflow', 'clip', + 'visibility', 'content', 'quotes', 'counter-reset', 'counter-increment', + 'marker-offset', 'size', 'marks', 'page-break-before', 'page-break-after', + 'page-break-inside', 'page', 'orphans', 'widows', 'font-stretch', + 'font-size-adjust', 'unicode-range', 'units-per-em', 'src', 'panose-1', + 'stemv', 'stemh', 'slope', 'cap-height', 'x-height', 'ascent', 'descent', + 'widths', 'bbox', 'definition-src', 'baseline', 'centerline', 'mathline', + 'topline', 'text-shadow', 'caption-side', 'table-layout', 'border-collapse', + 'border-spacing', 'empty-cells', 'speak-header', 'cursor', 'outline', + 'outline-width', 'outline-style', 'outline-color', 'volume', 'speak', + 'pause-before', 'pause-after', 'pause', 'cue-before', 'cue-after', 'cue', + 'play-during', 'azimuth', 'elevation', 'speech-rate', 'voice-family', 'pitch', + 'pitch-range', 'stress', 'richness', 'speak-punctuation', 'speak-numeral' +}, '-') +local css2_value = word_match({ + 'inherit', 'run-in', 'compact', 'marker', 'table', 'inline-table', + 'table-row-group', 'table-header-group', 'table-footer-group', 'table-row', + 'table-column-group', 'table-column', 'table-cell', 'table-caption', 'static', + 'relative', 'absolute', 'fixed', 'ltr', 'rtl', 'embed', 'bidi-override', + 'visible', 'hidden', 'scroll', 'collapse', 'open-quote', 'close-quote', + 'no-open-quote', 'no-close-quote', 'decimal-leading-zero', 'lower-greek', + 'lower-latin', 'upper-latin', 'hebrew', 'armenian', 'georgian', + 'cjk-ideographic', 'hiragana', 'katakana', 'hiragana-iroha', 'katakana-iroha', + 'landscape', 'portrait', 'crop', 'cross', 'always', 'avoid', 'wider', + 'narrower', 'ultra-condensed', 'extra-condensed', 'condensed', + 'semi-condensed', 'semi-expanded', 'expanded', 'extra-expanded', + 'ultra-expanded', 'caption', 'icon', 'menu', 'message-box', 'small-caption', + 'status-bar', 'separate', 'show', 'hide', 'once', 'crosshair', 'default', + 'pointer', 'move', 'text', 'wait', 'help', 'e-resize', 'ne-resize', + 'nw-resize', 'n-resize', 'se-resize', 'sw-resize', 's-resize', 'w-resize', + 'ActiveBorder', 'ActiveCaption', 'AppWorkspace', 'Background', 'ButtonFace', + 'ButtonHighlight', 'ButtonShadow', 'InactiveCaptionText', 'ButtonText', + 'CaptionText', 'GrayText', 'Highlight', 'HighlightText', 'InactiveBorder', + 'InactiveCaption', 'InfoBackground', 'InfoText', 'Menu', 'MenuText', + 'Scrollbar', 'ThreeDDarkShadow', 'ThreeDFace', 'ThreeDHighlight', + 'ThreeDLightShadow', 'ThreeDShadow', 'Window', 'WindowFrame', 'WindowText', + 'silent', 'x-soft', 'soft', 'medium', 'loud', 'x-loud', 'spell-out', 'mix', + 'left-side', 'far-left', 'center-left', 'center-right', 'far-right', + 'right-side', 'behind', 'leftwards', 'rightwards', 'below', 'level', 'above', + 'higher', 'lower', 'x-slow', 'slow', 'medium', 'fast', 'x-fast', 'faster', + 'slower', 'male', 'female', 'child', 'x-low', 'low', 'high', 'x-high', 'code', + 'digits', 'continous' +}, '-') +local property = token(l.KEYWORD, css1_property + css2_property) +local value = token('value', css1_value + css2_value) +local keyword = property + value + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.alpha * (l.alnum + S('_-'))^0) + +-- Operators. +local operator = token(l.OPERATOR, S('~!#*>+=|.,:;()[]{}')) + +-- At rule. +local at_rule = token('at_rule', P('@') * word_match{ + 'charset', 'font-face', 'media', 'page', 'import' +}) + +-- Colors. 
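+-- Matches #rgb and #rrggbb hex forms (three hex digits, optionally three
+-- more) plus named colors; note that most color names ('navy', 'red', ...)
+-- are already claimed by the keyword rule above, so in practice this mostly
+-- catches hex values and 'orange'.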
+local xdigit = l.xdigit +local hex_color = '#' * xdigit * xdigit * xdigit * (xdigit * xdigit * xdigit)^-1 +local color_name = word_match{ + 'aqua', 'black', 'blue', 'fuchsia', 'gray', 'green', 'lime', 'maroon', 'navy', + 'olive', 'orange', 'purple', 'red', 'silver', 'teal', 'white', 'yellow' +} +local color = token('color', hex_color + color_name) + +-- Pseudo. +local pseudo = token(l.CONSTANT, word_match({ + -- Pseudo elements. + 'first-line', 'first-letter', 'before', 'after', + -- Pseudo classes. + 'first-child', 'link', 'visited', 'hover', 'active', 'focus', 'lang', +}, '-')) + +-- Units. +local unit = token('unit', word_match{ + 'em', 'ex', 'px', 'pt', 'pc', 'in', 'ft', 'mm', 'cm', 'kHz', 'Hz', 'deg', + 'rad', 'grad', 'ms', 's' +} + '%') + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'pseudo', pseudo}, + {'color', color}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number * unit^-1}, + {'operator', operator}, + {'at_rule', at_rule}, +} + +M._tokenstyles = { + unit = l.STYLE_LABEL, + value = l.STYLE_CONSTANT, + color = l.STYLE_NUMBER, + at_rule = l.STYLE_PREPROCESSOR +} + +M._foldsymbols = { + _patterns = {'[{}]', '/%*', '%*/'}, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['/*'] = 1, ['*/'] = -1} +} + +return M diff --git a/lua/lexers/cuda.lua b/lua/lexers/cuda.lua @@ -0,0 +1,92 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- CUDA LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S +local table = _G.table + +local M = {_NAME = 'cuda'} + +-- Whitespace +local ws = token(l.WHITESPACE, l.space^1) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + '__global__', '__host__', '__device__', '__constant__', '__shared__' +}) + +-- Functions. +local func = token(l.FUNCTION, word_match{ + -- Atom. + 'atomicAdd', 'atomicAnd', 'atomicCAS', 'atomicDec', 'atomicExch', 'atomicInc', + 'atomicMax', 'atomicMin', 'atomicOr', 'atomicSub', 'atomicXor', + -- Dev. + 'tex1D', 'tex1Dfetch', 'tex2D', '__float_as_int', '__int_as_float', + '__float2int_rn', '__float2int_rz', '__float2int_ru', '__float2int_rd', + '__float2uint_rn', '__float2uint_rz', '__float2uint_ru', '__float2uint_rd', + '__int2float_rn', '__int2float_rz', '__int2float_ru', '__int2float_rd', + '__uint2float_rn', '__uint2float_rz', '__uint2float_ru', '__uint2float_rd', + '__fadd_rz', '__fmul_rz', '__fdividef', '__mul24', '__umul24', '__mulhi', + '__umulhi', '__mul64hi', '__umul64hi', 'min', 'umin', 'fminf', 'fmin', 'max', + 'umax', 'fmaxf', 'fmax', 'abs', 'fabsf', 'fabs', 'sqrtf', 'sqrt', 'sinf', + '__sinf', 'sin', 'cosf', '__cosf', 'cos', 'sincosf', '__sincosf', 'expf', + '__expf', 'exp', 'logf', '__logf', 'log', + -- Runtime. 
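+  -- (host-side cudaXxx API; as with the device intrinsics above, only
+  -- exact names from this list are highlighted)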
+ 'cudaBindTexture', 'cudaBindTextureToArray', 'cudaChooseDevice', + 'cudaConfigureCall', 'cudaCreateChannelDesc', 'cudaD3D10GetDevice', + 'cudaD3D10MapResources', 'cudaD3D10RegisterResource', + 'cudaD3D10ResourceGetMappedArray', 'cudaD3D10ResourceGetMappedPitch', + 'cudaD3D10ResourceGetMappedPointer', 'cudaD3D10ResourceGetMappedSize', + 'cudaD3D10ResourceGetSurfaceDimensions', 'cudaD3D10ResourceSetMapFlags', + 'cudaD3D10SetDirect3DDevice', 'cudaD3D10UnmapResources', + 'cudaD3D10UnregisterResource', 'cudaD3D9GetDevice', + 'cudaD3D9GetDirect3DDevice', 'cudaD3D9MapResources', + 'cudaD3D9RegisterResource', 'cudaD3D9ResourceGetMappedArray', + 'cudaD3D9ResourceGetMappedPitch', 'cudaD3D9ResourceGetMappedPointer', + 'cudaD3D9ResourceGetMappedSize', 'cudaD3D9ResourceGetSurfaceDimensions', + 'cudaD3D9ResourceSetMapFlags', 'cudaD3D9SetDirect3DDevice', + 'cudaD3D9UnmapResources', 'cudaD3D9UnregisterResource', 'cudaEventCreate', + 'cudaEventDestroy', 'cudaEventElapsedTime', 'cudaEventQuery', + 'cudaEventRecord', 'cudaEventSynchronize', 'cudaFree', 'cudaFreeArray', + 'cudaFreeHost', 'cudaGetChannelDesc', 'cudaGetDevice', 'cudaGetDeviceCount', + 'cudaGetDeviceProperties', 'cudaGetErrorString', 'cudaGetLastError', + 'cudaGetSymbolAddress', 'cudaGetSymbolSize', 'cudaGetTextureAlignmentOffset', + 'cudaGetTextureReference', 'cudaGLMapBufferObject', + 'cudaGLRegisterBufferObject', 'cudaGLSetGLDevice', 'cudaGLUnmapBufferObject', + 'cudaGLUnregisterBufferObject', 'cudaLaunch', 'cudaMalloc', 'cudaMalloc3D', + 'cudaMalloc3DArray', 'cudaMallocArray', 'cudaMallocHost', 'cudaMallocPitch', + 'cudaMemcpy', 'cudaMemcpy2D', 'cudaMemcpy2DArrayToArray', + 'cudaMemcpy2DFromArray', 'cudaMemcpy2DToArray', 'cudaMemcpy3D', + 'cudaMemcpyArrayToArray', 'cudaMemcpyFromArray', 'cudaMemcpyFromSymbol', + 'cudaMemcpyToArray', 'cudaMemcpyToSymbol', 'cudaMemset', 'cudaMemset2D', + 'cudaMemset3D', 'cudaSetDevice', 'cudaSetupArgument', 'cudaStreamCreate', + 'cudaStreamDestroy', 'cudaStreamQuery', 'cudaStreamSynchronize', + 'cudaThreadExit', 'cudaThreadSynchronize', 'cudaUnbindTexture' +}) + +-- Types. +local type = token(l.TYPE, word_match{ + 'uint', 'int1', 'uint1', 'int2', 'uint2', 'int3', 'uint3', 'int4', 'uint4', + 'float1', 'float2', 'float3', 'float4', 'char1', 'char2', 'char3', 'char4', + 'uchar1', 'uchar2', 'uchar3', 'uchar4', 'short1', 'short2', 'short3', + 'short4', 'dim1', 'dim2', 'dim3', 'dim4' +}) + +-- Variables. +local variable = token(l.VARIABLE, word_match{ + 'gridDim', 'blockIdx', 'blockDim', 'threadIdx' +}) + +-- Extend cpp lexer to include CUDA elements. +local cpp = l.load('cpp') +local _rules = cpp._rules +_rules[1] = {'whitespace', ws} +table.insert(_rules, 2, {'cuda_keyword', keyword}) +table.insert(_rules, 3, {'cuda_function', func}) +table.insert(_rules, 4, {'cuda_type', type}) +table.insert(_rules, 5, {'cuda_variable', variable}) +M._rules = _rules +M._foldsymbols = cpp._foldsymbols + +return M diff --git a/lua/lexers/dart.lua b/lua/lexers/dart.lua @@ -0,0 +1,77 @@ +-- Copyright 2013-2017 Brian Schott (@Hackerpilot on Github). See LICENSE. +-- Dart LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'dart'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '//' * l.nonnewline_esc^0 +local nested_comment = l.nested_pair('/*', '*/') +local comment = token(l.COMMENT, line_comment + nested_comment) + +-- Strings. 
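+-- An optional leading 'r' marks a raw string (r'...' or r"..."), handled by
+-- the S('r')^-1 prefix on each of the four patterns below.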
+local sq_str = S('r')^-1 * l.delimited_range("'", true) +local dq_str = S('r')^-1 * l.delimited_range('"', true) +local sq_str_multiline = S('r')^-1 * l.delimited_range('"""') +local dq_str_multiline = S('r')^-1 * l.delimited_range("''' ") +local string = token(l.STRING, + sq_str + dq_str + sq_str_multiline + dq_str_multiline) + +-- Numbers. +local number = token(l.NUMBER, (l.float + l.hex_num)) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'assert', 'break', 'case', 'catch', 'class', 'const', 'continue', 'default', + 'do', 'else', 'enum', 'extends', 'false', 'final' , 'finally', 'for', 'if', + 'in', 'is', 'new', 'null', 'rethrow', 'return', 'super', 'switch', 'this', + 'throw', 'true', 'try', 'var', 'void', 'while', 'with', +}) + +local builtin_identifiers = token(l.CONSTANT, word_match{ + 'abstract', 'as', 'dynamic', 'export', 'external', 'factory', 'get', + 'implements', 'import', 'library', 'operator', 'part', 'set', 'static', + 'typedef' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('#?=!<>+-*$/%&|^~.,;()[]{}')) + +-- Preprocs. +local annotation = token('annotation', '@' * l.word^1) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'constant', builtin_identifiers}, + {'string', string}, + {'identifier', identifier}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, + {'annotation', annotation}, +} + +M._tokenstyles = { + annotation = l.STYLE_PREPROCESSOR, +} + +M._foldsymbols = { + _patterns = {'[{}]', '/[*+]', '[*+]/', '//'}, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = { + ['/*'] = 1, ['*/'] = -1, ['/+'] = 1, ['+/'] = -1, + ['//'] = l.fold_line_comments('//') + } +} + +return M diff --git a/lua/lexers/desktop.lua b/lua/lexers/desktop.lua @@ -0,0 +1,62 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Desktop Entry LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'desktop'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '#' * l.nonnewline^0) + +-- Strings. +local string = token(l.STRING, l.delimited_range('"')) + +-- Group headers. +local group_header = l.starts_line(token(l.STRING, + l.delimited_range('[]', false, true))) + +-- Numbers. +local number = token(l.NUMBER, (l.float + l.integer)) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{'true', 'false'}) + +-- Locales. +local locale = token(l.CLASS, l.delimited_range('[]', false, true)) + +-- Keys. +local key = token(l.VARIABLE, word_match{ + 'Type', 'Version', 'Name', 'GenericName', 'NoDisplay', 'Comment', 'Icon', + 'Hidden', 'OnlyShowIn', 'NotShowIn', 'TryExec', 'Exec', 'Exec', 'Path', + 'Terminal', 'MimeType', 'Categories', 'StartupNotify', 'StartupWMClass', 'URL' +}) + +-- Field codes. +local code = l.token(l.CONSTANT, P('%') * S('fFuUdDnNickvm')) + +-- Identifiers. +local identifier = l.token(l.IDENTIFIER, l.alpha * (l.alnum + S('_-'))^0) + +-- Operators. 
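+-- '=' is the only operator in a desktop entry; a typical line such as
+-- Exec=firefox %u lexes as key, operator, identifier and field code.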
+local operator = token(l.OPERATOR, S('=')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'key', key}, + {'identifier', identifier}, + {'group_header', group_header}, + {'locale', locale}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'code', code}, + {'operator', operator}, +} + +return M diff --git a/lua/lexers/diff.lua b/lua/lexers/diff.lua @@ -0,0 +1,44 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Diff LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'diff'} + +-- Text, separators, and file headers. +local index = token(l.COMMENT, 'Index: ' * l.any^0 * P(-1)) +local separator = token(l.COMMENT, ('---' + P('*')^4 + P('=')^1) * l.space^0 * + -1) +local header = token('header', (P('*** ') + '--- ' + '+++ ') * l.any^1) + +-- Location. +local location = token(l.NUMBER, ('@@' + l.digit^1 + '****') * l.any^1) + +-- Additions, deletions, and changes. +local addition = token('addition', S('>+') * l.any^0) +local deletion = token('deletion', S('<-') * l.any^0) +local change = token('change', '! ' * l.any^0) + +M._rules = { + {'index', index}, + {'separator', separator}, + {'header', header}, + {'location', location}, + {'addition', addition}, + {'deletion', deletion}, + {'change', change}, + {'any_line', token('default', l.any^1)}, +} + +M._tokenstyles = { + header = l.STYLE_COMMENT, + addition = 'fore:green', + deletion = 'fore:red', + change = 'fore:yellow', +} + +M._LEXBYLINE = true + +return M diff --git a/lua/lexers/django.lua b/lua/lexers/django.lua @@ -0,0 +1,77 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Django LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V + +local M = {_NAME = 'django'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '{#' * (l.any - l.newline - '#}')^0 * + P('#}')^-1) + +-- Strings. +local string = token(l.STRING, l.delimited_range('"', false, true)) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'as', 'block', 'blocktrans', 'by', 'endblock', 'endblocktrans', 'comment', + 'endcomment', 'cycle', 'date', 'debug', 'else', 'extends', 'filter', + 'endfilter', 'firstof', 'for', 'endfor', 'if', 'endif', 'ifchanged', + 'endifchanged', 'ifnotequal', 'endifnotequal', 'in', 'load', 'not', 'now', + 'or', 'parsed', 'regroup', 'ssi', 'trans', 'with', 'widthratio' +}) + +-- Functions. +local func = token(l.FUNCTION, word_match{ + 'add', 'addslashes', 'capfirst', 'center', 'cut', 'date', 'default', + 'dictsort', 'dictsortreversed', 'divisibleby', 'escape', 'filesizeformat', + 'first', 'fix_ampersands', 'floatformat', 'get_digit', 'join', 'length', + 'length_is', 'linebreaks', 'linebreaksbr', 'linenumbers', 'ljust', 'lower', + 'make_list', 'phone2numeric', 'pluralize', 'pprint', 'random', 'removetags', + 'rjust', 'slice', 'slugify', 'stringformat', 'striptags', 'time', 'timesince', + 'title', 'truncatewords', 'unordered_list', 'upper', 'urlencode', 'urlize', + 'urlizetrunc', 'wordcount', 'wordwrap', 'yesno', +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. 
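+-- '|' is the filter separator in template expressions such as
+-- {{ name|lower }}; the surrounding {{ }} and {% %} delimiters are handled
+-- by the django_tag rules set up during HTML embedding below.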
+local operator = token(l.OPERATOR, S(':,.|')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'function', func}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'operator', operator}, +} + +-- Embedded in HTML. +local html = l.load('html') + +-- Embedded Django. +local django_start_rule = token('django_tag', '{' * S('{%')) +local django_end_rule = token('django_tag', S('%}') * '}') +l.embed_lexer(html, M, django_start_rule, django_end_rule) +-- Modify HTML patterns to embed Django. +html._RULES['comment'] = html._RULES['comment'] + comment + +M._tokenstyles = { + django_tag = l.STYLE_EMBEDDED +} + +local _foldsymbols = html._foldsymbols +_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '{[%%{]' +_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '[%%}]}' +_foldsymbols.django_tag = {['{{'] = 1, ['}}'] = -1, ['{%'] = 1, ['%}'] = -1} +M._foldsymbols = _foldsymbols + +return M diff --git a/lua/lexers/dmd.lua b/lua/lexers/dmd.lua @@ -0,0 +1,176 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- D LPeg lexer. +-- Heavily modified by Brian Schott (@Hackerpilot on Github). + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'dmd'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '//' * l.nonnewline_esc^0 +local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 +local nested_comment = l.nested_pair('/+', '+/') +local comment = token(l.COMMENT, line_comment + block_comment + nested_comment) + +-- Strings. +local sq_str = l.delimited_range("'", true) * S('cwd')^-1 +local dq_str = l.delimited_range('"') * S('cwd')^-1 +local lit_str = 'r' * l.delimited_range('"', false, true) * S('cwd')^-1 +local bt_str = l.delimited_range('`', false, true) * S('cwd')^-1 +local hex_str = 'x' * l.delimited_range('"') * S('cwd')^-1 +local other_hex_str = '\\x' * (l.xdigit * l.xdigit)^1 +local del_str = l.nested_pair('q"[', ']"') * S('cwd')^-1 + + l.nested_pair('q"(', ')"') * S('cwd')^-1 + + l.nested_pair('q"{', '}"') * S('cwd')^-1 + + l.nested_pair('q"<', '>"') * S('cwd')^-1 + + P('q') * l.nested_pair('{', '}') * S('cwd')^-1 +local string = token(l.STRING, del_str + sq_str + dq_str + lit_str + bt_str + + hex_str + other_hex_str) + +-- Numbers. +local dec = l.digit^1 * ('_' * l.digit^1)^0 +local hex_num = l.hex_num * ('_' * l.xdigit^1)^0 +local bin_num = '0' * S('bB') * S('01_')^1 +local oct_num = '0' * S('01234567_')^1 +local integer = S('+-')^-1 * (hex_num + oct_num + bin_num + dec) +local number = token(l.NUMBER, (l.float + integer) * S('uUlLdDfFi')^-1) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'abstract', 'align', 'asm', 'assert', 'auto', 'body', 'break', 'case', 'cast', + 'catch', 'const', 'continue', 'debug', 'default', 'delete', + 'deprecated', 'do', 'else', 'extern', 'export', 'false', 'final', 'finally', + 'for', 'foreach', 'foreach_reverse', 'goto', 'if', 'import', 'immutable', + 'in', 'inout', 'invariant', 'is', 'lazy', 'macro', 'mixin', 'new', 'nothrow', + 'null', 'out', 'override', 'pragma', 'private', 'protected', 'public', 'pure', + 'ref', 'return', 'scope', 'shared', 'static', 'super', 'switch', + 'synchronized', 'this', 'throw','true', 'try', 'typeid', 'typeof', 'unittest', + 'version', 'virtual', 'volatile', 'while', 'with', '__gshared', '__thread', + '__traits', '__vector', '__parameters' +}) + +-- Types. 
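+-- D's built-in types, including the complex and imaginary variants
+-- (cfloat, ifloat, creal, ...) plus library aliases like string and size_t.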
+local type = token(l.TYPE, word_match{ + 'alias', 'bool', 'byte', 'cdouble', 'cent', 'cfloat', 'char', 'class', + 'creal', 'dchar', 'delegate', 'double', 'enum', 'float', 'function', + 'idouble', 'ifloat', 'int', 'interface', 'ireal', 'long', 'module', 'package', + 'ptrdiff_t', 'real', 'short', 'size_t', 'struct', 'template', 'typedef', + 'ubyte', 'ucent', 'uint', 'ulong', 'union', 'ushort', 'void', 'wchar', + 'string', 'wstring', 'dstring', 'hash_t', 'equals_t' +}) + +-- Constants. +local constant = token(l.CONSTANT, word_match{ + '__FILE__', '__LINE__', '__DATE__', '__EOF__', '__TIME__', '__TIMESTAMP__', + '__VENDOR__', '__VERSION__', '__FUNCTION__', '__PRETTY_FUNCTION__', + '__MODULE__', +}) + +local class_sequence = token(l.TYPE, P('class') + P('struct')) * ws^1 * + token(l.CLASS, l.word) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('?=!<>+-*$/%&|^~.,;()[]{}')) + +-- Properties. +local properties = (type + identifier + operator) * token(l.OPERATOR, '.') * + token(l.VARIABLE, word_match{ + 'alignof', 'dig', 'dup', 'epsilon', 'idup', 'im', 'init', 'infinity', + 'keys', 'length', 'mangleof', 'mant_dig', 'max', 'max_10_exp', 'max_exp', + 'min', 'min_normal', 'min_10_exp', 'min_exp', 'nan', 'offsetof', 'ptr', + 're', 'rehash', 'reverse', 'sizeof', 'sort', 'stringof', 'tupleof', + 'values' + }) + +-- Preprocs. +local annotation = token('annotation', '@' * l.word^1) +local preproc = token(l.PREPROCESSOR, '#' * l.nonnewline^0) + +-- Traits. +local traits_list = token('traits', word_match{ + 'allMembers', 'classInstanceSize', 'compiles', 'derivedMembers', + 'getAttributes', 'getMember', 'getOverloads', 'getProtection', 'getUnitTests', + 'getVirtualFunctions', 'getVirtualIndex', 'getVirtualMethods', 'hasMember', + 'identifier', 'isAbstractClass', 'isAbstractFunction', 'isArithmetic', + 'isAssociativeArray', 'isFinalClass', 'isFinalFunction', 'isFloating', + 'isIntegral', 'isLazy', 'isNested', 'isOut', 'isOverrideFunction', 'isPOD', + 'isRef', 'isSame', 'isScalar', 'isStaticArray', 'isStaticFunction', + 'isUnsigned', 'isVirtualFunction', 'isVirtualMethod', 'parent' +}) + +local scopes_list = token('scopes', word_match{'exit', 'success', 'failure'}) + +-- versions +local versions_list = token('versions', word_match{ + 'AArch64', 'AIX', 'all', 'Alpha', 'Alpha_HardFloat', 'Alpha_SoftFloat', + 'Android', 'ARM', 'ARM_HardFloat', 'ARM_SoftFloat', 'ARM_SoftFP', 'ARM_Thumb', + 'assert', 'BigEndian', 'BSD', 'Cygwin', 'D_Coverage', 'D_Ddoc', 'D_HardFloat', + 'DigitalMars', 'D_InlineAsm_X86', 'D_InlineAsm_X86_64', 'D_LP64', + 'D_NoBoundsChecks', 'D_PIC', 'DragonFlyBSD', 'D_SIMD', 'D_SoftFloat', + 'D_Version2', 'D_X32', 'FreeBSD', 'GNU', 'Haiku', 'HPPA', 'HPPA64', 'Hurd', + 'IA64', 'LDC', 'linux', 'LittleEndian', 'MIPS32', 'MIPS64', 'MIPS_EABI', + 'MIPS_HardFloat', 'MIPS_N32', 'MIPS_N64', 'MIPS_O32', 'MIPS_O64', + 'MIPS_SoftFloat', 'NetBSD', 'none', 'OpenBSD', 'OSX', 'Posix', 'PPC', 'PPC64', + 'PPC_HardFloat', 'PPC_SoftFloat', 'S390', 'S390X', 'SDC', 'SH', 'SH64', + 'SkyOS', 'Solaris', 'SPARC', 'SPARC64', 'SPARC_HardFloat', 'SPARC_SoftFloat', + 'SPARC_V8Plus', 'SysV3', 'SysV4', 'unittest', 'Win32', 'Win64', 'Windows', + 'X86', 'X86_64' +}) + +local versions = token(l.KEYWORD, 'version') * l.space^0 * + token(l.OPERATOR, '(') * l.space^0 * versions_list + +local scopes = token(l.KEYWORD, 'scope') * l.space^0 * + token(l.OPERATOR, '(') * l.space^0 * scopes_list + +local traits = token(l.KEYWORD, '__traits') * l.space^0 * + 
token(l.OPERATOR, '(') * l.space^0 * traits_list + +local func = token(l.FUNCTION, l.word) * + #(l.space^0 * (P('!') * l.word^-1 * l.space^-1)^-1 * P('(')) + +M._rules = { + {'whitespace', ws}, + {'class', class_sequence}, + {'traits', traits}, + {'versions', versions}, + {'scopes', scopes}, + {'keyword', keyword}, + {'variable', properties}, + {'type', type}, + {'function', func}, + {'constant', constant}, + {'string', string}, + {'identifier', identifier}, + {'comment', comment}, + {'number', number}, + {'preproc', preproc}, + {'operator', operator}, + {'annotation', annotation}, +} + +M._tokenstyles = { + annotation = l.STYLE_PREPROCESSOR, + traits = l.STYLE_CLASS, + versions = l.STYLE_CONSTANT, + scopes = l.STYLE_CONSTANT +} + +M._foldsymbols = { + _patterns = {'[{}]', '/[*+]', '[*+]/', '//'}, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = { + ['/*'] = 1, ['*/'] = -1, ['/+'] = 1, ['+/'] = -1, + ['//'] = l.fold_line_comments('//') + } +} + +return M diff --git a/lua/lexers/dockerfile.lua b/lua/lexers/dockerfile.lua @@ -0,0 +1,55 @@ +-- Copyright 2016-2017 Alejandro Baez (https://keybase.io/baez). See LICENSE. +-- Dockerfile LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'dockerfile'} + +-- Whitespace +local indent = #l.starts_line(S(' \t')) * + (token(l.WHITESPACE, ' ') + token('indent_error', '\t'))^1 +local ws = token(l.WHITESPACE, S(' \t')^1 + l.newline^1) + +-- Comments. +local comment = token(l.COMMENT, '#' * l.nonnewline^0) + +-- Strings. +local sq_str = l.delimited_range("'", false, true) +local dq_str = l.delimited_range('"') +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'ADD', 'ARG', 'CMD', 'COPY', 'ENTRYPOINT', 'ENV', 'EXPOSE', 'FROM', 'LABEL', + 'MAINTAINER', 'ONBUILD', 'RUN', 'STOPSIGNAL', 'USER', 'VOLUME', 'WORKDIR' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Variable. +local variable = token(l.VARIABLE, + S('$')^1 * (S('{')^1 * l.word * S('}')^1 + l.word)) + +-- Operators. +local operator = token(l.OPERATOR, S('\\[],=:{}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'variable', variable}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, +} + +M._FOLDBYINDENTATION = true + +return M diff --git a/lua/lexers/dot.lua b/lua/lexers/dot.lua @@ -0,0 +1,71 @@ +-- Copyright 2006-2017 Brian "Sir Alaran" Schott. See LICENSE. +-- Dot LPeg lexer. +-- Based off of lexer code by Mitchell. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'dot'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '//' * l.nonnewline_esc^0 +local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local sq_str = l.delimited_range("'") +local dq_str = l.delimited_range('"') +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, l.digit^1 + l.float) + +-- Keywords. 
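+-- Graphviz attribute names (color, label, rankdir, ...) share the keyword
+-- token with the structural words graph, digraph, node and edge.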
+local keyword = token(l.KEYWORD, word_match{ + 'graph', 'node', 'edge', 'digraph', 'fontsize', 'rankdir', + 'fontname', 'shape', 'label', 'arrowhead', 'arrowtail', 'arrowsize', + 'color', 'comment', 'constraint', 'decorate', 'dir', 'headlabel', 'headport', + 'headURL', 'labelangle', 'labeldistance', 'labelfloat', 'labelfontcolor', + 'labelfontname', 'labelfontsize', 'layer', 'lhead', 'ltail', 'minlen', + 'samehead', 'sametail', 'style', 'taillabel', 'tailport', 'tailURL', 'weight', + 'subgraph' +}) + +-- Types. +local type = token(l.TYPE, word_match{ + 'box', 'polygon', 'ellipse', 'circle', 'point', 'egg', 'triangle', + 'plaintext', 'diamond', 'trapezium', 'parallelogram', 'house', 'pentagon', + 'hexagon', 'septagon', 'octagon', 'doublecircle', 'doubleoctagon', + 'tripleoctagon', 'invtriangle', 'invtrapezium', 'invhouse', 'Mdiamond', + 'Msquare', 'Mcircle', 'rect', 'rectangle', 'none', 'note', 'tab', 'folder', + 'box3d', 'record' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('->()[]{};')) + +M._rules = { + {'whitespace', ws}, + {'comment', comment}, + {'keyword', keyword}, + {'type', type}, + {'identifier', identifier}, + {'number', number}, + {'string', string}, + {'operator', operator}, +} + +M._foldsymbols = { + _patterns = {'[{}]', '/%*', '%*/', '//'}, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} +} + +return M diff --git a/lua/lexers/dsv.lua b/lua/lexers/dsv.lua @@ -0,0 +1,17 @@ +-- Copyright 2016 Christian Hesse +-- delimiter separated values LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'dsv'} + +-- Operators. +local operator = token(l.OPERATOR, S(',;:|')) + +M._rules = { + {'operator', operator} +} + +return M diff --git a/lua/lexers/eiffel.lua b/lua/lexers/eiffel.lua @@ -0,0 +1,69 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Eiffel LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'eiffel'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '--' * l.nonnewline^0) + +-- Strings. +local sq_str = l.delimited_range("'", true) +local dq_str = l.delimited_range('"', true) +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'alias', 'all', 'and', 'as', 'check', 'class', 'creation', 'debug', + 'deferred', 'do', 'else', 'elseif', 'end', 'ensure', 'expanded', 'export', + 'external', 'feature', 'from', 'frozen', 'if', 'implies', 'indexing', 'infix', + 'inherit', 'inspect', 'invariant', 'is', 'like', 'local', 'loop', 'not', + 'obsolete', 'old', 'once', 'or', 'prefix', 'redefine', 'rename', 'require', + 'rescue', 'retry', 'select', 'separate', 'then', 'undefine', 'until', + 'variant', 'when', 'xor', + 'current', 'false', 'precursor', 'result', 'strip', 'true', 'unique', 'void' +}) + +-- Types. +local type = token(l.TYPE, word_match{ + 'character', 'string', 'bit', 'boolean', 'integer', 'real', 'none', 'any' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. 
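+-- In the fold table below, 'class' uses a function rather than a constant
+-- so that 'deferred class' is not counted twice: 'deferred' already opens
+-- the fold, so 'class' then contributes 0.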
+local operator = token(l.OPERATOR, S('=!<>+-/*%&|^~.,:;?()[]{}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'type', type}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, +} + +M._foldsymbols = { + _patterns = {'[a-z]+', '%-%-'}, + [l.KEYWORD] = { + check = 1, debug = 1, deferred = 1, ['do'] = 1, from = 1, ['if'] = 1, + inspect = 1, once = 1, class = function(text, pos, line, s) + return line:find('deferred%s+class') and 0 or 1 + end, ['end'] = -1 + }, + [l.COMMENT] = {['--'] = l.fold_line_comments('--')} +} + +return M diff --git a/lua/lexers/elixir.lua b/lua/lexers/elixir.lua @@ -0,0 +1,123 @@ +-- Copyright 2015-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Contributed by Richard Philips. +-- Elixir LPeg lexer. + +local l = require('lexer') +local token, style, color, word_match = l.token, l.style, l.color, l.word_match +local B, P, R, S = lpeg.B, lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'elixir'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '#' * l.nonnewline_esc^0) + +-- Strings. +local dq_str = l.delimited_range('"', false) +local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1 +local string = token(l.STRING, triple_dq_str + dq_str) + +-- Numbers +local dec = l.digit * (l.digit + P("_"))^0 +local bin = '0b' * S('01')^1 +local oct = '0o' * R('07')^1 +local integer = bin + l.hex_num + oct + dec +local float = l.digit^1 * P(".") * l.digit^1 * S("eE") * + (S('+-')^-1 * l.digit^1)^-1 +local number_token = B(1 - R('az', 'AZ', '__')) * + (S('+-')^-1) * token(l.NUMBER, (float + integer)) + +-- Keywords. +local keyword_token = token(l.KEYWORD, word_match{ + "is_atom", "is_binary", "is_bitstring", "is_boolean", "is_float", + "is_function", "is_integer", "is_list", "is_map", "is_number", "is_pid", + "is_port", "is_record", "is_reference", "is_tuple", "is_exception", "case", + "when", "cond", "for", "if", "unless", "try", "receive", "send", "exit", + "raise", "throw", "after", "rescue", "catch", "else", "do", "end", "quote", + "unquote", "super", "import", "require", "alias", "use", "self", "with", "fn" +}) + +-- Functions +local function_token = token(l.FUNCTION, word_match{ + "defstruct", "defrecordp", "defrecord", "defprotocol", "defp", + "defoverridable", "defmodule", "defmacrop", "defmacro", "defimpl", + "defexception", "defdelegate", "defcallback", "def" +}) + +-- Sigils +local sigil11 = P("~") * S("CRSW") * l.delimited_range('<>', false, true) +local sigil12 = P("~") * S("CRSW") * l.delimited_range('{}', false, true) +local sigil13 = P("~") * S("CRSW") * l.delimited_range('[]', false, true) +local sigil14 = P("~") * S("CRSW") * l.delimited_range('()', false, true) +local sigil15 = P("~") * S("CRSW") * l.delimited_range('|', false, true) +local sigil16 = P("~") * S("CRSW") * l.delimited_range('/', false, true) +local sigil17 = P("~") * S("CRSW") * l.delimited_range('"', false, true) +local sigil18 = P("~") * S("CRSW") * l.delimited_range("'", false, true) +local sigil19 = P("~") * S("CRSW") * '"""' * (l.any - '"""')^0 * P('"""')^-1 +local sigil10 = P("~") * S("CRSW") * "'''" * (l.any - "'''")^0 * P("'''")^-1 +local sigil21 = P("~") * S("crsw") * l.delimited_range('<>', false, false) +local sigil22 = P("~") * S("crsw") * l.delimited_range('{}', false, false) +local sigil23 = P("~") * S("crsw") * l.delimited_range('[]', false, false) +local sigil24 = P("~") * S("crsw") * l.delimited_range('()', false,
false) +local sigil25 = P("~") * S("crsw") * l.delimited_range('|', false, false) +local sigil26 = P("~") * S("crsw") * l.delimited_range('/', false, false) +local sigil27 = P("~") * S("crsw") * l.delimited_range('"', false, false) +local sigil28 = P("~") * S("crsw") * l.delimited_range("'", false, false) +local sigil29 = P("~") * S("csrw") * '"""' * (l.any - '"""')^0 * P('"""')^-1 +local sigil20 = P("~") * S("csrw") * "'''" * (l.any - "'''")^0 * P("'''")^-1 +local sigil_token = token(l.REGEX, sigil10 + sigil19 + sigil11 + sigil12 + + sigil13 + sigil14 + sigil15 + sigil16 + + sigil17 + sigil18 + sigil20 + sigil29 + + sigil21 + sigil22 + sigil23 + sigil24 + + sigil25 + sigil26 + sigil27 + sigil28) +local sigiladdon_token = token(l.EMBEDDED, R('az', 'AZ')^0) + +-- Attributes +local attribute_token = token(l.LABEL, B(1 - R('az', 'AZ', '__')) * P('@') * + R('az','AZ') * R('az','AZ','09','__')^0) + +-- Booleans +local boolean_token = token(l.NUMBER, + P(':')^-1 * word_match{"true", "false", "nil"}) + +-- Identifiers +local identifier = token(l.IDENTIFIER, R('az', '__') * + R('az', 'AZ', '__', '09')^0 * S('?!')^-1) + +-- Atoms +local atom1 = B(1 - P(':')) * P(':') * dq_str +local atom2 = B(1 - P(':')) * P(':') * R('az', 'AZ') * + R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1 +local atom3 = B(1 - R('az', 'AZ', '__', '09', '::')) * + R('AZ') * R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1 +local atom_token = token(l.CONSTANT, atom1 + atom2 + atom3) + +-- Operators +local operator1 = word_match{"and", "or", "not", "when", "xor", "in"} +local operator2 = P('!==') + '!=' + '!' + '=~' + '===' + '==' + '=' + '<<<' + + '<<' + '<=' + '<-' + '<' + '>>>' + '>>' + '>=' + '>' + '->' + + '--' + '-' + '++' + '+' + '&&&' + '&&' + '&' + '|||' + '||' + + '|>' + '|' + '..' + '.' + '^^^' + '^' + '\\\\' + '::' + '*' + + '/' + '~~~' + '@' +local operator_token = token(l.OPERATOR, operator1 + operator2) + +M._rules = { + {'whitespace', ws}, + {'sigil', sigil_token * sigiladdon_token}, + {'atom', atom_token}, + {'string', string}, + {'comment', comment}, + {'attribute', attribute_token}, + {'boolean', boolean_token}, + {'function', function_token}, + {'keyword', keyword_token}, + {'operator', operator_token}, + {'identifier', identifier}, + {'number', number_token}, +} + +M._FOLDBYINDENTATION = true + +return M diff --git a/lua/lexers/elm.lua b/lua/lexers/elm.lua @@ -0,0 +1,64 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Elm LPeg lexer +-- Modified by Alex Suraci. +-- Adapted from Haskell LPeg lexer by Karl Schultheisz. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'elm'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '--' * l.nonnewline_esc^0 +local block_comment = '{-' * (l.any - '-}')^0 * P('-}')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local string = token(l.STRING, l.delimited_range('"')) + +-- Chars. +local char = token(l.STRING, l.delimited_range("'", true)) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'if', 'then', 'else', + 'case', 'of', + 'let', 'in', + 'module', 'import', 'as', 'exposing', + 'type', 'alias', + 'port', +}) + +-- Identifiers. +local word = (l.alnum + S("._'#"))^0 +local identifier = token(l.IDENTIFIER, (l.alpha + '_') * word) + +-- Operators. 
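+-- Any punctuation except the bracket characters ('-' is LPeg pattern
+-- difference). 'op' is reused by the constructor rule below, which styles
+-- ':'-led operator sequences (e.g. ':+') as type constructors.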
+local op = l.punct - S('()[]{}')
+local operator = token(l.OPERATOR, op)
+
+-- Types & type constructors.
+local constructor = token(l.TYPE, (l.upper * word) + (P(":") * (op^1 - P(":"))))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', constructor},
+ {'identifier', identifier},
+ {'string', string},
+ {'char', char},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._FOLDBYINDENTATION = true
+
+return M
diff --git a/lua/lexers/erlang.lua b/lua/lexers/erlang.lua
@@ -0,0 +1,81 @@
+-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Erlang LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'erlang'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '%' * l.nonnewline^0)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"'))
+
+-- Numbers.
+local const_char = '$' * (('\\' * l.ascii) + l.any)
+local number = token(l.NUMBER, const_char + l.float + l.integer)
+
+-- Atoms.
+local atom_pat = (l.lower * (l.alnum + '_')^0) + l.delimited_range("'")
+local atom = token(l.LABEL, atom_pat)
+
+-- Functions.
+local func = token(l.FUNCTION, atom_pat * #l.delimited_range("()", false, false, true))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', 'let', 'of',
+ 'query', 'receive', 'when'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, ((l.upper + '_') * (l.alnum + '_')^0))
+
+-- Operators.
+local named_operator = word_match{
+ 'div', 'rem', 'or', 'xor', 'bor', 'bxor', 'bsl', 'bsr', 'and', 'band', 'not',
+ 'bnot'
+}
+local operator = token(l.OPERATOR, S('-<>.;=/|#+*:,?!()[]{}') + named_operator)
+
+-- Directives.
+local directive = token('directive', '-' * word_match{
+ 'author', 'compile', 'copyright', 'define', 'doc', 'else', 'endif', 'export',
+ 'file', 'ifdef', 'ifndef', 'import', 'include_lib', 'include', 'module',
+ 'record', 'undef'
+})
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'operator', operator},
+ {'atom', atom},
+ {'identifier', identifier},
+ {'directive', directive},
+ {'string', string},
+ {'comment', comment},
+ {'number', number}
+}
+
+M._tokenstyles = {
+ directive = l.STYLE_PREPROCESSOR
+}
+
+M._foldsymbols = {
+ _patterns = {'[a-z]+', '[%(%)%[%]{}]', '%%'},
+ [l.KEYWORD] = {
+ case = 1, fun = 1, ['if'] = 1, query = 1, receive = 1, ['end'] = -1
+ },
+ [l.OPERATOR] = {
+ ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
+ },
+ [l.COMMENT] = {['%'] = l.fold_line_comments('%')}
+}
+
+return M
diff --git a/lua/lexers/fantom.lua b/lua/lexers/fantom.lua
@@ -0,0 +1,105 @@
+-- Fantom LPeg lexer.
+-- Based on Java LPeg lexer by Mitchell mitchell.att.foicica.com and Vim's Fantom syntax.
+-- By MarSoft.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'fantom'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local doc_comment = '**' * l.nonnewline_esc^0
+local comment = token(l.COMMENT, line_comment + block_comment + doc_comment)
+
+-- Strings.
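+-- delimited_range(ch, true) confines each quoted run to a single line;
+-- backslash escapes are honored by default.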
+local sq_str = l.delimited_range("'", true) +local dq_str = l.delimited_range('"', true) +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, (l.float + l.integer) * S('LlFfDd')^-1) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'using', 'native', -- external + 'goto', 'void', 'serializable', 'volatile', -- error + 'if', 'else', 'switch', -- conditional + 'do', 'while', 'for', 'foreach', 'each', -- repeat + 'true', 'false', -- boolean + 'null', -- constant + 'this', 'super', -- typedef + 'new', 'is', 'isnot', 'as', -- operator + 'plus', 'minus', 'mult', 'div', 'mod', 'get', 'set', 'slice', 'lshift', 'rshift', 'and', 'or', 'xor', 'inverse', 'negate', 'increment', 'decrement', 'equals', 'compare', -- long operator + 'return', -- stmt + 'static', 'const', 'final', -- storage class + 'virtual', 'override', 'once', -- slot + 'readonly', -- field + 'throw', 'try', 'catch', 'finally', -- exceptions + 'assert', -- assert + 'class', 'enum', 'mixin', -- typedef + 'break', 'continue', -- branch + 'default', 'case', -- labels + 'public', 'internal', 'protected', 'private', 'abstract', -- scope decl +}) + +-- Types. +local type = token(l.TYPE, word_match{ + 'Void', 'Bool', 'Int', 'Float', 'Decimal', + 'Str', 'Duration', 'Uri', 'Type', 'Range', + 'List', 'Map', 'Obj', + 'Err', 'Env', +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}#')) + +-- Annotations. +local facet = token('facet', '@' * l.word) + +-- Functions. +local func = token(l.FUNCTION, l.word) * #P('(') + +-- Classes. +local class_sequence = token(l.KEYWORD, P('class')) * ws^1 * + token(l.TYPE, l.word) * ( -- at most one inheritance spec + ws^1 * token(l.OPERATOR, P(':')) * ws^1 * + token(l.TYPE, l.word) * + ( -- at least 0 (i.e. any number) of additional classes + ws^0 * token(l.OPERATOR, P(',')) * ws^0 * token(l.TYPE, l.word) + )^0 + )^-1 + +M._rules = { + {'whitespace', ws}, + {'class', class_sequence}, + {'keyword', keyword}, + {'type', type}, + {'function', func}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'facet', facet}, + {'operator', operator}, +} + +M._tokenstyles = { + facet = l.STYLE_PREPROCESSOR +} + +M._foldsymbols = { + _patterns = {'[{}]', '/%*', '%*/', '//'}, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} +} + +return M diff --git a/lua/lexers/faust.lua b/lua/lexers/faust.lua @@ -0,0 +1,58 @@ +-- Copyright 2015-2017 David B. Lamkins <david@lamkins.net>. See LICENSE. +-- Faust LPeg lexer, see http://faust.grame.fr/ + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'faust'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '//' * l.nonnewline^0 +local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local string = token(l.STRING, l.delimited_range('"', true)) + +-- Numbers. +local int = R('09')^1 +local rad = P('.') +local exp = (P('e') * S('+-')^-1 * int)^-1 +local flt = int * (rad * int)^-1 * exp + int^-1 * rad * int * exp +local number = token(l.NUMBER, flt + int) + +-- Keywords. 
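+-- Caveat: the entries 'h:', 'v:' and 't:' contain ':', but no extra word
+-- characters are passed to word_match, so as written those three entries
+-- are unlikely to ever match (only 'h', 'v' or 't' would be scanned).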
+local keyword = token(l.KEYWORD, word_match{
+ 'declare', 'import', 'mdoctags', 'dependencies', 'distributed', 'inputs',
+ 'outputs', 'par', 'seq', 'sum', 'prod', 'xor', 'with', 'environment',
+ 'library', 'component', 'ffunction', 'fvariable', 'fconstant', 'int', 'float',
+ 'case', 'waveform', 'h:', 'v:', 't:'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local punct = S('+-/*%<>~!=^&|?~:;,.()[]{}@#$`\\\'')
+local operator = token(l.OPERATOR, punct)
+
+-- Pragmas.
+local mdoc = P('<mdoc>') * (l.any - P('</mdoc>'))^0 * P('</mdoc>')
+local pragma = token(l.PREPROCESSOR, mdoc)
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'pragma', pragma},
+ {'keyword', keyword},
+ {'number', number},
+ {'operator', operator},
+ {'identifier', identifier},
+ {'string', string},
+}
+
+return M
diff --git a/lua/lexers/fennel.lua b/lua/lexers/fennel.lua
@@ -0,0 +1,88 @@
+-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Fennel LPeg lexer.
+-- Adapted from the Lua lexer originally written by Peter Odding, 2007/04/04.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'fennel'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = ';' * l.nonnewline^0
+local comment = token(l.COMMENT, line_comment)
+
+-- Strings.
+local dq_str = l.delimited_range('"')
+local string = token(l.STRING, dq_str)
+
+-- Numbers.
+local lua_integer = P('-')^-1 * (l.hex_num + l.dec_num)
+local number = token(l.NUMBER, l.float + lua_integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ '%', '*', '+', '-', '->', '->>', '-?>', '-?>>', '.', '..', '/', '//', ':', '<', '<=', '=', '>', '>=', '^', '~=', 'λ',
+ 'and', 'comment', 'do', 'doc', 'doto', 'each', 'eval-compiler', 'fn', 'for', 'global', 'hashfn', 'if', 'include', 'lambda',
+ 'length', 'let', 'local', 'lua', 'macro', 'macros', 'match', 'not', 'not=', 'or', 'partial', 'quote', 'require-macros',
+ 'set', 'set-forcibly!', 'tset', 'values', 'var', 'when', 'while'
+}, "%*+-./:<=>?~^λ!"))
+
+-- Libraries.
+local library = token('library', word_match({
+ -- Coroutine.
+ 'coroutine', 'coroutine.create', 'coroutine.resume', 'coroutine.running',
+ 'coroutine.status', 'coroutine.wrap', 'coroutine.yield',
+ -- Module.
+ 'package', 'package.cpath', 'package.loaded', 'package.loadlib',
+ 'package.path', 'package.preload',
+ -- String.
+ 'string', 'string.byte', 'string.char', 'string.dump', 'string.find',
+ 'string.format', 'string.gmatch', 'string.gsub', 'string.len', 'string.lower',
+ 'string.match', 'string.rep', 'string.reverse', 'string.sub', 'string.upper',
+ -- Table.
+ 'table', 'table.concat', 'table.insert', 'table.remove', 'table.sort',
+ -- Math.
+ 'math', 'math.abs', 'math.acos', 'math.asin', 'math.atan', 'math.ceil',
+ 'math.cos', 'math.deg', 'math.exp', 'math.floor', 'math.fmod', 'math.huge',
+ 'math.log', 'math.max', 'math.min', 'math.modf', 'math.pi', 'math.rad',
+ 'math.random', 'math.randomseed', 'math.sin', 'math.sqrt', 'math.tan',
+ -- IO.
+ 'io', 'io.close', 'io.flush', 'io.input', 'io.lines', 'io.open', 'io.output',
+ 'io.popen', 'io.read', 'io.stderr', 'io.stdin', 'io.stdout', 'io.tmpfile',
+ 'io.type', 'io.write',
+ -- OS.
+ 'os', 'os.clock', 'os.date', 'os.difftime', 'os.execute', 'os.exit',
+ 'os.getenv', 'os.remove', 'os.rename', 'os.setlocale', 'os.time',
+ 'os.tmpname',
+ -- Debug.
+ 'debug', 'debug.debug', 'debug.gethook', 'debug.getinfo', 'debug.getlocal', + 'debug.getmetatable', 'debug.getregistry', 'debug.getupvalue', + 'debug.sethook', 'debug.setlocal', 'debug.setmetatable', 'debug.setupvalue', + 'debug.traceback', +}, '.')) + +local initial = l.alpha + S"|$%&#*+-./:<=>?~^_λ!" +local subsequent = initial + l.digit + +-- Identifiers. +local identifier = token(l.IDENTIFIER, initial * subsequent^0) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'library', library}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number} +} + +M._tokenstyles = { + library = l.STYLE_TYPE, +} + +return M diff --git a/lua/lexers/fish.lua b/lua/lexers/fish.lua @@ -0,0 +1,76 @@ +-- Copyright 2015-2017 Jason Schindler. See LICENSE. +-- Fish (http://fishshell.com/) script LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'fish'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- shebang +local shebang = token('shebang', '#!/' * l.nonnewline^0) + +-- Comments. +local comment = token(l.COMMENT, '#' * l.nonnewline^0) + +-- Strings. +local sq_str = l.delimited_range("'", false, true) +local dq_str = l.delimited_range('"') + +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'alias', 'and', 'begin', 'bg', 'bind', 'block', 'break', 'breakpoint', + 'builtin', 'case', 'cd', 'command', 'commandline', 'complete', 'contains', + 'continue', 'count', 'dirh', 'dirs', 'echo', 'else', 'emit', 'end', 'eval', + 'exec', 'exit', 'fg', 'fish', 'fish_config', 'fish_indent', 'fish_pager', + 'fish_prompt', 'fish_right_prompt', 'fish_update_completions', 'fishd', 'for', + 'funced', 'funcsave', 'function', 'functions', 'help', 'history', 'if', 'in', + 'isatty', 'jobs', 'math', 'mimedb', 'nextd', 'not', 'open', 'or', 'popd', + 'prevd', 'psub', 'pushd', 'pwd', 'random', 'read', 'return', 'set', + 'set_color', 'source', 'status', 'switch', 'test', 'trap', 'type', 'ulimit', + 'umask', 'vared', 'while' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Variables. +local variable = token(l.VARIABLE, + '$' * l.word + '$' * l.delimited_range('{}', true, true)) + +-- Operators. +local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}')) + +M._rules = { + {'whitespace', ws}, + {'shebang', shebang}, + {'keyword', keyword}, + {'identifier', identifier}, + {'variable', variable}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, +} + +M._tokenstyles = { + shebang = l.STYLE_LABEL +} + +M._foldsymbols = { + _patterns = {'%l+'}, + [l.KEYWORD] = { + begin = 1, ['for'] = 1, ['function'] = 1, ['if'] = 1, switch = 1, + ['while'] = 1, ['end'] = -1 + } +} + +return M diff --git a/lua/lexers/forth.lua b/lua/lexers/forth.lua @@ -0,0 +1,71 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Forth LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'forth'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = S('|\\') * l.nonnewline^0 +local block_comment = '(' * (l.any - ')')^0 * P(')')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. 
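+-- Forth strings are opened by a parsing word rather than a bare quote, so
+-- each pattern below pairs the introducing word (c" s" s\" ." .( abort")
+-- with its delimited body.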
+local c_str = 'c' * l.delimited_range('"', true, true) +local s_str = 's' * l.delimited_range('"', true, true) +local s_bs_str = 's\\' * l.delimited_range('"', true, false) +local dot_str = '.' * l.delimited_range('"', true, true) +local dot_paren_str = '.' * l.delimited_range('()', true, true, false) +local abort_str = 'abort' * l.delimited_range('"', true, true) +local string = token( + l.STRING, + c_str + s_str + s_bs_str + dot_str + dot_paren_str + abort_str +) + +-- Numbers. +local number = token(l.NUMBER, P('-')^-1 * l.digit^1 * (S('./') * l.digit^1)^-1) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match({ + '#>', '#s', '*/', '*/mod', '+loop', ',', '.', '.r', '/mod', '0<', '0<>', + '0>', '0=', '1+', '1-', '2!', '2*', '2/', '2>r', '2@', '2drop', '2dup', + '2over', '2r>', '2r@', '2swap', ':noname', '<#', '<>', '>body', '>in', + '>number', '>r', '?do','?dup', '@', 'abort', 'abs', 'accept', 'action-of', + 'again', 'align', 'aligned', 'allot', 'and', 'base', 'begin', 'bl', + 'buffer:', 'c!', 'c,', 'c@', 'case', 'cell+', 'cells', 'char', 'char+', + 'chars', 'compile,', 'constant', 'count', 'cr', 'create', 'decimal', 'defer', + 'defer!', 'defer@', 'depth', 'do', 'does>', 'drop', 'dup', 'else', 'emit', + 'endcase', 'endof', 'environment?', 'erase', 'evaluate', 'execute', 'exit', + 'false', 'fill', 'find', 'fm/mod', 'here', 'hex', 'hold', 'holds', 'i', 'if', + 'immediate', 'invert', 'is', 'j', 'key', 'leave', 'literal', 'loop', + 'lshift', 'm*', 'marker', 'max', 'min', 'mod', 'move', 'negate', 'nip', 'of', + 'or', 'over', 'pad', 'parse', 'parse-name', 'pick', 'postpone', 'quit', 'r>', + 'r@', 'recurse', 'refill', 'restore-input', 'roll', 'rot', 'rshift', 's>d', + 'save-input', 'sign', 'sm/rem', 'source', 'source-id', 'space', 'spaces', + 'state', 'swap', 'to', 'then', 'true', 'tuck', 'type', 'u.', 'u.r', 'u>', + 'u<', 'um*', 'um/mod', 'unloop', 'until', 'unused', 'value', 'variable', + 'while', 'within', 'word', 'xor', '[\']', '[char]', '[compile]' +}, '><-@!?+,=[].\'', true)) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, (l.alnum + S('+-*=<>.?/\'%,_$#'))^1) + +-- Operators. +local operator = token(l.OPERATOR, S(':;<>+*-/[]#')) + +M._rules = { + {'whitespace', ws}, + {'string', string}, + {'keyword', keyword}, + {'identifier', identifier}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, +} + +return M diff --git a/lua/lexers/fortran.lua b/lua/lexers/fortran.lua @@ -0,0 +1,91 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Fortran LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'fortran'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local c_comment = l.starts_line(S('Cc')) * l.nonnewline^0 +local d_comment = l.starts_line(S('Dd')) * l.nonnewline^0 +local ex_comment = l.starts_line('!') * l.nonnewline^0 +local ast_comment = l.starts_line('*') * l.nonnewline^0 +local line_comment = '!' * l.nonnewline^0 +local comment = token(l.COMMENT, c_comment + d_comment + ex_comment + + ast_comment + line_comment) + +-- Strings. +local sq_str = l.delimited_range("'", true, true) +local dq_str = l.delimited_range('"', true, true) +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, (l.float + l.integer) * -l.alpha) + +-- Keywords. 
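+-- word_match is given '.' as an extra word character so dotted forms like
+-- '.and.' and '.true.' match whole, and true for case-insensitive matching,
+-- since Fortran is case-insensitive.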
+local keyword = token(l.KEYWORD, word_match({ + 'include', 'program', 'module', 'subroutine', 'function', 'contains', 'use', + 'call', 'return', + -- Statements. + 'case', 'select', 'default', 'continue', 'cycle', 'do', 'while', 'else', 'if', + 'elseif', 'then', 'elsewhere', 'end', 'endif', 'enddo', 'forall', 'where', + 'exit', 'goto', 'pause', 'stop', + -- Operators. + '.not.', '.and.', '.or.', '.xor.', '.eqv.', '.neqv.', '.eq.', '.ne.', '.gt.', + '.ge.', '.lt.', '.le.', + -- Logical. + '.false.', '.true.' +}, '.', true)) + +-- Functions. +local func = token(l.FUNCTION, word_match({ + -- I/O. + 'backspace', 'close', 'endfile', 'inquire', 'open', 'print', 'read', 'rewind', + 'write', 'format', + -- Type conversion, utility, and math. + 'aimag', 'aint', 'amax0', 'amin0', 'anint', 'ceiling', 'cmplx', 'conjg', + 'dble', 'dcmplx', 'dfloat', 'dim', 'dprod', 'float', 'floor', 'ifix', 'imag', + 'int', 'logical', 'modulo', 'nint', 'real', 'sign', 'sngl', 'transfer', + 'zext', 'abs', 'acos', 'aimag', 'aint', 'alog', 'alog10', 'amax0', 'amax1', + 'amin0', 'amin1', 'amod', 'anint', 'asin', 'atan', 'atan2', 'cabs', 'ccos', + 'char', 'clog', 'cmplx', 'conjg', 'cos', 'cosh', 'csin', 'csqrt', 'dabs', + 'dacos', 'dasin', 'datan', 'datan2', 'dble', 'dcos', 'dcosh', 'ddim', 'dexp', + 'dim', 'dint', 'dlog', 'dlog10', 'dmax1', 'dmin1', 'dmod', 'dnint', 'dprod', + 'dreal', 'dsign', 'dsin', 'dsinh', 'dsqrt', 'dtan', 'dtanh', 'exp', 'float', + 'iabs', 'ichar', 'idim', 'idint', 'idnint', 'ifix', 'index', 'int', 'isign', + 'len', 'lge', 'lgt', 'lle', 'llt', 'log', 'log10', 'max', 'max0', 'max1', + 'min', 'min0', 'min1', 'mod', 'nint', 'real', 'sign', 'sin', 'sinh', 'sngl', + 'sqrt', 'tan', 'tanh' +}, nil, true)) + +-- Types. +local type = token(l.TYPE, word_match({ + 'implicit', 'explicit', 'none', 'data', 'parameter', 'allocate', + 'allocatable', 'allocated', 'deallocate', 'integer', 'real', 'double', + 'precision', 'complex', 'logical', 'character', 'dimension', 'kind', +}, nil, true)) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.alnum^1) + +-- Operators. +local operator = token(l.OPERATOR, S('<>=&+-/*,()')) + +M._rules = { + {'whitespace', ws}, + {'comment', comment}, + {'keyword', keyword}, + {'function', func}, + {'type', type}, + {'number', number}, + {'identifier', identifier}, + {'string', string}, + {'operator', operator}, +} + +return M diff --git a/lua/lexers/fsharp.lua b/lua/lexers/fsharp.lua @@ -0,0 +1,76 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- F# LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'fsharp'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = P('//') * l.nonnewline^0 +local block_comment = l.nested_pair('(*', '*)') +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local sq_str = l.delimited_range("'", true) +local dq_str = l.delimited_range('"', true) +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, (l.float + l.integer * S('uUlL')^-1)) + +-- Preprocessor. +local preproc_word = word_match{ + 'ifndef', 'ifdef', 'if', 'else', 'endif', 'light', 'region', 'endregion' +} +local preproc = token(l.PREPROCESSOR, + l.starts_line('#') * S('\t ')^0 * preproc_word * + (l.nonnewline_esc^1 + l.space * l.nonnewline_esc^0)) + +-- Keywords. 
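+-- Note: this list mixes current F# keywords with words the language spec
+-- only reserves for future use ('atomic', 'mixin', 'process', ...), so some
+-- entries rarely appear in real code.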
+local keyword = token(l.KEYWORD, word_match{
+ 'abstract', 'and', 'as', 'assert', 'asr', 'begin', 'class', 'default',
+ 'delegate', 'do', 'done', 'downcast', 'downto', 'else', 'end', 'enum',
+ 'exception', 'false', 'finally', 'for', 'fun', 'function', 'if', 'in',
+ 'inherit', 'interface', 'land', 'lazy', 'let', 'lor', 'lsl', 'lsr', 'lxor',
+ 'match', 'member', 'mod', 'module', 'mutable', 'namespace', 'new', 'null',
+ 'of', 'open', 'or', 'override', 'sig', 'static', 'struct', 'then', 'to',
+ 'true', 'try', 'type', 'val', 'when', 'inline', 'upcast', 'while', 'with',
+ 'async', 'atomic', 'break', 'checked', 'component', 'const', 'constructor',
+ 'continue', 'eager', 'event', 'external', 'fixed', 'functor', 'include',
+ 'method', 'mixin', 'process', 'property', 'protected', 'public', 'pure',
+ 'readonly', 'return', 'sealed', 'switch', 'virtual', 'void', 'volatile',
+ 'where',
+ -- Booleans.
+ 'true', 'false'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'bool', 'byte', 'sbyte', 'int16', 'uint16', 'int', 'uint32', 'int64',
+ 'uint64', 'nativeint', 'unativeint', 'char', 'string', 'decimal', 'unit',
+ 'void', 'float32', 'single', 'float', 'double'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=<>+-*/^.,:;~!@#%^&|?[](){}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
diff --git a/lua/lexers/fstab.lua b/lua/lexers/fstab.lua
@@ -0,0 +1,569 @@
+-- Copyright 2016 Christian Hesse
+-- fstab LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'fstab'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, l.starts_line('#') * l.nonnewline^0)
+
+-- Numbers.
+local dec = l.digit^1 * ('_' * l.digit^1)^0
+local oct_num = '0' * S('01234567_')^1
+local integer = S('+-')^-1 * (l.hex_num + oct_num + dec)
+local number = token(l.NUMBER, (l.float + integer))
+
+-- Keywords.
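+-- '.' and '-' are passed to word_match as extra word characters (the '.-'
+-- argument closing this list) so options such as 'x-systemd.automount' and
+-- 'remount-ro' match as single keywords.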
+local keyword = token(l.KEYWORD, word_match({
+ -- basic filesystem-independent mount options
+ 'async',
+ 'atime',
+ 'auto',
+ 'comment',
+ 'context',
+ 'defaults',
+ 'defcontext',
+ 'dev',
+ 'dirsync',
+ 'exec',
+ 'fscontext',
+ 'group',
+ 'iversion',
+ 'lazytime',
+ 'loud',
+ 'mand',
+ '_netdev',
+ 'noatime',
+ 'noauto',
+ 'nodev',
+ 'nodiratime',
+ 'noexec',
+ 'nofail',
+ 'noiversion',
+ 'nolazytime',
+ 'nomand',
+ 'norelatime',
+ 'nostrictatime',
+ 'nosuid',
+ 'nouser',
+ 'owner',
+ 'relatime',
+ 'remount',
+ 'ro',
+ 'rootcontext',
+ 'rw',
+ 'silent',
+ 'strictatime',
+ 'suid',
+ 'sync',
+ 'user',
+ 'users',
+
+ -- mount options for systemd, see systemd.mount(5)
+ 'x-systemd.automount',
+ 'x-systemd.device-timeout',
+ 'x-systemd.idle-timeout',
+ 'x-systemd.mount-timeout',
+ 'x-systemd.requires',
+ 'x-systemd.requires-mounts-for',
+ 'x-initrd.mount',
+
+ -- mount options for adfs
+ 'uid',
+ 'gid',
+ 'ownmask',
+ 'othmask',
+
+ -- mount options for affs
+ 'uid',
+ 'gid',
+ 'setuid',
+ 'setgid',
+ 'mode',
+ 'protect',
+ 'usemp',
+ 'verbose',
+ 'prefix',
+ 'volume',
+ 'reserved',
+ 'root',
+ 'bs',
+ 'grpquota',
+ 'noquota',
+ 'quota',
+ 'usrquota',
+
+ -- mount options for btrfs
+ 'alloc_start',
+ 'autodefrag',
+ 'check_int',
+ 'check_int_data',
+ 'check_int_print_mask',
+ 'commit',
+ 'compress',
+ 'zlib',
+ 'lzo',
+ 'no',
+ 'compress-force',
+ 'degraded',
+ 'device',
+ 'discard',
+ 'enospc_debug',
+ 'fatal_errors',
+ 'bug',
+ 'panic',
+ 'flushoncommit',
+ 'inode_cache',
+ 'max_inline',
+ 'metadata_ratio',
+ 'noacl',
+ 'nobarrier',
+ 'nodatacow',
+ 'nodatasum',
+ 'notreelog',
+ 'recovery',
+ 'rescan_uuid_tree',
+ 'skip_balance',
+ 'nospace_cache',
+ 'clear_cache',
+ 'ssd',
+ 'nossd',
+ 'ssd_spread',
+ 'subvol',
+ 'subvolid',
+ 'subvolrootid',
+ 'thread_pool',
+ 'user_subvol_rm_allowed',
+
+ -- mount options for devpts
+ 'uid',
+ 'gid',
+ 'mode',
+ 'newinstance',
+ 'ptmxmode',
+
+ -- mount options for ext2
+ 'acl',
+ 'noacl',
+ 'bsddf',
+ 'minixdf',
+ 'check',
+ 'nocheck',
+ 'debug',
+ 'errors',
+ 'continue',
+ 'remount-ro',
+ 'panic',
+ 'grpid',
+ 'bsdgroups',
+ 'nogrpid',
+ 'sysvgroups',
+ 'grpquota',
+ 'noquota',
+ 'quota',
+ 'usrquota',
+ 'nouid32',
+ 'oldalloc',
+ 'orlov',
+ 'resgid',
+ 'resuid',
+ 'sb',
+ 'user_xattr',
+ 'nouser_xattr',
+
+ -- mount options for ext3
+ 'journal',
+ 'update',
+ 'journal_dev',
+ 'journal_path',
+ 'norecovery',
+ 'noload',
+ 'data',
+ 'journal',
+ 'ordered',
+ 'writeback',
+ 'data_err',
+ 'ignore',
+ 'abort',
+ 'barrier',
+ 'commit',
+ 'user_xattr',
+ 'acl',
+ 'usrjquota',
+ 'grpjquota',
+ 'jqfmt',
+
+ -- mount options for ext4
+ 'journal_checksum',
+ 'journal_async_commit',
+ 'barrier',
+ 'nobarrier',
+ 'inode_readahead_blks',
+ 'stripe',
+ 'delalloc',
+ 'nodelalloc',
+ 'max_batch_time',
+ 'min_batch_time',
+ 'journal_ioprio',
+ 'abort',
+ 'auto_da_alloc',
+ 'noauto_da_alloc',
+ 'noinit_itable',
+ 'init_itable',
+ 'discard',
+ 'nodiscard',
+ 'nouid32',
+ 'block_validity',
+ 'noblock_validity',
+ 'dioread_lock',
+ 'dioread_nolock',
+ 'max_dir_size_kb',
+ 'i_version',
+
+ -- mount options for fat (common part of msdos, umsdos and vfat)
+ 'blocksize',
+ 'uid',
+ 'gid',
+ 'umask',
+ 'dmask',
+ 'fmask',
+ 'allow_utime',
+ 'check',
+ 'relaxed',
+ 'normal',
+ 'strict',
+ 'codepage',
+ 'conv',
+ 'binary',
+ 'text',
+ 'auto',
+ 'cvf_format',
+ 'cvf_option',
+ 'debug',
+ 'discard',
+ 'dos1xfloppy',
+ 'errors',
+ 'panic',
+ 'continue',
+ 'remount-ro',
+ 'fat',
+ 'iocharset',
+ 'nfs',
+ 'stale_rw',
+ 'nostale_ro',
+ 'tz',
+ 'time_offset',
+ 'quiet',
+ 'rodir',
+
'showexec', + 'sys_immutable', + 'flush', + 'usefree', + 'dots', + 'nodots', + 'dotsOK', + + -- mount options for hfs + 'creator', + 'type', + 'uid', + 'gid', + 'dir_umask', + 'file_umask', + 'umask', + 'session', + 'part', + 'quiet', + + -- mount options for hpfs + 'uid', + 'gid', + 'umask', + 'case', + 'lower', + 'asis', + 'conv', + 'binary', + 'text', + 'auto', + 'nocheck', + + -- mount options for iso9660 + 'norock', + 'nojoliet', + 'check', + 'relaxed', + 'strict', + 'uid', + 'gid', + 'map', + 'normal', + 'offacorn', + 'mode', + 'unhide', + 'block', + 'conv', + 'auto', + 'binary', + 'mtext', + 'text', + 'cruft', + 'session', + 'sbsector', + 'iocharset', + 'utf8', + + -- mount options for jfs + 'iocharset', + 'resize', + 'nointegrity', + 'integrity', + 'errors', + 'continue', + 'remount-ro', + 'panic', + 'noquota', + 'quota', + 'usrquota', + 'grpquota', + + -- mount options for ntfs + 'iocharset', + 'nls', + 'utf8', + 'uni_xlate', + 'posix', + 'uid', + 'gid', + 'umask', + + -- mount options for overlay + 'lowerdir', + 'upperdir', + 'workdir', + + -- mount options for reiserfs + 'conv', + 'hash', + 'rupasov', + 'tea', + 'r5', + 'detect', + 'hashed_relocation', + 'no_unhashed_relocation', + 'noborder', + 'nolog', + 'notail', + 'replayonly', + 'resize', + 'user_xattr', + 'acl', + 'barrier', + 'none', + 'flush', + + -- mount options for tmpfs + 'size', + 'nr_blocks', + 'nr_inodes', + 'mode', + 'uid', + 'gid', + 'mpol', + 'default', + 'prefer', + 'bind', + 'interleave', + + -- mount options for ubifs + 'bulk_read', + 'no_bulk_read', + 'chk_data_crc', + 'no_chk_data_crc.', + 'compr', + 'none', + 'lzo', + 'zlib', + + -- mount options for udf + 'gid', + 'umask', + 'uid', + 'unhide', + 'undelete', + 'nostrict', + 'iocharset', + 'bs', + 'novrs', + 'session', + 'anchor', + 'volume', + 'partition', + 'lastblock', + 'fileset', + 'rootdir', + + -- mount options for ufs + 'ufstype', + 'old', + '44bsd', + 'ufs2', + '5xbsd', + 'sun', + 'sunx86', + 'hp', + 'nextstep', + 'nextstep-cd', + 'openstep', + 'onerror', + 'lock', + 'umount', + 'repair', + + -- mount options for vfat + 'uni_xlate', + 'posix', + 'nonumtail', + 'utf8', + 'shortname', + 'lower', + 'win95', + 'winnt', + 'mixed', + + -- mount options for usbfs + 'devuid', + 'devgid', + 'devmode', + 'busuid', + 'busgid', + 'busmode', + 'listuid', + 'listgid', + 'listmode', + + -- filesystems + 'adfs', + 'ados', + 'affs', + 'anon_inodefs', + 'atfs', + 'audiofs', + 'auto', + 'autofs', + 'bdev', + 'befs', + 'bfs', + 'btrfs', + 'binfmt_misc', + 'cd9660', + 'cfs', + 'cgroup', + 'cifs', + 'coda', + 'configfs', + 'cpuset', + 'cramfs', + 'devfs', + 'devpts', + 'devtmpfs', + 'e2compr', + 'efs', + 'ext2', + 'ext2fs', + 'ext3', + 'ext4', + 'fdesc', + 'ffs', + 'filecore', + 'fuse', + 'fuseblk', + 'fusectl', + 'hfs', + 'hpfs', + 'hugetlbfs', + 'iso9660', + 'jffs', + 'jffs2', + 'jfs', + 'kernfs', + 'lfs', + 'linprocfs', + 'mfs', + 'minix', + 'mqueue', + 'msdos', + 'ncpfs', + 'nfs', + 'nfsd', + 'nilfs2', + 'none', + 'ntfs', + 'null', + 'nwfs', + 'overlay', + 'ovlfs', + 'pipefs', + 'portal', + 'proc', + 'procfs', + 'pstore', + 'ptyfs', + 'qnx4', + 'reiserfs', + 'ramfs', + 'romfs', + 'securityfs', + 'shm', + 'smbfs', + 'squashfs', + 'sockfs', + 'sshfs', + 'std', + 'subfs', + 'swap', + 'sysfs', + 'sysv', + 'tcfs', + 'tmpfs', + 'udf', + 'ufs', + 'umap', + 'umsdos', + 'union', + 'usbfs', + 'userfs', + 'vfat', + 'vs3fs', + 'vxfs', + 'wrapfs', + 'wvfs', + 'xenfs', + 'xfs', + 'zisofs', +}, '.-')) + +-- Identifiers. 
+local word = (l.alpha + '_') * (l.alnum + S('_.'))^0 +local identifier = token(l.IDENTIFIER, word) + +-- Operators. +local operator = token(l.OPERATOR, S('=,')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'identifier', identifier}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, +} + +M._LEXBYLINE = true + +return M diff --git a/lua/lexers/gap.lua b/lua/lexers/gap.lua @@ -0,0 +1,56 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Gap LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'gap'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '#' * l.nonnewline^0) + +-- Strings. +local sq_str = l.delimited_range("'", true) +local dq_str = l.delimited_range('"', true) +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, l.digit^1 * -l.alpha) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'and', 'break', 'continue', 'do', 'elif', 'else', 'end', 'fail', 'false', + 'fi', 'for', 'function', 'if', 'in', 'infinity', 'local', 'not', 'od', 'or', + 'rec', 'repeat', 'return', 'then', 'true', 'until', 'while' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('*+-,./:;<=>~^#()[]{}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, +} + +M._foldsymbols = { + _patterns = {'[a-z]+', '#'}, + [l.KEYWORD] = { + ['function'] = 1, ['end'] = -1, ['do'] = 1, od = -1, ['if'] = 1, fi = -1, + ['repeat'] = 1, ['until'] = -1 + }, + [l.COMMENT] = {['#'] = l.fold_line_comments('#')} +} + +return M diff --git a/lua/lexers/gemini.lua b/lua/lexers/gemini.lua @@ -0,0 +1,48 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Markdown LPeg lexer. +-- Copyright 2020 Haelwenn (lanodan) Monnier <contact+gemini.lua@hacktivis.me> +-- Gemini / Gemtext LPeg lexer. +-- See https://gemini.circumlunar.space/docs/specification.html + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'gemini'} + +local ws = token(l.WHITESPACE, S(' \t')^1 + S('\v\r\n')^1) + +local link = token('link', l.starts_line('=>') * l.nonnewline^0) + +-- Should only match ``` at start of line +local pre = token('pre', l.delimited_range('```', false, true)) + +local header = token('h3', l.starts_line('###') * l.nonnewline^0) + + token('h2', l.starts_line('##') * l.nonnewline^0) + + token('h1', l.starts_line('#') * l.nonnewline^0) + +local list = token('list', l.starts_line('*') * l.nonnewline^0) + +local blockquote = token(l.STRING, l.starts_line('>') * l.nonnewline^0) + +M._rules = { + {'header', header}, + {'list', list}, + {'blockquote', blockquote}, + {'pre', pre}, + {'whitespace', ws}, + {'link', link} +} + +local font_size = 10 +local hstyle = 'fore:red' +M._tokenstyles = { + h3 = hstyle..',size:'..(font_size + 3), + h2 = hstyle..',size:'..(font_size + 4), + h1 = hstyle..',size:'..(font_size + 5), + pre = l.STYLE_EMBEDDED..',eolfilled', + link = 'underlined', + list = l.STYLE_CONSTANT +} + +return M diff --git a/lua/lexers/gettext.lua b/lua/lexers/gettext.lua @@ -0,0 +1,39 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. 
See LICENSE. +-- Gettext LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'gettext'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '#' * S(': .~') * l.nonnewline^0) + +-- Strings. +local string = token(l.STRING, l.delimited_range('"', true)) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match({ + 'msgid', 'msgid_plural', 'msgstr', 'fuzzy', 'c-format', 'no-c-format' +}, '-', true)) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Variables. +local variable = token(l.VARIABLE, S('%$@') * l.word) + +M._rules = { + {'whitespace', ws}, + {'comment', comment}, + {'string', string}, + {'keyword', keyword}, + {'identifier', identifier}, + {'variable', variable}, +} + +return M diff --git a/lua/lexers/gherkin.lua b/lua/lexers/gherkin.lua @@ -0,0 +1,64 @@ +-- Copyright 2015-2017 Jason Schindler. See LICENSE. +-- Gherkin (https://github.com/cucumber/cucumber/wiki/Gherkin) LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'gherkin'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Tags. +local tag = token('tag', '@' * l.word^0) + +-- Comments. +local comment = token(l.COMMENT, '#' * l.nonnewline^0) + +-- Strings. +local doc_str = '"""' * (l.any - '"""')^0 * P('"""')^-1 +local dq_str = l.delimited_range('"') + +local string = token(l.STRING, doc_str + dq_str) + +-- Placeholders. +local placeholder = token('placeholder', l.nested_pair('<', '>')) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'Given', 'When', 'Then', 'And', 'But' +}) + +-- Identifiers. +local identifier = token(l.KEYWORD, P('Scenario Outline') + word_match{ + 'Feature', 'Background', 'Scenario', 'Scenarios', 'Examples' +}) + +-- Examples. +local example = token('example', '|' * l.nonnewline^0) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +M._rules = { + {'whitespace', ws}, + {'comment', comment}, + {'tag', tag}, + {'placeholder', placeholder}, + {'keyword', keyword}, + {'identifier', identifier}, + {'example', example}, + {'string', string}, + {'number', number} +} + +M._tokenstyles = { + tag = l.STYLE_LABEL, + placeholder = l.STYLE_NUMBER, + example = l.STYLE_NUMBER +} + +M._FOLDBYINDENTATION = true + +return M diff --git a/lua/lexers/git-rebase.lua b/lua/lexers/git-rebase.lua @@ -0,0 +1,51 @@ +-- Copyright 2017-2021 Marc André Tanner +-- git-rebase(1) LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R = lpeg.P, lpeg.R + +local M = {_NAME = 'git-rebase'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, l.starts_line('#') * l.nonnewline^0) + +-- Keywords. +local keywords = l.starts_line(word_match{ + 'p', 'pick', + 'r', 'reword', + 'e', 'edit', + 's', 'squash', + 'f', 'fixup', + 'x', 'exec', + 'd', 'drop', + 'b', 'break', + 'l', 'label', + 't', 'reset', + 'm', 'merge', +}) +local keyword = token(l.KEYWORD, keywords) + +-- Commit SHA1. 
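+-- patn(pat, min, max) below implements bounded repetition, a common LPeg
+-- idiom: the negative lookahead -pat^(max + 1) fails when more than max
+-- repetitions are present, then pat^min consumes at least min of them.
+-- For example, patn(R('09'), 2, 3) matches '12' and '123' but not '1' or
+-- '1234'. Here it accepts 7 to 40 lowercase hex digits of a commit SHA1.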
+local function patn(pat, min, max) + return -pat^(max + 1) * pat^min +end + +local commit = token(l.NUMBER, patn(R('09', 'af'), 7, 40)) + +local message = token(l.STRING, l.nonnewline^1) + +M._rules = { + {'whitespace', ws}, + {'comment', comment}, + {'keyword', keyword}, + {'commit', commit}, + {'message', message}, +} + +M._LEXBYLINE = true + +return M diff --git a/lua/lexers/gleam.lua b/lua/lexers/gleam.lua @@ -0,0 +1,119 @@ +-- Copyright 2021-2022 Mitchell. See LICENSE. +-- Gleam LPeg lexer +-- https://gleam.run/ +-- Contributed by Tynan Beatty + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S + +local KEY, OP = lexer.KEYWORD, lexer.OPERATOR + +local lex = lexer.new('gleam') + +-- Whitespace. +local gleam_ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', gleam_ws) + +-- Types. +local typ_tok = token(lexer.TYPE, lexer.upper * lexer.alnum^0) +lex:add_rule('type', typ_tok) + +-- Modules. +local name = (lexer.lower + '_') * (lexer.lower + lexer.digit + '_')^0 +local fn_name = token(lexer.FUNCTION, name) +local mod_name = token('module', name) +local typ_or_fn = typ_tok + fn_name +local function mod_tok(ws) + return token(KEY, 'import') * ws^1 * mod_name * (ws^0 * token(OP, '/') * ws^0 * mod_name)^0 * + (ws^1 * token(KEY, 'as') * ws^1 * mod_name)^-1 * + (ws^0 * token(OP, '.') * ws^0 * token(OP, '{') * ws^0 * typ_or_fn * + (ws^0 * token(OP, ',') * ws^0 * typ_or_fn)^0 * ws^0 * token(OP, '}'))^-1 +end +lex:add_rule('module', mod_tok(gleam_ws)) +lex:add_style('module', lexer.styles.constant) + +-- Keywords. +local key_tok = token(KEY, word_match( + 'as assert case const external fn if import let opaque pub todo try tuple type')) +lex:add_rule('keyword', key_tok) + +-- Functions. +local function fn_tok(ws) + local mod_name_op = mod_name * ws^0 * token(OP, '.') + local fn_def_call = mod_name_op^-1 * ws^0 * fn_name * ws^0 * #P('(') + local fn_pipe = token(OP, '|>') * ws^0 * (token(KEY, 'fn') + mod_name_op^-1 * fn_name) + return fn_def_call + fn_pipe +end +lex:add_rule('function', fn_tok(gleam_ws)) + +-- Labels. +local id = token(lexer.IDENTIFIER, name) +local function lab_tok(ws) + return token(OP, S('(,')) * ws^0 * token(lexer.LABEL, name) * #(ws^1 * id) +end +lex:add_rule('label', lab_tok(gleam_ws)) + +-- Identifiers. +local discard_id = token('discard', '_' * name) +local id_tok = discard_id + id +lex:add_rule('identifier', id_tok) +lex:add_style('discard', lexer.styles.comment) + +-- Strings. +local str_tok = token(lexer.STRING, lexer.range('"')) +lex:add_rule('string', str_tok) + +-- Comments. +local com_tok = token(lexer.COMMENT, lexer.to_eol('//')) +lex:add_rule('comment', com_tok) + +-- Numbers. +local function can_neg(patt) return (lpeg.B(lexer.space + S('+-/*%<>=&|:,.')) * '-')^-1 * patt end +local function can_sep(patt) return (P('_')^-1 * patt^1)^1 end +local dec = lexer.digit * can_sep(lexer.digit)^0 +local float = dec * '.' * dec^0 +local bin = '0' * S('bB') * can_sep(S('01')) * -lexer.xdigit +local oct = '0' * S('oO') * can_sep(lpeg.R('07')) +local hex = '0' * S('xX') * can_sep(lexer.xdigit) +local num_tok = token(lexer.NUMBER, can_neg(float) + bin + oct + hex + can_neg(dec)) +lex:add_rule('number', num_tok) + +-- Operators. +local op_tok = token(OP, S('+-*/%#!=<>&|.,:;{}[]()')) +lex:add_rule('operator', op_tok) + +-- Errors. +local err_tok = token(lexer.ERROR, lexer.any) +lex:add_rule('error', err_tok) + +-- Fold points. 
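+-- Fold points mark token pairs that delimit collapsible regions;
+-- fold_consecutive_lines('//') also lets runs of line comments fold.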
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.OPERATOR, '[', ']') +lex:add_fold_point(lexer.OPERATOR, '(', ')') + +-- Embedded Bit Strings. +-- Mimic lexer.load() by creating a bitstring-specific whitespace style. +local bitstring = lexer.new(lex._NAME .. '_bitstring') +local bitstring_ws = token(bitstring._NAME .. '_whitespace', lexer.space^1) +bitstring:add_rule('whitespace', bitstring_ws) +bitstring:add_style(bitstring._NAME .. '_whitespace', lexer.styles.whitespace) +bitstring:add_rule('type', typ_tok) +bitstring:add_rule('module', mod_tok(bitstring_ws)) +bitstring:add_rule('keyword', key_tok + token(KEY, word_match{ + 'binary', 'bytes', 'int', 'float', 'bit_string', 'bits', 'utf8', 'utf16', 'utf32', + 'utf8_codepoint', 'utf16_codepoint', 'utf32_codepoint', 'signed', 'unsigned', 'big', 'little', + 'native', 'unit', 'size' +})) +bitstring:add_rule('function', fn_tok(bitstring_ws)) +bitstring:add_rule('label', lab_tok(bitstring_ws)) +bitstring:add_rule('identifier', id_tok) +bitstring:add_rule('string', str_tok) +bitstring:add_rule('comment', com_tok) +bitstring:add_rule('number', num_tok) +bitstring:add_rule('operator', op_tok) +bitstring:add_rule('error', err_tok) +lex:embed(bitstring, token(OP, '<<'), token(OP, '>>')) + +return lex diff --git a/lua/lexers/glsl.lua b/lua/lexers/glsl.lua @@ -0,0 +1,132 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- GLSL LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S +local table = _G.table + +local M = {_NAME = 'glsl'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'attribute', 'const', 'in', 'inout', 'out', 'uniform', 'varying', 'invariant', + 'centroid', 'flat', 'smooth', 'noperspective', 'layout', 'patch', 'sample', + 'subroutine', 'lowp', 'mediump', 'highp', 'precision', + -- Macros. + '__VERSION__', '__LINE__', '__FILE__', +}) + +-- Functions. 
+local func = token(l.FUNCTION, word_match{
+ 'radians', 'degrees', 'sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'sinh',
+ 'cosh', 'tanh', 'asinh', 'acosh', 'atanh', 'pow', 'exp', 'log', 'exp2',
+ 'log2', 'sqrt', 'inversesqrt', 'abs', 'sign', 'floor', 'trunc', 'round',
+ 'roundEven', 'ceil', 'fract', 'mod', 'modf', 'min', 'max', 'clamp', 'mix',
+ 'step', 'smoothstep', 'isnan', 'isinf', 'floatBitsToInt', 'floatBitsToUint',
+ 'intBitsToFloat', 'uintBitsToFloat', 'fma', 'frexp', 'ldexp', 'packUnorm2x16',
+ 'packUnorm4x8', 'packSnorm4x8', 'unpackUnorm2x16', 'unpackUnorm4x8',
+ 'unpackSnorm4x8', 'packDouble2x32', 'unpackDouble2x32', 'length', 'distance',
+ 'dot', 'cross', 'normalize', 'ftransform', 'faceforward', 'reflect',
+ 'refract', 'matrixCompMult', 'outerProduct', 'transpose', 'determinant',
+ 'inverse', 'lessThan', 'lessThanEqual', 'greaterThan', 'greaterThanEqual',
+ 'equal', 'notEqual', 'any', 'all', 'not', 'uaddCarry', 'usubBorrow',
+ 'umulExtended', 'imulExtended', 'bitfieldExtract', 'bitfieldInsert',
+ 'bitfieldReverse', 'bitCount', 'findLSB', 'findMSB', 'textureSize',
+ 'textureQueryLOD', 'texture', 'textureProj', 'textureLod', 'textureOffset',
+ 'texelFetch', 'texelFetchOffset', 'textureProjOffset', 'textureLodOffset',
+ 'textureProjLod', 'textureProjLodOffset', 'textureGrad', 'textureGradOffset',
+ 'textureProjGrad', 'textureProjGradOffset', 'textureGather',
+ 'textureGatherOffset', 'texture1D', 'texture2D', 'texture3D', 'texture1DProj',
+ 'texture2DProj', 'texture3DProj', 'texture1DLod', 'texture2DLod',
+ 'texture3DLod', 'texture1DProjLod', 'texture2DProjLod', 'texture3DProjLod',
+ 'textureCube', 'textureCubeLod', 'shadow1D', 'shadow2D', 'shadow1DProj',
+ 'shadow2DProj', 'shadow1DLod', 'shadow2DLod', 'shadow1DProjLod',
+ 'shadow2DProjLod', 'dFdx', 'dFdy', 'fwidth', 'interpolateAtCentroid',
+ 'interpolateAtSample', 'interpolateAtOffset', 'noise1', 'noise2', 'noise3',
+ 'noise4', 'EmitStreamVertex', 'EndStreamPrimitive', 'EmitVertex',
+ 'EndPrimitive', 'barrier'
+})
+
+-- Types.
+local type = token(l.TYPE,
+ S('bdiu')^-1 * 'vec' * R('24') +
+ P('d')^-1 * 'mat' * R('24') * ('x' * R('24'))^-1 +
+ S('iu')^-1 * 'sampler' * R('13') * 'D' +
+ 'sampler' * R('12') * 'D' * P('Array')^-1 * 'Shadow' +
+ S('iu')^-1 * 'sampler' * (R('12') * 'DArray' + word_match{
+ 'Cube', '2DRect', 'Buffer', '2DMS', '2DMSArray',
+ '2DMSCubeArray'
+ }) +
+ word_match{
+ 'samplerCubeShadow', 'sampler2DRectShadow',
+ 'samplerCubeArrayShadow'
+ })
+
+-- Variables.
+local variable = token(l.VARIABLE, word_match{
+ 'gl_VertexID', 'gl_InstanceID', 'gl_Position', 'gl_PointSize',
+ 'gl_ClipDistance', 'gl_PrimitiveIDIn', 'gl_InvocationID', 'gl_PrimitiveID',
+ 'gl_Layer', 'gl_PatchVerticesIn', 'gl_TessLevelOuter', 'gl_TessLevelInner',
+ 'gl_TessCoord', 'gl_FragCoord', 'gl_FrontFacing', 'gl_PointCoord',
+ 'gl_SampleID', 'gl_SamplePosition', 'gl_FragColor', 'gl_FragData',
+ 'gl_FragDepth', 'gl_SampleMask', 'gl_ClipVertex', 'gl_FrontColor',
+ 'gl_BackColor', 'gl_FrontSecondaryColor', 'gl_BackSecondaryColor',
+ 'gl_TexCoord', 'gl_FogFragCoord', 'gl_Color', 'gl_SecondaryColor',
+ 'gl_Normal', 'gl_Vertex', 'gl_MultiTexCoord0', 'gl_MultiTexCoord1',
+ 'gl_MultiTexCoord2', 'gl_MultiTexCoord3', 'gl_MultiTexCoord4',
+ 'gl_MultiTexCoord5', 'gl_MultiTexCoord6', 'gl_MultiTexCoord7', 'gl_FogCoord'
+})
+
+-- Constants.
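+-- gl_Max* implementation limits plus the legacy fixed-function uniform
+-- state (matrices, lighting, fog, clip planes), all styled as constants.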
+local constant = token(l.CONSTANT, word_match{ + 'gl_MaxVertexAttribs', 'gl_MaxVertexUniformComponents', 'gl_MaxVaryingFloats', + 'gl_MaxVaryingComponents', 'gl_MaxVertexOutputComponents', + 'gl_MaxGeometryInputComponents', 'gl_MaxGeometryOutputComponents', + 'gl_MaxFragmentInputComponents', 'gl_MaxVertexTextureImageUnits', + 'gl_MaxCombinedTextureImageUnits', 'gl_MaxTextureImageUnits', + 'gl_MaxFragmentUniformComponents', 'gl_MaxDrawBuffers', 'gl_MaxClipDistances', + 'gl_MaxGeometryTextureImageUnits', 'gl_MaxGeometryOutputVertices', + 'gl_MaxGeometryTotalOutputComponents', 'gl_MaxGeometryUniformComponents', + 'gl_MaxGeometryVaryingComponents', 'gl_MaxTessControlInputComponents', + 'gl_MaxTessControlOutputComponents', 'gl_MaxTessControlTextureImageUnits', + 'gl_MaxTessControlUniformComponents', + 'gl_MaxTessControlTotalOutputComponents', + 'gl_MaxTessEvaluationInputComponents', 'gl_MaxTessEvaluationOutputComponents', + 'gl_MaxTessEvaluationTextureImageUnits', + 'gl_MaxTessEvaluationUniformComponents', 'gl_MaxTessPatchComponents', + 'gl_MaxPatchVertices', 'gl_MaxTessGenLevel', 'gl_MaxTextureUnits', + 'gl_MaxTextureCoords', 'gl_MaxClipPlanes', + + 'gl_DepthRange', 'gl_ModelViewMatrix', 'gl_ProjectionMatrix', + 'gl_ModelViewProjectionMatrix', 'gl_TextureMatrix', 'gl_NormalMatrix', + 'gl_ModelViewMatrixInverse', 'gl_ProjectionMatrixInverse', + 'gl_ModelViewProjectionMatrixInverse', 'gl_TextureMatrixInverse', + 'gl_ModelViewMatrixTranspose', 'gl_ProjectionMatrixTranspose', + 'gl_ModelViewProjectionMatrixTranspose', 'gl_TextureMatrixTranspose', + 'gl_ModelViewMatrixInverseTranspose', 'gl_ProjectionMatrixInverseTranspose', + 'gl_ModelViewProjectionMatrixInverseTranspose', + 'gl_TextureMatrixInverseTranspose', 'gl_NormalScale', 'gl_ClipPlane', + 'gl_Point', 'gl_FrontMaterial', 'gl_BackMaterial', 'gl_LightSource', + 'gl_LightModel', 'gl_FrontLightModelProduct', 'gl_BackLightModelProduct', + 'gl_FrontLightProduct', 'gl_BackLightProduct', 'gl_TextureEnvColor', + 'gl_EyePlaneS', 'gl_EyePlaneT', 'gl_EyePlaneR', 'gl_EyePlaneQ', + 'gl_ObjectPlaneS', 'gl_ObjectPlaneT', 'gl_ObjectPlaneR', 'gl_ObjectPlaneQ', + 'gl_Fog' +}) + +-- Extend cpp lexer to include GLSL elements. +local cpp = l.load('cpp') +local _rules = cpp._rules +_rules[1] = {'whitespace', ws} +table.insert(_rules, 2, {'glsl_keyword', keyword}) +table.insert(_rules, 3, {'glsl_function', func}) +table.insert(_rules, 4, {'glsl_type', type}) +table.insert(_rules, 5, {'glsl_variable', variable}) +M._rules = _rules +M._foldsymbols = cpp._foldsymbols + +return M diff --git a/lua/lexers/gnuplot.lua b/lua/lexers/gnuplot.lua @@ -0,0 +1,80 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Gnuplot LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'gnuplot'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '#' * l.nonnewline^0) + +-- Strings. +local sq_str = l.delimited_range("'") +local dq_str = l.delimited_range('"') +local bk_str = l.delimited_range('[]', true) +local bc_str = l.delimited_range('{}', true) +local string = token(l.STRING, sq_str + dq_str + bk_str + bc_str) + +-- Keywords. 
+local keyword = token(l.KEYWORD, word_match{
+ 'cd', 'call', 'clear', 'exit', 'fit', 'help', 'history', 'if', 'load',
+ 'pause', 'plot', 'using', 'with', 'index', 'every', 'smooth', 'thru', 'print',
+ 'pwd', 'quit', 'replot', 'reread', 'reset', 'save', 'set', 'show', 'shell',
+ 'splot', 'system', 'test', 'unset', 'update'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'abs', 'acos', 'acosh', 'arg', 'asin', 'asinh', 'atan', 'atan2', 'atanh',
+ 'besj0', 'besj1', 'besy0', 'besy1', 'ceil', 'cos', 'cosh', 'erf', 'erfc',
+ 'exp', 'floor', 'gamma', 'ibeta', 'inverf', 'igamma', 'imag', 'invnorm',
+ 'int', 'lambertw', 'lgamma', 'log', 'log10', 'norm', 'rand', 'real', 'sgn',
+ 'sin', 'sinh', 'sqrt', 'tan', 'tanh', 'column', 'defined', 'tm_hour',
+ 'tm_mday', 'tm_min', 'tm_mon', 'tm_sec', 'tm_wday', 'tm_yday', 'tm_year',
+ 'valid'
+})
+
+-- Variables.
+local variable = token(l.VARIABLE, word_match{
+ 'angles', 'arrow', 'autoscale', 'bars', 'bmargin', 'border', 'boxwidth',
+ 'clabel', 'clip', 'cntrparam', 'colorbox', 'contour', 'datafile',
+ 'decimalsign', 'dgrid3d', 'dummy', 'encoding', 'fit', 'fontpath', 'format',
+ 'functions', 'function', 'grid', 'hidden3d', 'historysize', 'isosamples',
+ 'key', 'label', 'lmargin', 'loadpath', 'locale', 'logscale', 'mapping',
+ 'margin', 'mouse', 'multiplot', 'mx2tics', 'mxtics', 'my2tics', 'mytics',
+ 'mztics', 'offsets', 'origin', 'output', 'parametric', 'plot', 'pm3d',
+ 'palette', 'pointsize', 'polar', 'print', 'rmargin', 'rrange', 'samples',
+ 'size', 'style', 'surface', 'terminal', 'tics', 'ticslevel', 'ticscale',
+ 'timestamp', 'timefmt', 'title', 'tmargin', 'trange', 'urange', 'variables',
+ 'version', 'view', 'vrange', 'x2data', 'x2dtics', 'x2label', 'x2mtics',
+ 'x2range', 'x2tics', 'x2zeroaxis', 'xdata', 'xdtics', 'xlabel', 'xmtics',
+ 'xrange', 'xtics', 'xzeroaxis', 'y2data', 'y2dtics', 'y2label', 'y2mtics',
+ 'y2range', 'y2tics', 'y2zeroaxis', 'ydata', 'ydtics', 'ylabel', 'ymtics',
+ 'yrange', 'ytics', 'yzeroaxis', 'zdata', 'zdtics', 'cbdata', 'cbdtics',
+ 'zero', 'zeroaxis', 'zlabel', 'zmtics', 'zrange', 'ztics', 'cblabel',
+ 'cbmtics', 'cbrange', 'cbtics'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('-+~!$*%=<>&|^?:()'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'variable', variable},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'operator', operator},
+}
+
+return M
diff --git a/lua/lexers/go.lua b/lua/lexers/go.lua
@@ -0,0 +1,78 @@
+-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Go LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'go'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local raw_str = l.delimited_range('`', false, true)
+local string = token(l.STRING, sq_str + dq_str + raw_str)
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * P('i')^-1)
+
+-- Keywords.
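+-- All 25 reserved words of the Go spec; predeclared identifiers (true, nil,
+-- int, len, ...) are handled by the constant, type and function rules below.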
+local keyword = token(l.KEYWORD, word_match{ + 'break', 'case', 'chan', 'const', 'continue', 'default', 'defer', 'else', + 'fallthrough', 'for', 'func', 'go', 'goto', 'if', 'import', 'interface', + 'map', 'package', 'range', 'return', 'select', 'struct', 'switch', 'type', + 'var' +}) + +-- Constants. +local constant = token(l.CONSTANT, word_match{ + 'true', 'false', 'iota', 'nil' +}) + +-- Types. +local type = token(l.TYPE, word_match{ + 'bool', 'byte', 'complex64', 'complex128', 'error', 'float32', 'float64', + 'int', 'int8', 'int16', 'int32', 'int64', 'rune', 'string', 'uint', 'uint8', + 'uint16', 'uint32', 'uint64', 'uintptr' +}) + +-- Functions. +local func = token(l.FUNCTION, word_match{ + 'append', 'cap', 'close', 'complex', 'copy', 'delete', 'imag', 'len', 'make', + 'new', 'panic', 'print', 'println', 'real', 'recover' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('+-*/%&|^<>=!:;.,()[]{}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'constant', constant}, + {'type', type}, + {'function', func}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, +} + +M._foldsymbols = { + _patterns = {'[{}]', '/%*', '%*/', '//'}, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} +} + +return M diff --git a/lua/lexers/groovy.lua b/lua/lexers/groovy.lua @@ -0,0 +1,89 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Groovy LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'groovy'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '//' * l.nonnewline_esc^0 +local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local sq_str = l.delimited_range("'") +local dq_str = l.delimited_range('"') +local triple_sq_str = "'''" * (l.any - "'''")^0 * P("'''")^-1 +local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1 +local regex_str = #P('/') * l.last_char_includes('=~|!<>+-*?&,:;([{') * + l.delimited_range('/', true) +local string = token(l.STRING, triple_sq_str + triple_dq_str + sq_str + + dq_str) + + token(l.REGEX, regex_str) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'abstract', 'break', 'case', 'catch', 'continue', 'default', 'do', 'else', + 'extends', 'final', 'finally', 'for', 'if', 'implements', 'instanceof', + 'native', 'new', 'private', 'protected', 'public', 'return', 'static', + 'switch', 'synchronized', 'throw', 'throws', 'transient', 'try', 'volatile', + 'while', 'strictfp', 'package', 'import', 'as', 'assert', 'def', 'mixin', + 'property', 'test', 'using', 'in', + 'false', 'null', 'super', 'this', 'true', 'it' +}) + +-- Functions. 
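+-- Methods the Groovy GDK adds to ordinary objects, collections and streams
+-- (each, collect, inject, withWriter, ...), highlighted wherever they
+-- appear.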
+local func = token(l.FUNCTION, word_match{ + 'abs', 'any', 'append', 'asList', 'asWritable', 'call', 'collect', + 'compareTo', 'count', 'div', 'dump', 'each', 'eachByte', 'eachFile', + 'eachLine', 'every', 'find', 'findAll', 'flatten', 'getAt', 'getErr', 'getIn', + 'getOut', 'getText', 'grep', 'immutable', 'inject', 'inspect', 'intersect', + 'invokeMethods', 'isCase', 'join', 'leftShift', 'minus', 'multiply', + 'newInputStream', 'newOutputStream', 'newPrintWriter', 'newReader', + 'newWriter', 'next', 'plus', 'pop', 'power', 'previous', 'print', 'println', + 'push', 'putAt', 'read', 'readBytes', 'readLines', 'reverse', 'reverseEach', + 'round', 'size', 'sort', 'splitEachLine', 'step', 'subMap', 'times', + 'toInteger', 'toList', 'tokenize', 'upto', 'waitForOrKill', 'withPrintWriter', + 'withReader', 'withStream', 'withWriter', 'withWriterAppend', 'write', + 'writeLine' +}) + +-- Types. +local type = token(l.TYPE, word_match{ + 'boolean', 'byte', 'char', 'class', 'double', 'float', 'int', 'interface', + 'long', 'short', 'void' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('=~|!<>+-/*?&.,:;()[]{}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'function', func}, + {'type', type}, + {'identifier', identifier}, + {'comment', comment}, + {'string', string}, + {'number', number}, + {'operator', operator}, +} + +M._foldsymbols = { + _patterns = {'[{}]', '/%*', '%*/', '//'}, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} +} + +return M diff --git a/lua/lexers/gtkrc.lua b/lua/lexers/gtkrc.lua @@ -0,0 +1,71 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Gtkrc LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'gtkrc'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '#' * l.nonnewline^0) + +-- Strings. +local sq_str = l.delimited_range("'", true) +local dq_str = l.delimited_range('"', true) +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, l.digit^1 * ('.' * l.digit^1)^-1) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'binding', 'class', 'include', 'module_path', 'pixmap_path', 'im_module_file', + 'style', 'widget', 'widget_class' +}) + +-- Variables. +local variable = token(l.VARIABLE, word_match{ + 'bg', 'fg', 'base', 'text', 'xthickness', 'ythickness', 'bg_pixmap', 'font', + 'fontset', 'font_name', 'stock', 'color', 'engine' +}) + +-- States. +local state = token(l.CONSTANT, word_match{ + 'ACTIVE', 'SELECTED', 'NORMAL', 'PRELIGHT', 'INSENSITIVE', 'TRUE', 'FALSE' +}) + +-- Functions. +local func = token(l.FUNCTION, word_match{ + 'mix', 'shade', 'lighter', 'darker' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.alpha * (l.alnum + S('_-'))^0) + +-- Operators. 
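+-- (lpeg.S matches any single character from its set, so each punctuation
+-- mark below is emitted as a one-character operator token.)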
+local operator = token(l.OPERATOR, S(':=,*()[]{}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'variable', variable}, + {'state', state}, + {'function', func}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, +} + +M._foldsymbols = { + _patterns = {'[{}]', '#'}, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['#'] = l.fold_line_comments('#')} +} + +return M diff --git a/lua/lexers/hare.lua b/lua/lexers/hare.lua @@ -0,0 +1,59 @@ +-- Copyright 2021-2022 Mitchell. See LICENSE. +-- Hare LPeg lexer +-- https://harelang.org +-- Contributed by Qiu + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S + +local lex = lexer.new('hare') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'as', 'break', 'case', 'const', 'continue', 'def', 'defer', 'else', 'export', 'false', 'fn', + 'for', 'if', 'is', 'let', 'match', 'null', 'nullable', 'return', 'static', 'struct', 'switch', + 'true', 'type', 'use', 'yield' +})) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match{ + 'len', 'alloc', 'free', 'assert', 'abort', 'size', 'append', 'insert', 'delete', 'vastart', + 'vaarg', 'vaend' +})) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match{ + 'bool', 'enum', 'f32', 'f64', 'i16', 'i32', 'i64', 'i8', 'int', 'u16', 'u32', 'u64', 'u8', 'uint', + 'uintptr', 'union', 'void', 'rune', 'str', 'char' +})) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +local dq_str = lexer.range('"') +local raw_str = lexer.range('`') +lex:add_rule('string', token(lexer.STRING, dq_str + raw_str)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('//'))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%^!=&|?:;,.()[]{}<>'))) + +-- At rule. +lex:add_rule('at_rule', token('at_rule', '@' * word_match('noreturn offset init fini test symbol'))) +lex:add_style('at_rule', lexer.styles.preprocessor) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) + +return lex diff --git a/lua/lexers/haskell.lua b/lua/lexers/haskell.lua @@ -0,0 +1,60 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Haskell LPeg lexer. +-- Modified by Alex Suraci. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'haskell'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '--' * l.nonnewline_esc^0 +local block_comment = '{-' * (l.any - '-}')^0 * P('-}')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local string = token(l.STRING, l.delimited_range('"')) + +-- Chars. +local char = token(l.STRING, l.delimited_range("'", true)) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'case', 'class', 'data', 'default', 'deriving', 'do', 'else', 'if', 'import', + 'in', 'infix', 'infixl', 'infixr', 'instance', 'let', 'module', 'newtype', + 'of', 'then', 'type', 'where', '_', 'as', 'qualified', 'hiding' +}) + +-- Identifiers. 
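+-- (The word tail below admits '.', '_', ''' and '#', so qualified names like
+-- Data.List.map and primed names like go' lex as a single identifier.)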
+local word = (l.alnum + S("._'#"))^0
+local identifier = token(l.IDENTIFIER, (l.alpha + '_') * word)
+
+-- Operators.
+local op = l.punct - S('()[]{}')
+local operator = token(l.OPERATOR, op)
+
+-- Types & type constructors.
+local constructor = token(l.TYPE, (l.upper * word) + (P(":") * (op^1 - P(":"))))
+
+M._rules = {
+  {'whitespace', ws},
+  {'keyword', keyword},
+  {'type', constructor},
+  {'identifier', identifier},
+  {'string', string},
+  {'char', char},
+  {'comment', comment},
+  {'number', number},
+  {'operator', operator},
+}
+
+M._FOLDBYINDENTATION = true
+
+return M
diff --git a/lua/lexers/html.lua b/lua/lexers/html.lua
@@ -0,0 +1,162 @@
+-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- HTML LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+
+local M = {_NAME = 'html'}
+
+local case_insensitive_tags = true
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->')^-1)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local string = #S('\'"') * l.last_char_includes('=') *
+               token(l.STRING, sq_str + dq_str)
+
+local in_tag = #P((1 - S'><')^0 * '>')
+
+-- Numbers.
+local number = #l.digit * l.last_char_includes('=') *
+               token(l.NUMBER, l.digit^1 * P('%')^-1) * in_tag
+
+-- Elements.
+local known_element = token('element', '<' * P('/')^-1 * word_match({
+  'a', 'abbr', 'address', 'area', 'article', 'aside', 'audio', 'b', 'base',
+  'bdi', 'bdo', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption',
+  'cite', 'code', 'col', 'colgroup', 'content', 'data', 'datalist', 'dd',
+  'decorator', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'element', 'em',
+  'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2',
+  'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'i', 'iframe', 'img',
+  'input', 'ins', 'kbd', 'keygen', 'label', 'legend', 'li', 'link', 'main',
+  'map', 'mark', 'menu', 'menuitem', 'meta', 'meter', 'nav', 'noscript',
+  'object', 'ol', 'optgroup', 'option', 'output', 'p', 'param', 'pre',
+  'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'script', 'section',
+  'select', 'shadow', 'small', 'source', 'spacer', 'span', 'strong',
+  'style', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'template',
+  'textarea', 'tfoot', 'th', 'thead', 'time', 'title', 'tr', 'track', 'u', 'ul',
+  'var', 'video', 'wbr'
+}, nil, case_insensitive_tags))
+local unknown_element = token('unknown_element', '<' * P('/')^-1 * l.word)
+local element = known_element + unknown_element
+
+-- Attributes.
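+-- (An attribute name is only tokenized when an '=' follows; see the
+-- #(l.space^0 * '=') lookahead after this list. Arbitrary data-* and aria-*
+-- names are matched by prefix pattern rather than listed individually.)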
+local known_attribute = token('attribute', word_match({ + 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt', 'async', + 'autocomplete', 'autofocus', 'autoplay', 'bgcolor', 'border', 'buffered', + 'challenge', 'charset', 'checked', 'cite', 'class', 'code', 'codebase', + 'color', 'cols', 'colspan', 'content', 'contenteditable', 'contextmenu', + 'controls', 'coords', 'data', 'data-', 'datetime', 'default', 'defer', 'dir', + 'dirname', 'disabled', 'download', 'draggable', 'dropzone', 'enctype', 'for', + 'form', 'headers', 'height', 'hidden', 'high', 'href', 'hreflang', + 'http-equiv', 'icon', 'id', 'ismap', 'itemprop', 'keytype', 'kind', 'label', + 'lang', 'language', 'list', 'loop', 'low', 'manifest', 'max', 'maxlength', + 'media', 'method', 'min', 'multiple', 'name', 'novalidate', 'open', 'optimum', + 'pattern', 'ping', 'placeholder', 'poster', 'preload', 'pubdate', + 'radiogroup', 'readonly', 'rel', 'required', 'reversed', 'role', 'rows', + 'rowspan', 'sandbox', 'spellcheck', 'scope', 'scoped', 'seamless', 'selected', + 'shape', 'size', 'sizes', 'span', 'src', 'srcdoc', 'srclang', 'start', + 'step', 'style', 'summary', 'tabindex', 'target', 'title', 'type', 'usemap', + 'value', 'width', 'wrap' +}, '-', case_insensitive_tags) + ((P('data-') + 'aria-') * (l.alnum + '-')^1)) +local unknown_attribute = token('unknown_attribute', l.word) +local attribute = (known_attribute + unknown_attribute) * #(l.space^0 * '=') + +-- Closing tags. +local tag_close = token('element', P('/')^-1 * '>') + +-- Equals. +local equals = token(l.OPERATOR, '=') * in_tag + +-- Entities. +local entity = token('entity', '&' * (l.any - l.space - ';')^1 * ';') + +-- Doctype. +local doctype = token('doctype', '<!' * + word_match({'doctype'}, nil, case_insensitive_tags) * + (l.any - '>')^1 * '>') + +M._rules = { + {'whitespace', ws}, + {'comment', comment}, + {'doctype', doctype}, + {'element', element}, + {'tag_close', tag_close}, + {'attribute', attribute}, +-- {'equals', equals}, + {'string', string}, + {'number', number}, + {'entity', entity}, +} + +M._tokenstyles = { + element = l.STYLE_KEYWORD, + unknown_element = l.STYLE_KEYWORD..',italics', + attribute = l.STYLE_TYPE, + unknown_attribute = l.STYLE_TYPE..',italics', + entity = l.STYLE_OPERATOR, + doctype = l.STYLE_COMMENT +} + +M._foldsymbols = { + _patterns = {'</?', '/>', '<!%-%-', '%-%->'}, + element = {['<'] = 1, ['/>'] = -1, ['</'] = -1}, + unknown_element = {['<'] = 1, ['/>'] = -1, ['</'] = -1}, + [l.COMMENT] = {['<!--'] = 1, ['-->'] = -1} +} + +-- Tags that start embedded languages. +M.embed_start_tag = element * + (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 * + ws^0 * tag_close +M.embed_end_tag = element * tag_close + +-- Embedded CSS. +local css = l.load('css') +local style_element = word_match({'style'}, nil, case_insensitive_tags) +local css_start_rule = #(P('<') * style_element * + ('>' + P(function(input, index) + if input:find('^%s+type%s*=%s*(["\'])text/css%1', index) then + return index + end +end))) * M.embed_start_tag -- <style type="text/css"> +local css_end_rule = #('</' * style_element * ws^0 * '>') * + M.embed_end_tag -- </style> +l.embed_lexer(M, css, css_start_rule, css_end_rule) + +-- Embedded JavaScript. 
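+-- (l.embed_lexer switches from HTML to the child lexer between the start and
+-- end rules. The JavaScript comment rules are rebuilt below so that a
+-- closing script tag inside a JS comment still ends the embedded region.)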
+local js = l.load('javascript') +local script_element = word_match({'script'}, nil, case_insensitive_tags) +local js_start_rule = #(P('<') * script_element * + ('>' + P(function(input, index) + if input:find('^%s+type%s*=%s*(["\'])text/javascript%1', index) then + return index + end +end))) * M.embed_start_tag -- <script type="text/javascript"> +local js_end_rule = #('</' * script_element * ws^0 * '>') * + M.embed_end_tag -- </script> +local js_line_comment = '//' * (l.nonnewline_esc - js_end_rule)^0 +local js_block_comment = '/*' * (l.any - '*/' - js_end_rule)^0 * P('*/')^-1 +js._RULES['comment'] = token(l.COMMENT, js_line_comment + js_block_comment) +l.embed_lexer(M, js, js_start_rule, js_end_rule) + +-- Embedded CoffeeScript. +local cs = l.load('coffeescript') +local script_element = word_match({'script'}, nil, case_insensitive_tags) +local cs_start_rule = #(P('<') * script_element * P(function(input, index) + if input:find('^[^>]+type%s*=%s*(["\'])text/coffeescript%1', index) then + return index + end +end)) * M.embed_start_tag -- <script type="text/coffeescript"> +local cs_end_rule = #('</' * script_element * ws^0 * '>') * + M.embed_end_tag -- </script> +l.embed_lexer(M, cs, cs_start_rule, cs_end_rule) + +return M diff --git a/lua/lexers/icon.lua b/lua/lexers/icon.lua @@ -0,0 +1,78 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- LPeg lexer for the Icon programming language. +-- http://www.cs.arizona.edu/icon +-- Contributed by Carl Sturtivant. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'icon'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +--Comments +local line_comment = '#' * l.nonnewline_esc^0 +local comment = token(l.COMMENT, line_comment) + +-- Strings. +local cset = l.delimited_range("'") +local str = l.delimited_range('"') +local string = token(l.STRING, cset + str) + +-- Numbers. +local radix_literal = P('-')^-1 * l.dec_num * S('rR') * l.alnum^1 +local number = token(l.NUMBER, radix_literal + l.float + l.integer) + +-- Preprocessor. +local preproc_word = word_match{ + 'include', 'line', 'define', 'undef', 'ifdef', 'ifndef', 'else', 'endif', + 'error' +} +local preproc = token(l.PREPROCESSOR, S(' \t')^0 * P('$') * preproc_word) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'break', 'by', 'case', 'create', 'default', 'do', 'else', 'end', 'every', + 'fail', 'global', 'if', 'initial', 'invocable', 'link', 'local', 'next', + 'not', 'of', 'procedure', 'record', 'repeat', 'return', 'static', 'suspend', + 'then', 'to', 'until', 'while' +}) + +-- Icon Keywords: unique to Icon; use l.TYPE, as Icon is dynamically typed +local type = token(l.TYPE, P('&') * word_match{ + 'allocated', 'ascii', 'clock', 'collections', 'cset', 'current', 'date', + 'dateline', 'digits', 'dump', 'e', 'error', 'errornumber', 'errortext', + 'errorvalue', 'errout', 'fail', 'features', 'file', 'host', 'input', 'lcase', + 'letters', 'level', 'line', 'main', 'null', 'output', 'phi', 'pi', 'pos', + 'progname', 'random', 'regions', 'source', 'storage', 'subject', 'time', + 'trace', 'ucase', 'version' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. 
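+-- (A lone '&' reaching this rule is an ordinary operator; the '&keyword'
+-- forms were already consumed by the type rule above.)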
+local operator = token(l.OPERATOR, S('+-/*%<>~!=^&|?~@:;,.()[]{}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'type', type}, + {'identifier', identifier}, + {'comment', comment}, + {'string', string}, + {'number', number}, + {'preproc', preproc}, + {'operator', operator}, +} + +M._foldsymbols = { + _patterns = {'%l+', '#'}, + [l.PREPROCESSOR] = {ifdef = 1, ifndef = 1, endif = -1}, + [l.KEYWORD] = { procedure = 1, ['end'] = -1}, + [l.COMMENT] = {['#'] = l.fold_line_comments('#')} +} + +return M diff --git a/lua/lexers/idl.lua b/lua/lexers/idl.lua @@ -0,0 +1,68 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- IDL LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'idl'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '//' * l.nonnewline_esc^0 +local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local sq_str = l.delimited_range("'", true) +local dq_str = l.delimited_range('"', true) +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Preprocessor. +local preproc_word = word_match{ + 'define', 'undef', 'ifdef', 'ifndef', 'if', 'elif', 'else', 'endif', + 'include', 'warning', 'pragma' +} +local preproc = token(l.PREPROCESSOR, + l.starts_line('#') * preproc_word * l.nonnewline^0) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'abstract', 'attribute', 'case', 'const', 'context', 'custom', 'default', + 'exception', 'enum', 'factory', 'FALSE', 'in', 'inout', 'interface', 'local', + 'module', 'native', 'oneway', 'out', 'private', 'public', 'raises', + 'readonly', 'struct', 'support', 'switch', 'TRUE', 'truncatable', 'typedef', + 'union', 'valuetype' +}) + +-- Types. +local type = token(l.TYPE, word_match{ + 'any', 'boolean', 'char', 'double', 'fixed', 'float', 'long', 'Object', + 'octet', 'sequence', 'short', 'string', 'unsigned', 'ValueBase', 'void', + 'wchar', 'wstring' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('!<>=+-/*%&|^~.,:;?()[]{}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'type', type}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'preprocessor', preproc}, + {'operator', operator}, +} + +return M diff --git a/lua/lexers/inform.lua b/lua/lexers/inform.lua @@ -0,0 +1,97 @@ +-- Copyright 2010-2017 Jeff Stone. See LICENSE. +-- Inform LPeg lexer for Scintillua. +-- JMS 2010-04-25. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'inform'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '!' * l.nonnewline^0) + +-- Strings. +local sq_str = l.delimited_range("'") +local dq_str = l.delimited_range('"') +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local inform_hex = '$' * l.xdigit^1 +local inform_bin = '$$' * S('01')^1 +local number = token(l.NUMBER, l.integer + inform_hex + inform_bin) + +-- Keywords. 
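+-- (Inform 6 compile-time directives such as 'Include' and run-time keywords
+-- and object properties all share the single KEYWORD token class below.)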
+local keyword = token(l.KEYWORD, word_match{ + 'Abbreviate', 'Array', 'Attribute', 'Class', 'Constant', 'Default', 'End', + 'Endif', 'Extend', 'Global', 'Ifdef', 'Iffalse', 'Ifndef', 'Ifnot', 'Iftrue', + 'Import', 'Include', 'Link', 'Lowstring', 'Message', 'Object', 'Property', + 'Release', 'Replace', 'Serial', 'StartDaemon', 'Statusline', 'StopDaemon', + 'Switches', 'Verb', 'absent', 'action', 'actor', 'add_to_scope', 'address', + 'additive', 'after', 'and', 'animate', 'article', 'articles', 'before', + 'bold', 'box', 'break', 'cant_go', 'capacity', 'char', 'class', 'child', + 'children', 'clothing', 'concealed', 'container', 'continue', 'creature', + 'daemon', 'deadflag', 'default', 'describe', 'description', 'do', 'door', + 'door_dir', 'door_to', 'd_to', 'd_obj', 'e_to', 'e_obj', 'each_turn', + 'edible', 'else', 'enterable', 'false', 'female', 'first', 'font', 'for', + 'found_in', 'general', 'give', 'grammar', 'has', 'hasnt', 'held', 'if', 'in', + 'in_to', 'in_obj', 'initial', 'inside_description', 'invent', 'jump', 'last', + 'life', 'light', 'list_together', 'location', 'lockable', 'locked', 'male', + 'move', 'moved', 'multi', 'multiexcept', 'multiheld', 'multiinside', 'n_to', + 'n_obj', 'ne_to', 'ne_obj', 'nw_to', 'nw_obj', 'name', 'neuter', 'new_line', + 'nothing', 'notin', 'noun', 'number', 'objectloop', 'ofclass', 'off', 'on', + 'only', 'open', 'openable', 'or', 'orders', 'out_to', 'out_obj', 'parent', + 'parse_name', 'player', 'plural', 'pluralname', 'print', 'print_ret', + 'private', 'proper', 'provides', 'random', 'react_after', 'react_before', + 'remove', 'replace', 'return', 'reverse', 'rfalse','roman', 'rtrue', 's_to', + 's_obj', 'se_to', 'se_obj', 'sw_to', 'sw_obj', 'scenery', 'scope', 'score', + 'scored', 'second', 'self', 'short_name', 'short_name_indef', 'sibling', + 'spaces', 'static', 'string', 'style', 'supporter', 'switch', 'switchable', + 'talkable', 'thedark', 'time_left', 'time_out', 'to', 'topic', 'transparent', + 'true', 'underline', 'u_to', 'u_obj', 'visited', 'w_to', 'w_obj', + 'when_closed', 'when_off', 'when_on', 'when_open', 'while', 'with', + 'with_key', 'workflag', 'worn' +}) + +-- Library actions. +local action = token('action', word_match{ + 'Answer', 'Ask', 'AskFor', 'Attack', 'Blow', 'Burn', 'Buy', 'Climb', 'Close', + 'Consult', 'Cut', 'Dig', 'Disrobe', 'Drink', 'Drop', 'Eat', 'Empty', 'EmptyT', + 'Enter', 'Examine', 'Exit', 'Fill', 'FullScore', 'GetOff', 'Give', 'Go', + 'GoIn', 'Insert', 'Inv', 'InvTall', 'InvWide', 'Jump', 'JumpOver', 'Kiss', + 'LetGo', 'Listen', 'LMode1', 'LMode2', 'LMode3', 'Lock', 'Look', 'LookUnder', + 'Mild', 'No', 'NotifyOff', 'NotifyOn', 'Objects', 'Open', 'Order', 'Places', + 'Pray', 'Pronouns', 'Pull', 'Push', 'PushDir', 'PutOn', 'Quit', 'Receive', + 'Remove', 'Restart', 'Restore', 'Rub', 'Save', 'Score', 'ScriptOff', + 'ScriptOn', 'Search', 'Set', 'SetTo', 'Show', 'Sing', 'Sleep', 'Smell', + 'Sorry', 'Squeeze', 'Strong', 'Swim', 'Swing', 'SwitchOff', 'SwitchOn', + 'Take', 'Taste', 'Tell', 'Think', 'ThrowAt', 'ThrownAt', 'Tie', 'Touch', + 'Transfer', 'Turn', 'Unlock', 'VagueGo', 'Verify', 'Version', 'Wake', + 'WakeOther', 'Wait', 'Wave', 'WaveHands', 'Wear', 'Yes' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. 
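+-- (Single-character operators; Inform's '@'-prefixed inline assembly opcodes
+-- therefore lex as an operator followed by an identifier.)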
+local operator = token(l.OPERATOR, S('@~=+-*/%^#<>;:,.{}[]()&|?'))
+
+M._rules = {
+  {'whitespace', ws},
+  {'comment', comment},
+  {'string', string},
+  {'number', number},
+  {'keyword', keyword},
+  {'action', action},
+  {'identifier', identifier},
+  {'operator', operator},
+}
+
+M._tokenstyles = {
+  action = l.STYLE_VARIABLE
+}
+
+return M
diff --git a/lua/lexers/ini.lua b/lua/lexers/ini.lua
@@ -0,0 +1,52 @@
+-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Ini LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'ini'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, l.starts_line(S(';#')) * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local label = l.delimited_range('[]', true, true)
+local string = token(l.STRING, sq_str + dq_str + label)
+
+-- Numbers.
+local dec = l.digit^1 * ('_' * l.digit^1)^0
+local oct_num = '0' * S('01234567_')^1
+local integer = S('+-')^-1 * (l.hex_num + oct_num + dec)
+local number = token(l.NUMBER, (l.float + integer))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+  'true', 'false', 'on', 'off', 'yes', 'no'
+})
+
+-- Identifiers.
+local word = (l.alpha + '_') * (l.alnum + S('_.'))^0
+local identifier = token(l.IDENTIFIER, word)
+
+-- Operators.
+local operator = token(l.OPERATOR, '=')
+
+M._rules = {
+  {'whitespace', ws},
+  {'keyword', keyword},
+  {'identifier', identifier},
+  {'string', string},
+  {'comment', comment},
+  {'number', number},
+  {'operator', operator},
+}
+
+M._LEXBYLINE = true
+
+return M
diff --git a/lua/lexers/io_lang.lua b/lua/lexers/io_lang.lua
@@ -0,0 +1,66 @@
+-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Io LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'io_lang'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = (P('#') + '//') * l.nonnewline^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local tq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
+local string = token(l.STRING, tq_str + sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+  'block', 'method', 'while', 'foreach', 'if', 'else', 'do', 'super', 'self',
+  'clone', 'proto', 'setSlot', 'hasSlot', 'type', 'write', 'print', 'forward'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+  'Block', 'Buffer', 'CFunction', 'Date', 'Duration', 'File', 'Future', 'List',
+  'LinkedList', 'Map', 'Nop', 'Message', 'Nil', 'Number', 'Object', 'String',
+  'WeakLink'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
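+-- (Unusually, the _foldsymbols table below folds on '(' and ')' rather than
+-- braces, presumably matching Io's parenthesized message syntax.)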
+local operator = token(l.OPERATOR, S('`~@$%^&*-+/=\\<>?.,:;()[]{}')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'type', type}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, +} + +M._foldsymbols = { + _patterns = {'[%(%)]', '/%*', '%*/', '#', '//'}, + [l.OPERATOR] = {['('] = 1, [')'] = -1}, + [l.COMMENT] = { + ['/*'] = 1, ['*/'] = -1, ['#'] = l.fold_line_comments('#'), + ['//'] = l.fold_line_comments('//') + } +} + +return M diff --git a/lua/lexers/java.lua b/lua/lexers/java.lua @@ -0,0 +1,86 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- Java LPeg lexer. +-- Modified by Brian Schott. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'java'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local line_comment = '//' * l.nonnewline_esc^0 +local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local sq_str = l.delimited_range("'", true) +local dq_str = l.delimited_range('"', true) +local string = token(l.STRING, sq_str + dq_str) + +-- Numbers. +local number = token(l.NUMBER, (l.float + l.integer) * S('LlFfDd')^-1) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'abstract', 'assert', 'break', 'case', 'catch', 'class', 'const', 'continue', + 'default', 'do', 'else', 'enum', 'extends', 'final', 'finally', 'for', 'goto', + 'if', 'implements', 'import', 'instanceof', 'interface', 'native', 'new', + 'package', 'private', 'protected', 'public', 'return', 'static', 'strictfp', + 'super', 'switch', 'synchronized', 'this', 'throw', 'throws', 'transient', + 'try', 'while', 'volatile', + -- Literals. + 'true', 'false', 'null' +}) + +-- Types. +local type = token(l.TYPE, word_match{ + 'boolean', 'byte', 'char', 'double', 'float', 'int', 'long', 'short', 'void', + 'Boolean', 'Byte', 'Character', 'Double', 'Float', 'Integer', 'Long', 'Short', + 'String' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}')) + +-- Annotations. +local annotation = token('annotation', '@' * l.word) + +-- Functions. +local func = token(l.FUNCTION, l.word) * #P('(') + +-- Classes. +local class_sequence = token(l.KEYWORD, P('class')) * ws^1 * + token(l.CLASS, l.word) + +M._rules = { + {'whitespace', ws}, + {'class', class_sequence}, + {'keyword', keyword}, + {'type', type}, + {'function', func}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'annotation', annotation}, + {'operator', operator}, +} + +M._tokenstyles = { + annotation = l.STYLE_PREPROCESSOR +} + +M._foldsymbols = { + _patterns = {'[{}]', '/%*', '%*/', '//'}, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} +} + +return M diff --git a/lua/lexers/javascript.lua b/lua/lexers/javascript.lua @@ -0,0 +1,65 @@ +-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE. +-- JavaScript LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'javascript'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. 
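+-- (l.nonnewline_esc lets a '//' comment continue across a backslash-newline;
+-- P('*/')^-1 below tolerates an unterminated block comment at end of input.)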
+local line_comment = '//' * l.nonnewline_esc^0 +local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1 +local comment = token(l.COMMENT, line_comment + block_comment) + +-- Strings. +local sq_str = l.delimited_range("'") +local dq_str = l.delimited_range('"') +local template_str = l.delimited_range('`') +local regex_str = #P('/') * l.last_char_includes('+-*%^!=&|?:;,([{<>') * + l.delimited_range('/', true) * S('igm')^0 +local string = token(l.STRING, sq_str + dq_str + template_str) + + token(l.REGEX, regex_str) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) + +-- Keywords. +local keyword = token(l.KEYWORD, word_match{ + 'abstract', 'async', 'await', 'boolean', 'break', 'byte', 'case', 'catch', + 'char', 'class', 'const', 'continue', 'debugger', 'default', 'delete', + 'do', 'double', 'else', 'enum', 'export', 'extends', 'false', 'final', + 'finally', 'float', 'for', 'function', 'get', 'goto', 'if', 'implements', + 'import', 'in', 'instanceof', 'int', 'interface', 'let', 'long', 'native', + 'new', 'null', 'of', 'package', 'private', 'protected', 'public', 'return', + 'set', 'short', 'static', 'super', 'switch', 'synchronized', 'this', + 'throw', 'throws', 'transient', 'true', 'try', 'typeof', 'var', 'void', + 'volatile', 'while', 'with', 'yield' +}) + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.word) + +-- Operators. +local operator = token(l.OPERATOR, S('+-/*%^!=&|?:;,.()[]{}<>')) + +M._rules = { + {'whitespac