Lexer API in Go
The 'go_lexer' package is an API to help you create hand-written lexers and parsers.
The package was inspired by Rob Pike's video "Lexical Scanning in Go" and golang's 'template' package.
Below is the interface for the main Lexer type:
// lexer.Lexer helps you tokenize bytes
type Lexer interface {
// PeekRune allows you to look ahead at runes without consuming them
PeekRune(int) rune
// NextRune consumes and returns the next rune in the input
NextRune() rune
// BackupRune un-consumes the last rune from the input
// BackupRunes un-consumes the last n runes from the input
// NewLine increments the line number counter, resets the column counter
// Line returns the current line number, 1-based
Line() int
// Column returns the current column number, 1-based
Column() int
// EmitToken emits a token of the specified type, consuming matched runes
// without emitting them
// EmitTokenWithBytes emits a token along with all the consumed runes
// IgnoreToken ignores the consumed bytes without emitting any tokens
// EmitEOF emits a token of type TokenEOF
// NextToken retrieves the next emitted token from the input
NextToken() *Token
// Marker returns a marker that you can use to reset the lexer state later
Marker() *Marker
// CanReset confirms if the marker is still valid
CanReset(*Marker) bool
// Reset resets the lexer state to the specified marker
// MatchZeroOrOneBytes consumes the next rune if it matches, always returning true
MatchZeroOrOneBytes([]byte) bool
// MatchZeroOrOneRunes consumes the next rune if it matches, always returning true
MatchZeroOrOneRunes([]rune) bool
// MatchZeroOrOneRune consumes the next rune if it matches, always returning true
MatchZeroOrOneRune(rune) bool
// MatchZeroOrOneFunc consumes the next rune if it matches, always returning true
MatchZeroOrOneFunc(MatchFn) bool
// MatchZeroOrMoreBytes consumes a run of matching runes, always returning true
MatchZeroOrMoreBytes([]byte) bool
// MatchZeroOrMoreRunes consumes a run of matching runes, always returning true
MatchZeroOrMoreRunes([]rune) bool
// MatchZeroOrMoreFunc consumes a run of matching runes, always returning true
MatchZeroOrMoreFunc(MatchFn) bool
// MatchOneBytes consumes the next rune if it's in the list of bytes
MatchOneBytes([]byte) bool
// MatchOneRunes consumes the next rune if it's in the list of runes
MatchOneRunes([]rune) bool
// MatchOneRune consumes the next rune if it matches
MatchOneRune(rune) bool
// MatchOneFunc consumes the next rune if it matches
MatchOneFunc(MatchFn) bool
// MatchOneOrMoreBytes consumes a run of matching runes
MatchOneOrMoreBytes([]byte) bool
// MatchOneOrMoreRunes consumes a run of matching runes
MatchOneOrMoreRunes([]rune) bool
// MatchOneOrMoreFunc consumes a run of matching runes
MatchOneOrMoreFunc(MatchFn) bool
// MatchMinMaxBytes consumes a specified run of matching runes
MatchMinMaxBytes([]byte, int, int) bool
// MatchMinMaxRunes consumes a specified run of matching runes
MatchMinMaxRunes([]rune, int, int) bool
// MatchMinMaxFunc consumes a specified run of matching runes
MatchMinMaxFunc(MatchFn, int, int) bool
// NonMatchZeroOrOneBytes consumes the next rune if it does not match, always returning true
NonMatchZeroOrOneBytes([]byte) bool
// NonMatchZeroOrOneRunes consumes the next rune if it does not match, always returning true
NonMatchZeroOrOneRunes([]rune) bool
// NonMatchZeroOrOneFunc consumes the next rune if it does not match, always returning true
NonMatchZeroOrOneFunc(MatchFn) bool
// NonMatchZeroOrMoreBytes consumes a run of non-matching runes, always returning true
NonMatchZeroOrMoreBytes([]byte) bool
// NonMatchZeroOrMoreRunes consumes a run of non-matching runes, always returning true
NonMatchZeroOrMoreRunes([]rune) bool
// NonMatchZeroOrMoreFunc consumes a run of non-matching runes, always returning true
NonMatchZeroOrMoreFunc(MatchFn) bool
// NonMatchOneBytes consumes the next rune if it's NOT in the list of bytes
NonMatchOneBytes([]byte) bool
// NonMatchOneRunes consumes the next rune if it's NOT in the list of runes
NonMatchOneRunes([]rune) bool
// NonMatchOneFunc consumes the next rune if it does NOT match
NonMatchOneFunc(MatchFn) bool
// NonMatchOneOrMoreBytes consumes a run of non-matching runes
NonMatchOneOrMoreBytes([]byte) bool
// NonMatchOneOrMoreRunes consumes a run of non-matching runes
NonMatchOneOrMoreRunes([]rune) bool
// NonMatchOneOrMoreFunc consumes a run of non-matching runes
NonMatchOneOrMoreFunc(MatchFn) bool
// MatchEOF tries to match the next rune against RuneEOF
MatchEOF() bool
Below is a sample word count program that uses the lexer API:
package main
import "os"
import "fmt"
import "github.com/iNamik/go_lexer"
// Usage : wordcount <filename>
func usage() {
fmt.Printf("usage: %s <filename>\n", os.Args[0])
// We define our lexer tokens starting from the pre-defined EOF token
const (
T_EOF lexer.TokenType = lexer.T_EOF
T_NIL = lexer.T_EOF + iota
// List gleaned from isspace(3) manpage
var bytesNonWord = []byte{' ', '\t', '\f', '\v', '\n', '\r'}
var bytesSpace = []byte{' ', '\t', '\f', '\v'}
const charNewLine = '\n'
const charReturn = '\r'
func main() {
if len(os.Args) < 2 {
var file *os.File
var error error
file, error = os.Open(os.Args[1])
if error != nil {
var chars int = 0
var words int = 0
var spaces int = 0
var lines int = 0
// To help us track last line
var emptyLine bool = true
// Create our lexer
// NewSize(startState, reader, readerBufLen, channelCap)
lex := lexer.NewSize(lexFunc, file, 100, 1)
var lastTokenType lexer.TokenType = T_NIL
// Process lexer-emitted tokens
for t := lex.NextToken(); lexer.T_EOF != t.Type(); t = lex.NextToken() {
chars += len(t.Bytes())
switch t.Type() {
case T_WORD:
if lastTokenType != T_WORD {
emptyLine = false
emptyLine = true
case T_SPACE:
spaces += len(t.Bytes())
emptyLine = false
lastTokenType = t.Type()
// If last line not empty, up line count
if !emptyLine {
fmt.Printf("%d words, %d spaces, %d lines, %d chars\n", words, spaces, lines, chars)
func lexFunc(l lexer.Lexer) lexer.StateFn {
// EOF
if l.MatchEOF() {
return nil // We're done here
// Non-Space run
if l.NonMatchOneOrMoreBytes(bytesNonWord) {
// Space run
} else if l.MatchOneOrMoreBytes(bytesSpace) {
// Line Feed
} else if charNewLine == l.PeekRune(0) {
// Carriage-Return with optional line-feed immediately following
} else if charReturn == l.PeekRune(0) {
if charNewLine == l.PeekRune(0) {
} else {
return lexFunc
The package is built using the Go tool. Assuming you have correctly set the $GOPATH variable, you can run the following command:
go get github.com/iNamik/go_lexer
- https://github.com/iNamik/go_container
- https://github.com/iNamik/go_pkg
- David Farrell