; -*- Lisp -*- ; lambda-calculus-tokeniser: an EuLisp library that tokenises a lambda ; calculus expression in preparation for parsing it. ; ; Copyright (c) 2001 Ian Hickson ; ; This program is free software; you can redistribute it and/or modify ; it under the terms of the GNU General Public License as published by ; the Free Software Foundation; either version 2 of the License, or ; (at your option) any later version. ; ; This program is distributed in the hope that it will be useful, but ; WITHOUT ANY WARRANTY; without even the implied warranty of ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ; General Public License for more details. ; ; You should have received a copy of the GNU General Public License ; along with this program; if not, write to the Free Software ; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. (defmodule lambda-calculus-tokeniser (import (level0 utilities)) ;;; Helper Classes and Functions ;; The syntax error condition (defcondition data: ()) ;; The class of the 'data' variables. This stores the string being ;; parsed and an index to the current position in the string. Using ;; a specially designed class for this instead of a list means we ;; can use named accessors (text and position) instead of car and ;; cdr on a list. (defclass () ((text keyword: text: reader: text requiredp: t) (position keyword: position: reader: position requiredp: t)) constructor: (string-position text: position:)) ;; A helper function which returns a copy of the data argument but ;; with the position slot increased by one. (defun advance (data) (string-position (text data) (+ (position data) 1))) ;; A helper function which retrieves the character currently ;; indicated by the data argument. (defun get-character (data) (element (text data) (position data))) ;;; Tokenisation Return Values ;; The tokenisation-result-frame, an abstract class used by the ;; token-result and end-token classes. (defclass () ((string-position-object keyword: string-position-object: reader: next requiredp: t) (result-value keyword: result-value: reader: token default: ())) abstractp: t) ;; The token-result class represents a new token. (defclass () () constructor: (return-token string-position-object: result-value:)) ;; The end-token class is a virtual token representing the end of ;; the current lambda term. (defclass () () predicate: end-token-p constructor: (return-end-token string-position-object:)) ;;; The tokeniser ;; The main function. This should be passed a . ; ; Note that missing ')'s on the end will not be considered an error ; condition. Similarly, anything after an unbalanced ')' will be ; ignored and not considered an error. These are actually bugs, but ; we shall call them features, and give them names: ; ; "Implied closing parenthesises are assumed. You may document your ; lambda expressions using trailing closing-parenthesis-delimited ; comments." ; (defun tokenise (data) (if (characterp data) (list data) (token (tokenise-lambda-term (string-position data 0))))) ;; the main tokenisation loop (defun tokenise-lambda-term (data) ; look at the current token (let ((this-token (process-token data))) (if (end-token-p this-token) ; if it closes the lambda term, return this-token ; otherwise, call ourselves recursively to process the next token (let ((next-token (tokenise-lambda-term (next this-token)))) ; Return a formed by the current token and the next ; token. If the next token was actually an , then ; that will return a cons of a token and the () value. ; This thus creates a well formed list. (return-token (next next-token) (cons (token this-token) (token next-token))))))) ;; the tokeniser for a single character (defun process-token (data) ; first check we haven't reached the end of the file (if (< (position data) (size (text data))) ; ok, still more data, so get the character (let ((character (get-character data))) ; what is it? (cond ((equal character "(") (tokenise-lambda-term (advance data))) ; start of bracketed term ((equal character ")") (return-end-token (advance data))) ; end of bracketed term (or start of "trailing closing-parenthesis-delimited comment"...) ((equal character "L") (tokenise-abstraction (advance data))) ; start of abstraction ((equal character ".") (error "syntax error: unexpected abstraction delimiter '.' found" data: data)) ; error (t (return-token (advance data) character)))) ; anything else: assume it is a variable and return it ; end of expression (and any assumed "implied closing parenthesises"...) ; note that we don't advance data here since there's nothing to which to advance (return-end-token data))) ;; the tokeniser for an abstraction (defun tokenise-abstraction (data) ; first check we haven't reached the end of the file (if (< (position data) (size (text data))) ; ok, still more data, so get the variable character (let ((variable (get-character data))) ; Was the variable really a variable? If not, raise condition. (cond ((equal variable "(") (error "syntax error: '(' found, variable expected in abstraction" data: data)) ((equal variable ")") (error "syntax error: ')' found, variable expected in abstraction" data: data)) ((equal variable "L") (error "syntax error: 'L' found, variable expected in abstraction" data: data)) ((equal variable ".") (error "syntax error: '.' found, variable expected in abstraction" data: data)) ; variable was ok, so get the next character (hopefully the abstraction delimiter '.') ; first, check there _is_ another character ((>= (position (advance data)) (size (text data))) (error "syntax error: unexpected end of expression: abstraction delimiter '.' not found" data: data)) (t (let* ((dot (get-character (advance data))) ; what we do next depends on whether the dot was really a dot or if it was another variable ; (or even an unexpected character), so check that (term (cond ((equal dot "(") (error "syntax error: '(' found in abstraction variable list" data: data)) ((equal dot ")") (error "syntax error: ')' found in abstraction variable list" data: data)) ((equal dot "L") (error "syntax error: 'L' found in abstraction variable list" data: data)) ((equal dot ".") (let* ((body (tokenise-lambda-term (advance (advance data)))) ; we have got to the body, so tokenise that (next-data (next body))) (if (< (position next-data) (size (text next-data))) ; Note that we step back the "next term" pointer because whatever ; character caused us to get an end token is actually not part of ; the abstraction (since abstractions have no terminating character). (return-token (string-position (text (next body)) (- (position (next body)) 1)) (token body)) ; well, except the end of the expression, which we don't want to step back from lest ; we find it it a variable that was in the abstraction... body))) ; Otherwise, dot is another variable, so tokenise it as an abstraction. ; This will return an abstraction structure which we can treat as a lambda term. ; In other words, this returns our one-variable abstraction's body. (t (tokenise-abstraction (advance data)))))) ; Ok, we now have a variable and a term. Create a token which contains ; a representation of this abstraction, and return that. (return-token (next term) (list "L" variable (token term))))))) ; unexpected end of expression -- complain (error "unexpected end of expression: abstraction delimiter '.' not found" data: data))) (export tokenise) ; pass it a , it returns a to be parsed )