关于Basic程序解释器及编译原理的简单化(1)--词法分析和代数式求值

类别:编程语言 点击:0 评论:0 推荐:

   在网上,看到还是有部分程序爱好者希望能编出自己的编译器.当然,这的确是件难事,许多人都说要去看什么编译原理和精通汇编语言,结果让这些爱好者都望而却步.但是,当我们亲手去做做后,发现要做一个简单的程序解释器(就像Java和Basic)那样,还是挺容易的.你根本不用去看那些东西,只要你懂C语言,在看了本文后,就可以完成那样的解释器.

   在网上,有许多大型C语言,Perl语言的编译器源代码.但当你下载后看看,竟发现点都看不懂.其实那些东西还是不看为妙.看了本文后,我相信你宁愿自己动手编,也不愿意去领会那些庞大的源代码了.

   少说费话了,我们开始讲解.

这一篇Basic解释器的代码.十分经典.而且十分简单化.

get_token()是词汇提取,譬如 PRINT A+B
通过调用一次get_token(),就在 字符串token里装上PRINT
再调用一次get_token(),token里就装上A
再调用一次get_token(),token里就装上+
再调用一次get_token(),token里就装上B
很简单吧!
putback()是将prog指针向回移动一格.

   
其中包含了词发分析和十分关键的代数式求值get_exp(int *result)
关于它的代数式求值get_exp(int *result),用到递归函数
void get_exp(),level2(),level3(),level4(),level5();
void level6(),primitive(),arith(),unary();
,确实难看懂,不过你尽管拿来用就是了.

话不多说,你看源代码就是了.最后,我将给你看看C++中完整的源代码

/*
   recursive descent parser for integer expression
   which may include variables
*/

#include <stdio.h>
#include <setjmp.h>
#include <math.h>
#include <ctype.h>
#include <stdlib.h>

#define DELIMITER 1
#define VARIABLE 2
#define NUMBER 3
#define COMMAND 4
#define STRING 5
#define QUOTE 6

#define EOL 9
#define FINISHED 10

extern char *prog;  /* holds expression to be analyzed */

extern jmp_buf e_buf;  /* hold enviroment */
extern int variables[26];  /* variables */
extern struct commands {
    char command[20];
    char tok;
} table[];

extern char token[80];  /* holds string representation of token */
extern char token_type;  /* contains type of token */
extern char tok;  /* holds the internal representation of token */

void get_exp(),level2(),level3(),level4(),level5();
void level6(),primitive(),arith(),unary();
void serror(),putback();


/* entry point into parser */
void get_exp(int *result)
{
    get_token();
    if (!*token) {
        serror(2);
        return;
    }
    level2(result);
    putback();  /*return last token read to input stream */
}


/* add or subtract two terms */
void level2(int *result)
{
    register char op;
    int hold;
   
    level3(result);
    while ((op = *token) =='+' || op == '-')  {
        get_token();
        level3(&hold);
        arith(op,result,&hold);
    }
}


/* multiply or divide two factors */
void level3(int *result)
{
    register char op;
    int hold;

    level4(result);
    while ((op = *token) == '*' || op == '/' || op == '%')  {
        get_token();
        level3(&hold);
        arith(op,result,&hold);
    }
}


/* process integer exponent */
void level4(int *result)
{
    register char op;
    int hold;

    level5(result);
    if (*token == '^') {
        get_token();
        level4(&hold);
        arith(op,result,&hold);
    }
}
             

/* is a unary + or - */             
void level5(int *result)
{
    register char op;
   
    op = 0;
    if ((token_type==DELIMITER) && *token == '+' || *token == '-' )  {
        op = *token;
        get_token();
    }
    level6(result);
    if (op)  unary(op,result);
}
               

/* process parenthesized expression */               
void level6(int *result)
{
    if ((*token == '(') && (token_type == DELIMITER))  {
        get_token();
        level2(result);
        if (*token!=')')
            serror(1);
        get_token();
    }
    else
        primitive(result);
}
 

/* find value of number or variable */
void primitive(int *result)
{
    switch (token_type)  {
        case VARIABLE:
            *result = find_var(token);
            get_token();
            return;
        case NUMBER:
            *result = atoi(token);
            get_token();
            return;
        default:
            serror(0);
    }
}


/* perform the specified arithmetic */
void arith(char o,int *r,int *h)
{
    register int t,ex;
   
    switch (o)  {
        case '-':
            *r = *r-*h;
            break;
        case '+':
            *r = *r+*h;
            break;
        case '*':
            *r = *r**h;
            break;
        case '/':
            *r = (*r)/(*h);
            break;
        case '%':
            *r = (*r)%(*h);
            break;
        case '^':
            ex = *r;
            if (*h==0)  {
                *r = 1;
                break;
            }
            for (t=*h-1;t>0;--t)  *r=(*r)*ex;
            break;
    }
}


/* reverse the sign */
void unary(char o,int *r)
{
    if (o=='-')  *r = -(*r);
}


/* find the value of a variable */
int find_var(char *s)
{
    if (!isalpha(*s))  {
        serror(4);  /* not a variable */
        return 0;
    }
    return variables[toupper(*token)-'A'];
}


/* display an error message */
void serror(int error)
{
    char *e[] = {
        "syntax error",
        "unbalanced parentheses",
        "no expression present",
        "equal sign expected",
        "not a variable",
        "label table full",
        "duplicate label",
        "undefined label",
        "THEN expected",
        "TO expected",
        "too many nested FOR loops",
        "NEXT without FOR",
        "too many nested GOSUB",
        "RETURN without GOSUB"
    };

    printf ("%s\n",e[error]);
    longjmp(e_buf,1);  /* return to save point */
}


/* get a token */
get_token()
{
    register char *temp;

    token_type = 0;tok = 0;
    temp = token;
   
    if (*prog == '\0')  {   /* end of file */
        *token = 0;
        tok = FINISHED;
        return (token_type = DELIMITER);
    }
   
    while (iswhite(*prog))  ++prog;  /* skip over white space */

    if (*prog == '\r')  {   /* CR LF */
        ++prog;++prog;
        tok = EOL;*token = '\r';
        token[1] = '\n';token[2] = 0;
        return (token_type = DELIMITER);
    }
   
    if (strchr("+-*^/%=;(),><",*prog))  {  /* delimiter */
        *temp = *prog;
        prog++;  /* advance to next position */
        temp++;
        *temp=0;
        return (token_type = DELIMITER);
    }
   
    if (*prog == '"')  {   /* quote string */
        prog++;
        while (*prog!='"'&&*prog!='\r')  *temp++=*prog++;
        if (*prog=='\r')  serror(1);
        prog++;*temp=0;
        return (token_type = QUOTE);
    }
   
    if (isdigit(*prog))  {   /* number */
        while (!isdelim(*prog))  *temp++=*prog++;
        *temp = '\0';
        return (token_type = NUMBER);
    }

    if (isalpha(*prog))  {   /* var or command */
        while (!isdelim(*prog))  *temp++=*prog++;
        token_type = STRING;
    }

    *temp = '\0';

    /* see if a string is a command or a variable */
    if (token_type == STRING)  {
        tok = look_up(token);  /* convert to internal rep */
        if (!tok)  token_type = VARIABLE;
        else token_type = COMMAND;  /* is a command */
    }
    return token_type;
}


/* return a token to input stream */
void putback()
{
    char *t;
    t = token;
    for (;*t;t++)  prog--;
}


look_up(char *s)
{
    register int i,j;
    char *p;

    /* convert to lowercase */
    p = s;
    while (*p)  { *p = tolower(*p); p++;  }

    /* see if token is in table */
    for (i=0;*table[i].command;i++)
        if (!strcmp(table[i].command,s))  return table[i].tok;
    return 0;   /* unknown command */
}


/* return true if c is a delimiter */
isdelim(char c)
{
    if (strchr(";,+-<>/*%^=() ",c)||c==9||c=='\r'||c==0)
        return 1;
    return 0;
}


iswhite (char c)
{
    if (c==' '||c=='\t')  return 1;
    else return 0;
}

本文地址:http://com.8s8s.com/it/it30359.htm