自己写一个简单的C++单词扫描程序。

类别:编程语言 点击:0 评论:0 推荐:

/******************************************/
/* File:Unit2.h (scan.h)                  */
/* A Scanner for lexical analysis for C++ */
/* Author:zhanjiantao(compower)           */
/******************************************/
#ifndef Unit2H
#define Unit2H

#include <vcl.h>

//Number of slots in the reserved-word table scan::ReserveWs.
#define MAXNO 48

//One token record: the source line it was found on, its text and its
//category label.
typedef struct TypeToken
{
  int line;            //line number given to scan::MakeTL
  AnsiString words;    //token text
  AnsiString type;     //category label
}aToken;
typedef aToken *Listnd;    //pointer to a token record

//Line-oriented lexical scanner for C++ source text.
//InitRW() fills ReserveWs and allocates TokenList; DoScan() tokenises a
//file, showing every token through Print() and recording it with MakeTL();
//Compress() writes the recorded tokens to a file.
//No constructor is declared, so the pointer members hold no defined value
//until the functions above assign them.
class scan
{
public:
        //True when Token is one of the words held in ReserveWs.
        bool IsReserveWord(AnsiString Token);
        //Loads ReserveWs from "InitRW.ini" and creates TokenList.
        void InitRW();
        //Scans the file named by infile.
        void DoScan(char *infile);
        //Shows one token in grid row strgrdl.
        void Print(int lineno, AnsiString stoken, int strgrdl, AnsiString Type);
        //Appends one token record to TokenList.
        void MakeTL(int line,AnsiString words,AnsiString type);
        //Writes the tokens in TokenList to the file named by zipFname.
        void Compress(char *zipFname);
public:
        char *file;                     //file name last passed to DoScan
        char ch;                        //character currently being examined
        AnsiString strToken;            //text of the token being assembled
        AnsiString ReserveWs[MAXNO];    //reserved-word table
        TList *TokenList;               //list of Listnd records, created by InitRW
        Listnd Anode;                   //record most recently created or read
};

#endif

/******************************************/
/* File:Unit2.cpp (scan.cpp)              */
/* A Scanner for lexical analysis for C++ */
/* Author:zhanjiantao(compower)           */
/******************************************/
#include "Unit2.h"
#include <string.h>
#include <fstream.h>
//initiate Reserved Words list
void scan::InitRW()
{
     const int buflen = 10;
     char buf[buflen];
     AnsiString gotWord = "";
     ifstream iniRw("InitRW.ini");
     int i = 0;
     while(iniRw.getline(buf,buflen))
     {
      gotWord = buf;
      ReserveWs[i++] = gotWord;
     }
     iniRw.close();
     TokenList = new TList;
}

//Report whether Token is one of the reserved words held in ReserveWs.
//Returns true when some non-empty table entry compares equal to Token
//(AnsiCompare()==0), false otherwise. An empty Token is never a reserved
//word.
//The table is searched linearly: it has only MAXNO entries, unfilled slots
//are empty strings (which are not in sorted position), and nothing in this
//file guarantees that InitRW.ini is sorted in AnsiCompare order, so a
//binary search over all MAXNO slots can miss words that are present.
bool scan::IsReserveWord(AnsiString Token)
{
  if(Token.Length()==0)
  {
    return false;
  }
  for(int i=0; i<MAXNO; i++)
  {
    if(ReserveWs[i].Length()==0)    //unused slot
    {
      continue;
    }
    if(Token.AnsiCompare(ReserveWs[i])==0)
    {
      return true;
    }
  }
  return false;
}

//print on StringGrid
void scan::Print(int lineno, AnsiString stoken, int strgrdl, AnsiString Type)
{
   Form1->StringGrid1->RowCount++;
   Form1->StringGrid1->Cells[0][strgrdl] = lineno;
   Form1->StringGrid1->Cells[1][strgrdl] = stoken;
   Form1->StringGrid1->Cells[2][strgrdl] = Type;
}

//make a token list
void scan::MakeTL(int line,AnsiString words,AnsiString type)
{

   Anode = new aToken;
   Anode->line = line;
   Anode->words = words;
   Anode->type = type;
   TokenList->Add(Anode);
}

//scan--the hardcore of the scanner
void scan::DoScan(char *infile)
{
   file = infile ;
   ifstream scanFile(file);

   int LineCount = 1;         //the word in which line
   strToken = "";           //member of class scan
   int strgrdLine = 1;       //temp var for show result on StringGrid

   const int bflength = 254;  //length of getline buffer
   char buffer[bflength];     //getline buffer

   bool comment = false;      //for this kind of comment--"/**/"
   char prech = '@';          //pre char for /**/ comment

   AnsiString preToken = "";   //pre Token for judging pointee and multi '*'

   while(scanFile.getline(buffer,bflength))   //get each line of the .cpp file
   {
     int lnscptr = 0;
     while(buffer[lnscptr]==' ')  //trim left space
       lnscptr++;
     ch = buffer[lnscptr];
 /*scan:important arithmetic*/
    if(comment)
    {
     prech = ch;
     goto flag1;
    }
    else
    {
     while(ch!='\0')   //while not the line finish symbol do analyse
     {
        if(isalpha(ch) || ch=='_')  // ID or KeyWord
        {
          while(isalpha(ch) || isdigit(ch) || ch=='_')
          {
           strToken = strToken + ch;
           ch = buffer[++lnscptr];
          }
          if(IsReserveWord(strToken))     //is ReserveWord
          {
           Print(LineCount,strToken,strgrdLine,"保留字");
           MakeTL(LineCount,strToken,"保留字");
          }
          else                    //is ID
          {
           Print(LineCount,strToken,strgrdLine,"标识符");
           MakeTL(LineCount,strToken,"标识符");
          }
          preToken = strToken;
          strgrdLine++;
          strToken.Delete(1,strToken.Length());
        }
        else if(isdigit(ch))     // Numerci
        {
          while(isdigit(ch) || ch=='.')
          {
           strToken = strToken + ch;
           ch = buffer[++lnscptr];
          }

          bool isInt = true;
          for(int pos=1; pos<=strToken.Length(); pos++)
          {
           if(strToken[pos]=='.')
           {
             isInt = false;
             break;
           }
          }
          if(isInt)         //is Int
          {
            Print(LineCount,strToken,strgrdLine,"整数");
            MakeTL(LineCount,strToken,"整数");
          }
          else             //is Float
          {
            Print(LineCount,strToken,strgrdLine,"浮点数");
            MakeTL(LineCount,strToken,"浮点数");
          }

          strgrdLine++;
          strToken.Delete(1,strToken.Length());
        }
        else if(ch==' ' || ch=='\t' || ch=='\n') //skip space,tab and enter
        {
          ch = buffer[++lnscptr];
        }
        else        //other special symbols
        {
          switch(ch)
          {
            case '#':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                while(ch!='\0' && ch!='/')
                 {
                  strToken = strToken + ch;
                  ch = buffer[++lnscptr];
                 }
                Print(LineCount,strToken,strgrdLine,"预定义");
                MakeTL(LineCount,strToken,"预定义");
                break;

            case '\'':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                while(ch!='\'')
                {
                  strToken = strToken + ch;
                  ch = buffer[++lnscptr];
                }
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                Print(LineCount,strToken,strgrdLine,"字符常量");
                MakeTL(LineCount,strToken,"字符常量");
                break;
            case '"':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                while(ch!='"')
                {
                  strToken = strToken + ch;
                  ch = buffer[++lnscptr];
                }
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                Print(LineCount,strToken,strgrdLine,"字符串");
                MakeTL(LineCount,strToken,"字符串");
                break;

            case '=':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                if(ch=='=')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"逻辑等");
                 MakeTL(LineCount,strToken,"逻辑等");
                }
                else
                {
                 Print(LineCount,strToken,strgrdLine,"赋值号");
                 MakeTL(LineCount,strToken,"赋值号");
                }
                break;

            case '+':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                if(ch=='=')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"+=运算符");
                 MakeTL(LineCount,strToken,"+=运算符");
                }
                else if(ch=='+')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"递增运算符");
                 MakeTL(LineCount,strToken,"递增运算符");
                }
                else
                {
                 Print(LineCount,strToken,strgrdLine,"加号运算符");
                 MakeTL(LineCount,strToken,"加号运算符");
                }
                break;

            case '-':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                if(ch=='=')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"-=运算符");
                 MakeTL(LineCount,strToken,"-=运算符");
                }
                else if(ch=='>')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"指针运算符");
                 MakeTL(LineCount,strToken,"指针运算符");
                }
                else if(ch=='-')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"递减运算符");
                 MakeTL(LineCount,strToken,"递减运算符");
                }
                else
                {
                 Print(LineCount,strToken,strgrdLine,"减号运算符");
                 MakeTL(LineCount,strToken,"减号运算符");
                }
                break;

            case '*':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                if(ch=='=')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"*=运算符");
                 MakeTL(LineCount,strToken,"*=运算符");
                }
                else if(IsReserveWord(preToken))
                {
                 Print(LineCount,strToken,strgrdLine,"指针定义符");
                 MakeTL(LineCount,strToken,"指针定义符");
                }
                else
                {
                 Print(LineCount,strToken,strgrdLine,"乘号运算符");
                 MakeTL(LineCount,strToken,"乘号运算符");
                }
                break;

            case '/':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                if(ch=='/')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 while(ch!='\0')
                 {
                  strToken = strToken + ch;
                  ch = buffer[++lnscptr];
                 }
                 Print(LineCount,strToken,strgrdLine,"注释");
                }

                else if(ch=='*')
                {
           flag1:strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 while((prech!='*' || ch!='/') && ch!='\0')
                 {
                  strToken = strToken + ch;
                  ch = buffer[++lnscptr];
                  prech = buffer[lnscptr-1];

                 }
                 if(ch!='\0')
                   {strToken = strToken + ch;}

                 if(prech=='*' && ch=='/')
                 { comment = false;}
                 else
                 { comment = true;}

                 if(ch!='\0')
                   {ch = buffer[++lnscptr];}
                 Print(LineCount,strToken,strgrdLine,"注释");
                }

                else if(ch=='=')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"/=运算符");
                 MakeTL(LineCount,strToken,"/=运算符");
                }
                else
                {
                 Print(LineCount,strToken,strgrdLine,"除号运算符");
                 MakeTL(LineCount,strToken,"除号运算符");
                }
                break;

            case '%':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                if(ch=='=')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"%=运算符");
                 MakeTL(LineCount,strToken,"%=运算符");
                }
                else
                {
                 Print(LineCount,strToken,strgrdLine,"模运算符");
                 MakeTL(LineCount,strToken,"模运算符");
                }
                break;

            case '<':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                if(ch=='=')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"小于等于号");
                 MakeTL(LineCount,strToken,"小于等于号");
                }
                else
                {
                 Print(LineCount,strToken,strgrdLine,"小于号");
                 MakeTL(LineCount,strToken,"小于号");
                }
                break;

            case '>':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                if(ch=='=')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"大于等于号");
                 MakeTL(LineCount,strToken,"大于等于号");
                }
                else
                {
                 Print(LineCount,strToken,strgrdLine,"大于号");
                 MakeTL(LineCount,strToken,"大于号");
                }
                break;

            case '!':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                if(ch=='=')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"不等于号");
                 MakeTL(LineCount,strToken,"不等于号");
                }
                else
                {
                 Print(LineCount,strToken,strgrdLine,"逻辑非");
                 MakeTL(LineCount,strToken,"逻辑非");
                }
                break;

            case '&':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                if(ch=='&')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"逻辑与");
                 MakeTL(LineCount,strToken,"逻辑与");
                }
                else
                {
                 Print(LineCount,strToken,strgrdLine,"位与运算符");
                 MakeTL(LineCount,strToken,"位与运算符");
                }
                break;

            case '|':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                if(ch=='|')
                {
                 strToken = strToken + ch;
                 ch = buffer[++lnscptr];
                 Print(LineCount,strToken,strgrdLine,"逻辑或");
                 MakeTL(LineCount,strToken,"逻辑或");
                }
                else
                {
                 Print(LineCount,strToken,strgrdLine,"位或运算符");
                 MakeTL(LineCount,strToken,"位或运算符");
                }
                break;

            case '^':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                Print(LineCount,strToken,strgrdLine,"异或运算符");
                MakeTL(LineCount,strToken,"异或运算符");
                break;

            case '[':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                Print(LineCount,strToken,strgrdLine,"左方括号");
                MakeTL(LineCount,strToken,"左方括号");
                break;
            case ']':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                Print(LineCount,strToken,strgrdLine,"右方括号");
                MakeTL(LineCount,strToken,"右方括号");
                break;
            case '(':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                Print(LineCount,strToken,strgrdLine,"左圆括号");
                MakeTL(LineCount,strToken,"左圆括号");
                break;
            case ')':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                Print(LineCount,strToken,strgrdLine,"右圆括号");
                MakeTL(LineCount,strToken,"右圆括号");
                break;
            case '{':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                Print(LineCount,strToken,strgrdLine,"左花括号");
                MakeTL(LineCount,strToken,"左花括号");
                break;
            case '}':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                Print(LineCount,strToken,strgrdLine,"右花括号");
                MakeTL(LineCount,strToken,"右花括号");
                break;
            case ',':
            case ';':
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                Print(LineCount,strToken,strgrdLine,"分界符");
                MakeTL(LineCount,strToken,"分界符");
                break;

            default:
                strToken = strToken + ch;
                ch = buffer[++lnscptr];
                Print(LineCount,strToken,strgrdLine,"其他特殊符号");
                MakeTL(LineCount,strToken,"其他特殊符号");
                break;
          }//switch
          strgrdLine++;
          strToken.Delete(1,strToken.Length());
        }//else

     }//_while buffer[]!='/0'
    }
 /*scan:important arithmetic*/
    LineCount++;
   } //_while getline
  scanFile.close();
}

void scan::Compress(char *zipFname)
{
   ofstream compress(zipFname,ios::app);

   for(int i=0; i<TokenList->Count; i++)
   {
        Anode = (Listnd)TokenList->Items[i];
        if(Anode->type=="预定义")
        {
         compress<<Anode->words.c_str()<<endl;
        }
        else if(Anode->type=="保留字")
        {
         compress<<Anode->words.c_str();
         compress<<" ";
        }
        else
        {
         compress<<Anode->words.c_str();
        }
   }
   compress.close();

}

本文地址:http://com.8s8s.com/it/it22568.htm