一、实验目的
实现简单的词法分析程序;
使编写的分析程序能够对简单的程序段进行词法分析。
二、实验软硬件要求
相关软件:VC++2010 或者Dev-Cpp,推荐VC++2019
操作系统:windows操作系统
三、实验要求
1. 对单词的构词规则有明确的定义;
2. 编写的分析程序能够正确识别源程序中的单词符号,包括:标识符、关键字、常见运算符、分隔符、整数、小数、单行注释、字符常数、字符串常数;
3. 识别出的单词以<种别码,值>的形式输出或保存;
4. *对于源程序中的词法错误,能够做出简单的错误处理,给出简单的错误提示,保证顺利完成整个源程序的词法分析;(由于时间、精力受限,暂不解决)
5. *识别科学计数法数据,多行注释
四、实验内容
自定义一种程序设计语言,或者选择已有的一种高级语言,利用状态转换图编制它的词法分析程序。词法分析程序的实现可以采用任何一种编程工具。
五、实验步骤(程序代码,运行结果等)
//头文件
#include <iostream>
#include <map>
#include <algorithm>
#include <string>
#include <fstream>
#include <sstream>
#include <iostream>
#include <stdlib.h>
using namespace std;
string instr;// the input text to be analysed
// NOTE(review): some C libraries declare a function named index() in <string.h>/<strings.h>;
// this global may clash with it on those toolchains — confirm on the target compiler.
int index;// position in instr of the next character to be read
char character;// global holding the most recently read character
string token;// characters collected so far for the token being recognised
const int len = 100;
string Reserve[len];// reserved-word table
string Boundary[2*len];// delimiter table
string Operator[3 * len];// operator table
// A <category code, value> pair describing one recognised token.
struct Binary {
    int category;       // category code of the token
    std::string value;  // text of the token; "-" when the caller supplies none

    // The value argument is optional, so a pair can be built from a category code alone.
    Binary(int c, std::string v = "-") : category(c), value(v) {}
};
// Fills the reserved-word table: the k-th word below is stored at Reserve[k],
// counting from 1, so slot 0 stays empty.
void init_Reserve() {
    static const char* const kWords[] = {
        "main", "int", "if", "else", "while", "for", "read", "write", "bool", "break",
        "case", "catch", "char", "class", "const", "continue", "default", "delete", "do", "double",
        "enum", "false", "true", "float", "friend", "goto", "inline", "long", "new", "private",
        "protected", "public", "return", "short", "signed", "sizeof", "static", "struct", "switch", "this",
        "try", "typedef", "unsigned", "using", "virtual", "void", "include", "iostream", "namespace", "std"
    };
    const int count = static_cast<int>(sizeof(kWords) / sizeof(kWords[0]));
    for (int k = 0; k < count; ++k) {
        Reserve[k + 1] = kWords[k];
    }
}
// Fills the operator table; the symbols occupy consecutive slots starting at 210.
void init_Operator() {
    static const char* const kSymbols[] = {
        "+", "-", "*", "/", "<", "<=", ">", ">=", "!=", "==", "="
    };
    const int first = 210;
    const int count = static_cast<int>(sizeof(kSymbols) / sizeof(kSymbols[0]));
    for (int k = 0; k < count; ++k) {
        Operator[first + k] = kSymbols[k];
    }
}
// Fills the delimiter table; the symbols occupy consecutive slots starting at 121.
void init_Boundary() {
    static const char* const kSymbols[] = {
        "(", ")", ",", ";", "{", "}", "#", "\'", "\"", "//", "/*", "*/"
    };
    const int first = 121;
    const int count = static_cast<int>(sizeof(kSymbols) / sizeof(kSymbols[0]));
    for (int k = 0; k < count; ++k) {
        Boundary[first + k] = kSymbols[k];
    }
}
// Tells whether the current character counts as blank: space, tab, newline,
// form feed, vertical tab, or the NUL character.
bool isWs(){
    switch (character) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\v':
    case '\0':
        return true;
    default:
        return false;
    }
}
// Tells whether the current character is the first character of any entry
// in slots 210..220 of the operator table.
bool isOperator(){
    int code = 210;
    while (code <= 220) {
        if (character == Operator[code][0]) {
            return true;
        }
        ++code;
    }
    return false;
}
// Tells whether the current character is the first character of any entry
// in slots 121..132 of the delimiter table.
bool isBoundary(){
    int code = 121;
    while (code <= 132) {
        if (character == Boundary[code][0]) {
            return true;
        }
        ++code;
    }
    return false;
}
// Loads the character at the current read position into `character` and
// advances the position by one. The position is not range-checked here.
void getChar() {
    const int pos = index;
    index = pos + 1;
    character = instr[pos];
}
void getnbc() {//读入非空白字符
while (isWs()) {
getChar();//读取空格、tab、换行cf
}
}
// Appends the current character to the token being built.
void concat() {
    token += character;
}
// Tells whether the current character is an ASCII letter (A-Z or a-z).
bool letter() {
    const bool upper = character >= 'A' && character <= 'Z';
    const bool lower = character >= 'a' && character <= 'z';
    return upper || lower;
}
// Tells whether the current character is a hexadecimal digit (0-9, A-F or a-f).
bool hex(){
    const bool upper = character >= 'A' && character <= 'F';
    const bool lower = character >= 'a' && character <= 'f';
    const bool decimal = character >= '0' && character <= '9';
    return upper || lower || decimal;
}
// Tells whether the current character is a decimal digit.
bool digit() {
    return !(character < '0' || character > '9');
}
// Pushes the last character back: moves the read position one step back and
// resets the current character to a space.
void retract(){
    --index;
    character = ' ';
}
// Looks the token up in the reserved-word table.
// Returns the slot number of the first matching entry, or -1 when there is none.
int reserve() {
    int code = 0;
    while (code < len && !(Reserve[code] == token)) {
        ++code;
    }
    return (code < len) ? code : -1;
}
// Looks the token up in slots 210..220 of the operator table.
// Returns the slot number of the first matching entry, or -1 when there is none.
int operator1() {
    int code = 210;
    while (code <= 220 && !(Operator[code] == token)) {
        ++code;
    }
    return (code <= 220) ? code : -1;
}
// Looks the token up in slots 121..132 of the delimiter table.
// Returns the slot number of the first matching entry, or -1 when there is none.
int boundary() {
    int code = 121;
    while (code <= 132 && !(Boundary[code] == token)) {
        ++code;
    }
    return (code <= 132) ? code : -1;
}
// Classifies the numeric token: 800 when it contains a '.', otherwise 400.
int digit1() {
    // Walk the NUL-terminated text of the token and stop at the first '.'.
    for (const char* p = token.c_str(); *p != '\0'; ++p) {
        if (*p == '.') {
            return 800;
        }
    }
    return 400;
}
// Tells whether some pair of adjacent characters in str1 is equal to str2.
// Returns false when str1 has fewer than two characters, and therefore also
// whenever str2 is not exactly two characters long.
bool findstring(std::string str1, std::string str2)
{
    // i + 1 < size() keeps both characters of the window inside str1, so an
    // empty or one-character str1 never enters the loop.
    for (std::string::size_type i = 0; i + 1 < str1.size(); ++i)
    {
        // Compare the two-character window as a string; adding the two chars
        // would only produce their numeric sum.
        if (str1.compare(i, 2, str2) == 0)
        {
            return true;
        }
    }
    return false;
}
// Reports the current token as invalid on standard output and returns the
// <0, "-"> pair.
Binary error() {
    const Binary invalid(0, "-");
    cout << token << "\t-->\t该单词不合法" << endl;
    return invalid;
}
//词法分析函数,逐个识别单词
Binary LexAnalyze() {
token = "";
getChar();
getnbc(); //读取到第一个非空白的字符
string val;
int num = -1;
switch (character) {
case'a':
case'b':
case'c':
case'd':
case'e':
case'f':
case'g':
case'h':
case'i':
case'j':
case'k':
case'l':
case'm':
case'n':
case'o':
case'p':
case'q':
case'r':
case's':
case't':
case'u':
case'v':
case'w':
case'x':
case'y':
case'z':
case'A':
case'B':
case'C':
case'D':
case'E':
case'F':
case'G':
case'H':
case'I':
case'J':
case'K':
case'L':
case'M':
case'N':
case'O':
case'P':
case'Q':
case'R':
case'S':
case'T':
case'U':
case'V':
case'W':
case'X':
case'Y':
case'Z':
//识别关键字、标识符
concat();//追加到token末尾
getChar();//读取下一个字符
while (letter() || digit()) {//为字母或数字
concat();//追加到token末尾
getChar();//读取下一个字符
}
retract();//回退一个字符
num = reserve();//查看保留字表,判断是否匹配
if (num != -1) {
return Binary(num,token);//找到关键字
}
else {
return Binary(700, token);//不是关键字,那就是标识符
}
break;
case'*':
concat();//追加到token末尾
num = operator1();
return Binary(num, token);
break;
case'<':
case'>':
case'=':
case'!':
case'+':
case'-':
case'/':
//识别运算符
concat();//追加到token末尾
getChar();//读取下一个字符
if (character == '=')
concat();
else retract();
num = operator1();
//判断单行注释
if (token == "/" )
{
getChar();
if (character == '/')
{
concat();
num = boundary();
getChar();
while (character != '\n')
{
concat();
getChar();
}
retract();
}
}
//判断多行注释
if (token == "/")
{
if (character == '*')
{
bool a = true;
concat();
num = boundary();
getChar();
string token2 = "*/";
while (a)
{
while (character != '*'&& isWs())
{
getChar();
concat();
}
getChar();
if (character == '/')
{
concat();
a = false;
break;
}
else retract();
}
}
}
return Binary(num, token);
break;
case'(':
case')':
case',':
case';':
case'{':
case'}':
case'#':
concat();
if (isBoundary())
{
num = boundary();
return Binary(num, token);
}
break;
case '\'':
concat();
getChar();
if(character>='a'&&character<='z'|| character>='A'&& character<='Z')
concat();
else if (character == '\\')
{
concat();
getChar();
if (character == 'n' || character == 't' || character == 'r' || character == '0' || character == '\'' || character == '\"'
|| character == '\\')
{
concat();
}
}
getChar();
if (character == '\'')
{
concat();
return Binary(500, token);
}
else retract();
num = boundary();
return Binary(num, token);
break;
case '"':
//识别字符串常量
concat();
getChar();
while (character != '"')
{
concat();
getChar();
}
concat();
return Binary(600, token);
break;
case'0':
//识别十六进制
concat();
getChar();
if (character == 'x')
{
concat();
getChar();
while (hex())
{
concat();
getChar();
}
retract();
return Binary(160, token);
}
else {
while (digit()) {//为数字
concat();//追加到token末尾
getChar();//读取下一个字符
}
//识别小数
if (character == '.')
{
concat();
getChar();
while (digit())
{
concat();//追加到token末尾
getChar();//读取下一个字符
}
if (character == 'e')
{
concat();
getChar();
while (digit())
{
concat();//追加到token末尾
getChar();//读取下一个字符
}
}
}
retract();//回退一个字符
num = digit1();//查看保留字表,判断是否匹配
return Binary(num, token);//找到关键字
}
break;
case'1':
case'2':
case'3':
case'4':
case'5':
case'6':
case'7':
case'8':
case'9':
//识别常数单词
concat();//追加到token末尾
getChar();//读取下一个字符
while (digit()) {//为数字
concat();//追加到token末尾
getChar();//读取下一个字符
}
//识别小数
if (character == '.')
{
concat();
getChar();
while (digit())
{
concat();//追加到token末尾
getChar();//读取下一个字符
}
if (character == 'e'||character=='E')
{
concat();
getChar();
while (digit())
{
concat();//追加到token末尾
getChar();//读取下一个字符
}
}
}
retract();//回退一个字符
num = digit1();//查看保留字表,判断是否匹配
return Binary(num, token);//找到关键字
break;
case '\\':
//识别转义字符
concat();
getChar();
if (character == 'n' || character == 't' || character == 'r' || character == '0'|| character == '\''|| character == '\"'
|| character == '\\')
{
concat();
return Binary(500, token);
}
else {
retract();
num = boundary();
return Binary(num, token);
}
break;
default:
//遇到无法识别的字符,提示错误信息
concat();
return error();
}
}
// Prints the three lookup tables (reserved words, delimiters, operators) to
// standard output, one non-empty entry per line together with its slot number.
void show_table() {
    cout << "=================="<<"保留字"<<"==================" << endl;
    cout << "保留字符\t类别编码" << endl;
    for (int code = 0; code < len; ++code) {
        const string& word = Reserve[code];
        if (word == "") {
            continue;
        }
        // Words of eight or more characters get one tab instead of two.
        const char* gap = (word.size() >= 8) ? "\t" : "\t\t";
        cout << word << gap << code << endl;
    }

    cout << "\n==================" << "界符" << "==================" << endl;
    cout << "界符\t\t类别编码" << endl;
    for (int code = 0; code < 2 * len; ++code) {
        const string& symbol = Boundary[code];
        if (symbol == "") {
            continue;
        }
        cout << symbol << "\t\t" << code << endl;
    }

    cout << "\n==================" << "运算符" << "==================" << endl;
    cout << "运算符\t\t类别编码" << endl;
    for (int code = 0; code < 3 * len; ++code) {
        const string& symbol = Operator[code];
        if (symbol == "") {
            continue;
        }
        cout << symbol << "\t\t" << code << endl;
    }
}
// Reads the whole of the named file and returns its contents as a string.
// filename: path of the file to read; taken as const char* so both string
//           literals and writable char arrays can be passed.
// Returns an empty string, after writing a message to std::cerr, when the file
// cannot be opened.
std::string readFileIntoString(const char * filename)
{
    std::ifstream ifile(filename);// open the file for reading
    if (!ifile)
    {
        std::cerr << "无法打开文件: " << filename << std::endl;
        return std::string();
    }
    // Copy the file's stream buffer into the string stream in one step.
    std::ostringstream buf;
    buf << ifile.rdbuf();
    return buf.str();
}
int main() {
init_Reserve();//保留字表初始化
init_Boundary();//界符表初始化
init_Operator();//运算符表初始化
index = 0;
character = ' ';
token = "";
//文件名
char fn1[] = "a.txt";
char* fn=fn1;
//CONST CHAR类型的值不能用于初始化CHAR类型的实体.cf
string str;
str=readFileIntoString(fn);
instr = str;
cout <<instr<<endl;//输出符号串
//识别二元组初始化
Binary word(0,"-");
//循环进行词法分析直到识别所有单词符号
cout << "\n------------------------识别结果------------------------" << endl;
while (index < instr.size()) {
word=LexAnalyze();
if (word.category != 0) {
cout << "识别单词:\t(" << word.category << "," << word.value << ")" << endl;
}//种别码,值
}
//展示构造的各种词汇表
cout << "\n------------------------词汇表展示------------------------\n" << endl;
show_table();
system("pause");
return 0;
}
程序可以实现标识符、关键字、常见运算符、分隔符、整数、十六进制数、小数、单行注释、多行注释、字符常数、字符串常数、科学计数法数据的识别。
测试文本
/*
飞行荷兰人
*/
//测试程序1*/
void main()
{
0;
int a12d=3.6e2;
a=3.14;
b=576;
c=0182976;01234 +056;0+45;
d=0xa48d+092;
e+=1;
int b="hello";
a++;
sum=a+b;//求两个数之和
if(a >b+c)
a='a';
else
a='\n';
}
测试结果
由于打印出来的篇幅过长,就节选几个比较有代表性的。