利用c++进行程序词法分析_c++ 编写词法分析-CSDN博客

本文链接：https://blog.csdn.net/m0_58138734/article/details/127140116

一、实验目的

实现简单的词法分析程序；

能够使编写的分析程序对简单的程序段进行词法分析。

二、实验软硬件要求

相关软件：VC++2010 或者Dev-Cpp，推荐VC++2019

操作系统：windows操作系统

三、实验要求

1. 对单词的构词规则有明确的定义；

2. 编写的分析程序能够正确识别源程序中的单词符号，包括：标识符、关键字、常见运算符、分隔符、整数、小数、单行注释、字符常数、字符串常数；

3. 识别出的单词以<种别码，值>的形式输出或保存；

4. *对于源程序中的词法错误，能够做出简单的错误处理，给出简单的错误提示，保证顺利完成整个源程序的词法分析；（由于时间、精力受限，暂不解决）

5. *识别科学计数法数据，多行注释

四、实验内容

自定义一种程序设计语言，或者选择已有的一种高级语言，利用状态转换图编制它的词法分析程序。词法分析程序的实现可以采用任何一种编程工具。

五、实验步骤（程序代码，运行结果等）

//头文件
#include <iostream>
#include <map>
#include <algorithm>
#include <string>

#include <fstream>
#include <sstream>
#include <iostream>
#include <stdlib.h>


using namespace std;
// Scanner state shared by every function in this file.
// NOTE(review): on some POSIX toolchains a global named `index` may clash with the
// legacy index() function from <strings.h> — confirm on the target compiler.
string instr;// the input text being analysed
int index;// position in instr of the next character getChar() will read
char character;// the most recently read character
string token;// characters collected so far for the lexeme being recognised
const int len = 100;
string Reserve[len];// reserved-word table (array slot = category code)
string Boundary[2*len];// delimiter table (array slot = category code)
string Operator[3 * len];// operator table (array slot = category code)
// A <category code, value> pair describing one recognised lexeme.
struct Binary {
	// Builds a pair from a category code and the lexeme text.
	// The text defaults to "-" so that callers may supply the code alone.
	// Members are set through the initializer list, so `value` is constructed
	// once instead of being default-constructed and then assigned.
	Binary(int c, const std::string& v = "-") : category(c), value(v) {}
	int category; // category code
	std::string value; // lexeme text
};
void init_Reserve() {// fills the reserved-word table
	// Listed in category-code order: the first word gets code 1, the last code 50.
	static const char* const kWords[] = {
		"main", "int", "if", "else", "while", "for", "read", "write", "bool", "break",
		"case", "catch", "char", "class", "const", "continue", "default", "delete", "do", "double",
		"enum", "false", "true", "float", "friend", "goto", "inline", "long", "new", "private",
		"protected", "public", "return", "short", "signed", "sizeof", "static", "struct", "switch", "this",
		"try", "typedef", "unsigned", "using", "virtual", "void", "include", "iostream", "namespace", "std"
	};
	const int count = sizeof(kWords) / sizeof(kWords[0]);
	// Slot 0 is not written here; word k of the list goes to Reserve[k + 1].
	for (int k = 0; k < count; ++k)
		Reserve[k + 1] = kWords[k];
}
void init_Operator() {// fills the operator table
	// Listed in category-code order; the first symbol gets code 210, the last 220.
	static const char* const kSymbols[] = {
		"+", "-", "*", "/", "<", "<=", ">", ">=", "!=", "==", "="
	};
	const int firstCode = 210;
	const int count = sizeof(kSymbols) / sizeof(kSymbols[0]);
	for (int k = 0; k < count; ++k)
		Operator[firstCode + k] = kSymbols[k];
}
void init_Boundary() {// fills the delimiter table
	// Listed in category-code order; the first symbol gets code 121, the last 132.
	static const char* const kSymbols[] = {
		"(", ")", ",", ";", "{", "}", "#", "\'", "\"", "//", "/*", "*/"
	};
	const int firstCode = 121;
	const int count = sizeof(kSymbols) / sizeof(kSymbols[0]);
	for (int k = 0; k < count; ++k)
		Boundary[firstCode + k] = kSymbols[k];
}
// Reports whether the current character counts as blank.
// Besides space, tab, newline, form feed and vertical tab, the NUL character is
// also treated as blank.
bool isWs(){
	switch (character) {
	case ' ':
	case '\t':
	case '\n':
	case '\f':
	case '\v':
	case '\0':
		return true;
	default:
		return false;
	}
}
// Reports whether the current character is the first character of any entry
// in operator-table slots 210..220.
bool isOperator(){
	int code = 210;
	while (code <= 220) {
		if (character == Operator[code][0])
			return true;
		++code;
	}
	return false;
}
// Reports whether the current character is the first character of any entry
// in delimiter-table slots 121..132.
bool isBoundary(){
	bool matched = false;
	for (int code = 121; code <= 132 && !matched; ++code)
		matched = (character == Boundary[code][0]);
	return matched;
}
// Reads the character at the current position of instr into `character`
// and then advances the position by one. No bounds check is performed here.
void getChar() {
	character = instr[index];
	++index;
}
// Keeps reading characters until the current one is no longer blank (per isWs()).
// Does nothing when the current character is already non-blank.
// NOTE(review): there is no end-of-input test in this loop and isWs() accepts '\0',
// so it looks like this can keep reading after the end of instr — confirm.
void getnbc() {
	for (; isWs(); getChar()) {
		// skipping spaces, tabs, newlines and the other blank characters
	}
}
// Appends the current character to the end of token.
void concat() {
	token += character;
}

// Reports whether the current character is an ASCII letter (A-Z or a-z).
bool letter() {
	const bool isUpper = (character >= 'A') && (character <= 'Z');
	const bool isLower = (character >= 'a') && (character <= 'z');
	return isUpper || isLower;
}
// Reports whether the current character is a hexadecimal digit (0-9, A-F or a-f).
bool hex(){
	const bool isDecimal = (character >= '0') && (character <= '9');
	const bool isUpperHex = (character >= 'A') && (character <= 'F');
	const bool isLowerHex = (character >= 'a') && (character <= 'f');
	return isUpperHex || isLowerHex || isDecimal;
}
// Reports whether the current character is a decimal digit (0-9).
bool digit() {
	return !(character < '0' || character > '9');
}
// Pushes the last read character back: moves the read position one step
// backwards and replaces the current character with a space.
void retract(){
	--index;
	character = ' ';
}
// Looks token up in the reserved-word table.
// Returns the slot of the first entry equal to token, or -1 when there is none.
int reserve() {
	int slot = 0;
	while (slot < len && !(Reserve[slot] == token))
		++slot;
	return (slot < len) ? slot : -1;
}

// Looks token up in operator-table slots 210..220.
// Returns the matching slot, or -1 when token is not one of those operators.
int operator1() {
	int code = 210;
	while (code <= 220 && !(Operator[code] == token))
		++code;
	return (code <= 220) ? code : -1;
}

// Looks token up in delimiter-table slots 121..132.
// Returns the matching slot, or -1 when token is not one of those delimiters.
int boundary() {
	int code = 121;
	while (code <= 132 && !(Boundary[code] == token))
		++code;
	return (code <= 132) ? code : -1;
}

// Classifies the numeric text held in token.
// Returns 800 when a '.' appears before the first NUL character, otherwise 400.
int digit1() {
	for (const char* cursor = token.c_str(); *cursor != '\0'; ++cursor) {
		if (*cursor == '.')
			return 800;
	}
	return 400;
}

// Reports whether str2 occurs somewhere inside str1.
//
// str1: the text to search.
// str2: the text to look for (for example a two-character marker such as "*/").
// Returns true when str2 is found; an empty str2 never matches.
//
// The previous loop built each candidate with `str1[i] + str1[i + 1]`, which adds
// the two characters as integers and stores a single character, so a
// two-character str2 could never match; it also indexed past the end of an
// empty str1. std::string::find performs the intended search safely.
bool findstring(const std::string& str1, const std::string& str2)
{
	if (str2.empty())
		return false;
	return str1.find(str2) != std::string::npos;
}
// Prints the current token together with an "illegal word" message and
// returns the pair (0, "-").
Binary error() {
	cout << token;
	cout << "\t-->\t该单词不合法" << endl;
	Binary invalid(0, "-");
	return invalid;
}
//词法分析函数，逐个识别单词
Binary LexAnalyze() {
	token = "";
	getChar();
	getnbc(); //读取到第一个非空白的字符
	string val;
	int num = -1;
	switch (character) {
	
	case'a':
	case'b':
	case'c':
	case'd':
	case'e':
	case'f':
	case'g':
	case'h':
	case'i':
	case'j':
	case'k':
	case'l':
	case'm':
	case'n':
	case'o':
	case'p':
	case'q':
	case'r':
	case's':
	case't':
	case'u':
	case'v':
	case'w':
	case'x':
	case'y':
	case'z':
	case'A':
	case'B':
	case'C':
	case'D':
	case'E':
	case'F':
	case'G':
	case'H':
	case'I':
	case'J':
	case'K':
	case'L':
	case'M':
	case'N':
	case'O':
	case'P':
	case'Q':
	case'R':
	case'S':
	case'T':
	case'U':
	case'V':
	case'W':
	case'X':
	case'Y':
	case'Z':
		//识别关键字、标识符 
		concat();//追加到token末尾
		getChar();//读取下一个字符
		while (letter() || digit()) {//为字母或数字
			concat();//追加到token末尾
			getChar();//读取下一个字符
		}
		retract();//回退一个字符
		num = reserve();//查看保留字表,判断是否匹配
		if (num != -1) {
			return Binary(num,token);//找到关键字 
		}
		else {
			return Binary(700, token);//不是关键字，那就是标识符 
		}
		break;
	case'*':
		concat();//追加到token末尾
		num = operator1();
		return Binary(num, token);
		break;
	case'<':
	
	case'>':
	
	case'=':
	
	case'!':
	
	case'+':
	
	case'-':

	case'/':
		//识别运算符
		concat();//追加到token末尾
		getChar();//读取下一个字符
		if (character == '=')
			concat();	
		else retract();
		num = operator1();
		//判断单行注释
		if (token == "/" )
		{
			getChar();
			if (character == '/')
			{
				concat();
				num = boundary();
				getChar();
				while (character != '\n')
				{
					concat();
					getChar();
				}
				retract();
			}
		}
		//判断多行注释
		if (token == "/")
		{
			if (character == '*')
			{
				bool a = true;
				concat();
				num = boundary();
				getChar();
				string token2 = "*/";
				while (a) 
				{
					while (character != '*'&& isWs())
					{
						getChar();
						concat();
					}
					getChar();
					if (character == '/')
					{
						concat();
						a = false;
						break;
					}
					else retract();
				}
				
			}
		}
		return Binary(num, token);
		break;
	case'(':

	case')':
	
	case',':
	
	case';':
	
	case'{':

	case'}':
	
	case'#':
		concat();
		if (isBoundary())
		{
			num = boundary();
			return Binary(num, token);
		}
		break;
	case '\'':
		concat();
		getChar();
		if(character>='a'&&character<='z'|| character>='A'&& character<='Z')
		concat();
		else if (character == '\\')
		{
			concat();
			getChar();
			if (character == 'n' || character == 't' || character == 'r' || character == '0' || character == '\'' || character == '\"'
				|| character == '\\')
			{
				concat();
			}
		}
			
		getChar();
		if (character == '\'')
		{
			concat();
			return Binary(500, token);
		}
		else retract();
		num = boundary();
		return Binary(num, token);
		break;
	case '"':
		//识别字符串常量
		concat();
		getChar();
		while (character != '"')
		{
			concat();
			getChar();
		}
		concat();
		return Binary(600, token);
		break;
	case'0':
		//识别十六进制
		concat();
		getChar();
		if (character == 'x')
		{
			concat();
			getChar();
			while (hex())
			{
				concat();
				getChar();
			}
			retract();
			return Binary(160, token);
		}
		else {
			while (digit()) {//为数字
				concat();//追加到token末尾
				getChar();//读取下一个字符
			}
			//识别小数
			if (character == '.')
			{
				concat();
				getChar();
				while (digit())
				{
					concat();//追加到token末尾
					getChar();//读取下一个字符
				}
				if (character == 'e')
				{
					concat();
					getChar();
					while (digit())
					{
						concat();//追加到token末尾
						getChar();//读取下一个字符
					}
				}
			}
			retract();//回退一个字符
			num = digit1();//查看保留字表,判断是否匹配
			return Binary(num, token);//找到关键字 
		}
		break;
	case'1':
	case'2':
	case'3':
	case'4':
	case'5':
	case'6':
	case'7':
	case'8':
	case'9':

		//识别常数单词
		concat();//追加到token末尾
		getChar();//读取下一个字符
		while (digit()) {//为数字
			concat();//追加到token末尾
			getChar();//读取下一个字符
		}
		//识别小数
		if (character == '.')
		{
			concat();
			getChar();
			while (digit())
			{
				concat();//追加到token末尾
				getChar();//读取下一个字符
			}
			if (character == 'e'||character=='E')
			{
				concat();
				getChar();
				while (digit())
				{
					concat();//追加到token末尾
					getChar();//读取下一个字符
				}
			}
		}
		retract();//回退一个字符
		num = digit1();//查看保留字表,判断是否匹配
		return Binary(num, token);//找到关键字 
		break;
	case '\\':
		//识别转义字符
		concat();
		getChar();
		if (character == 'n' || character == 't' || character == 'r' || character == '0'|| character == '\''|| character == '\"'
			|| character == '\\')
		{
			concat();
			return Binary(500, token);
		}
		else {
			retract();
			num = boundary();
			return Binary(num, token);
		}
		break;
	default:
		//遇到无法识别的字符，提示错误信息 
		concat();
		return error();
	}
}
// Prints the three lookup tables (reserved words, delimiters, operators),
// one "text <tab> category code" line per non-empty slot.
void show_table() {
	cout << "=================="<<"保留字"<<"==================" << endl;
	cout << "保留字符\t类别编码" << endl;
	for (int code = 0; code < len; ++code) {
		if (Reserve[code].empty())
			continue;// unused slot
		// Words of eight or more characters get one tab instead of two,
		// presumably to keep the code column aligned.
		const char* gap = (Reserve[code].size() >= 8) ? "\t" : "\t\t";
		cout << Reserve[code] << gap << code << endl;
	}

	cout << "\n==================" << "界符" << "==================" << endl;
	cout << "界符\t\t类别编码" << endl;
	for (int code = 0; code < 2 * len; ++code) {
		if (Boundary[code].empty())
			continue;
		cout << Boundary[code] << "\t\t" << code << endl;
	}

	cout << "\n==================" << "运算符" << "==================" << endl;
	cout << "运算符\t\t类别编码" << endl;
	for (int code = 0; code < 3 * len; ++code) {
		if (Operator[code].empty())
			continue;
		cout << Operator[code] << "\t\t" << code << endl;
	}
}
// Reads a whole file into a string.
//
// filename: path of the file to read. It is taken as `const char*` so that string
//           literals and const buffers can be passed directly; existing callers
//           that pass a `char*` keep working.
// Returns the file's contents, or an empty string when the name is null or the
// file cannot be opened (a message is written to the error stream in that case).
std::string readFileIntoString(const char * filename)
{
	if (!filename)
		return std::string();
	std::ifstream ifile(filename);
	if (!ifile) {
		std::cerr << "无法打开文件: " << filename << std::endl;
		return std::string();
	}
	// Copy the whole stream buffer in one step instead of character by character.
	std::ostringstream buf;
	buf << ifile.rdbuf();
	return buf.str();
}

// Program entry: builds the tables, loads "a.txt", prints it, prints every
// recognised lexeme as (category,value), then prints the tables.
int main() {
	// Build the three lookup tables before any scanning starts.
	init_Reserve();
	init_Boundary();
	init_Operator();

	// Reset the scanner state.
	token = "";
	character = ' ';
	index = 0;

	// Load the text to analyse; the name is kept in a writable buffer
	// because the reader takes a non-const pointer.
	char fn1[] = "a.txt";
	instr = readFileIntoString(fn1);
	cout <<instr<<endl;

	// Recognise lexemes until the read position reaches the end of the text.
	cout << "\n------------------------识别结果------------------------" << endl;
	for (Binary word(0,"-"); index < instr.size(); ) {
		word = LexAnalyze();
		if (word.category == 0)
			continue;// category 0 pairs are not listed
		cout << "识别单词:\t(" << word.category << "," << word.value << ")" << endl;
	}

	// Show the lookup tables.
	cout << "\n------------------------词汇表展示------------------------\n" << endl;
	show_table();

	system("pause");
	return 0;
}

程序可以实现标识符、关键字、常见运算符、分隔符、整数、十六进制数、小数、单行注释、多行注释、字符常数、字符串常数、科学计数法数据的识别。

测试文本

/*
飞行荷兰人
*/
 //测试程序1*/
void main()
{
       0;
        int a12d=3.6e2;
        a=3.14;
        b=576;
       c=0182976;01234 +056;0+45;
       d=0xa48d+092;
        e+=1;
	int b="hello";
	a++;
	sum=a+b;//求两个数之和
	if(a >b+c)
		a='a';
	else
		a='\n';
}