问题描述预测分析算法LL(1):
L: Left-to-right scan of the tokens从左到右扫描
L: Leftmost derivation最左推导
(1): One token of lookahead只向前看一个token
输入若干个字符流形式的产生式,计算非终结符的First集和Follow集,并构造预测分析表。
算法设计2.1注意点
输入为字符流,应首先转为token流;
输入产生式可能很长,用‘’连接,最好全部转为最短形式,去掉‘’;
产生式、终结符、非终结符、First集、Follow集等应当用合适的数据结构存储,一次计算便写入,便于查找和调用;
First集和Follow集计算应采用递归算法。
2.2用到的算法
2.2.1First集
如果 X 是终结符号,那么FIRST(X)={X}
如果 X 是非终结符号,且 X -> Y1Y2Y3…Yk 是产生式
如果a在FIRST(Yi)中,且 ε 在FIRST(Y1),FIRST(Y2),…,FIRST(Yi-1)中,那么a也在FIRST(X)中;
如果ε 在FIRST(Y1),FIRST(Y2),…,FIRST(Yk)中,那么ε在FIRST(X)中;
如果X是非终结符号,且有X->ε,那么ε在FIRST(X)中。
2.2.2Follow集
将右端结束标记 $ 放到 Follow (S) 中 ;
按照下面两个规则不断迭代,知道所有的Follow集合都不再增长为止 ;
如果存在产生式A -> αBβ ,那么 FIRST(β)中所有非ε的符号都在Follow (B)中;
如果存在产生式A -> αB,或者A -> αBβ 且FIRST(β)包含ε,那么Follow (A)中的所有符号都加入到Follow (B)中。
2.2.3预测分析表
对于每个产生式A->α:
对于First(α)中每个终结符α,将A->α加入到Table[A,a]中;
如果ε在First(α)中,那么对于Follow(A)中的每个终结符b,将A->α加入到Table[A,b]中;如果ε在First(α)中,且$在Follow(A)中,也将A->α加入到Table[A,$]中。
测试结果为了方便验证结果的正确性,测试时采用了4组课件中提到的例题,结果对比如下:
图3-1例题1
图3-2例题1
图3-3例题2
图3-4例题2
图3-5例题3
图3-6例题3
图3-7例题4
图3-8例题4
图3-9例题4
经过多次调试修改,4个例题测试结果都已经正确,应当足以说明程序的可靠性。
实验代码123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542// LL(1)Parse_Tables.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。#include #include #include #include #include #include using namespace std;struct item{ //辅助数据类型 string str; vector vec;};class LL_1{public: vector- CA; //产生式condition_action vector T; //终结符 vector N; //非终结符 vector
- FIRST_SET; //非终结符FIRST(A)={tA->*tv} vector
- FOLLOW_SET; //哪些终结符会跟在非终结符后面FOLLOW(A)={tB->*wAtv} int PARSE_TABLE[100][100]; //分析表 void init(); //输入及初始化 void display_state(); //输出初始化后的状态 void display_first(); //输出first集 void display_follow(); //输出follow集 void display_table(); //输出LL_1分析表 bool isTerminal(string str); //判断是否为终结符 bool containEpsilon(string str); //判断first集是否包含epsilon int find_index(string str); //获取某个符号在终结符向量或非终结符向量中的下标 bool find_first(string str, item &new_item); // first集 bool find_follow(string str, item &new_item); // follow集 bool create_parse_table(int i); //构造预测分析表};void LL_1::display_state(){ //输出等价产生式、归纳出终结符和非终结符 int len1 = int(CA.size()); int len2 = int(T.size()); int len3 = int(N.size()); cout << "\n等价产生式:\n"; for (int i = 0; i < len1; i++) { cout << i + 1 << ". " << CA[i].str << " -> "; for (int j = 0; j < int(CA[i].vec.size()); j++) { cout << CA[i].vec[j] << " "; } cout << endl; } cout << "\n终结符:\n"; for (int i = 0; i < len2; i++) { cout << T[i]; if (i < len2 - 1) cout << " ,"; } cout << endl; cout << "\n非终结符:\n"; for (int i = 0; i < len3; i++) { cout << N[i]; if (i < len3 - 1) cout << " ,"; } cout << endl;}void LL_1::display_first(){ //输出first集 int len = int(FIRST_SET.size()); cout << "\n非终结符的FIRST集:\n"; for (int i = 0; i < len; i++) { int k = int(FIRST_SET[i].vec.size()); cout << "FIRST(" << FIRST_SET[i].str << ")={ "; for (int j = 0; j < k; j++) { cout << FIRST_SET[i].vec[j]; if (j < k - 1) cout << ","; } cout << " }\n"; }}void LL_1::display_follow(){ //输出follow集 int len = int(FOLLOW_SET.size()); cout << "\n非终结符的FOLLOW集:\n"; for (int i = 0; i < len; i++) { int k = int(FOLLOW_SET[i].vec.size()); cout << "FOLLOW(" << FOLLOW_SET[i].str << ")={ "; for (int j = 0; j < k; j++) { cout << FOLLOW_SET[i].vec[j]; if (j < k - 1) cout << ","; } cout << " }\n"; }}void LL_1::display_table(){ //输出分析表 cout << "\nLL(1)分析表:\n"; int len_T = int(T.size()); int len_N = int(N.size()); cout << setw(8) << " "; for (int i = 0; i < len_T; i++) { cout << setw(8) << T[i]; } cout << endl; for (int i = 0; i < len_N; i++) { cout << setw(8) << N[i]; for (int j = 0; j < len_T; j++) { if (PARSE_TABLE[i][j] == 0) cout << setw(8) << "x"; else cout << setw(8) << PARSE_TABLE[i][j]; } cout << endl; }}bool LL_1::create_parse_table(int i){ //构造分析表 int len_T = int(T.size()); int len_N = int(N.size()); int len_CA = int(CA.size()); int x = 0, y = 0, content = 0; // table的第x行,第y列,填入第content条产生式 content = i + 1; //从1开始 x = find_index(CA[i].str); string temp = CA[i].vec[0]; if (isTerminal(temp)) { if (temp != "epsilon") { y = find_index(temp); if (PARSE_TABLE[x][y] == content) return true; //同一内容写第二次,进入了递归死循环,直接跳出 else PARSE_TABLE[x][y] = content; } else { //是空产生式 vector &vec = FOLLOW_SET[x].vec; for (int j = 0; j < int(vec.size()); j++) { //遍历Follow集 y = find_index(vec[j]); if (PARSE_TABLE[x][y] == content) return true; //同一内容写第二次,进入了递归死循环,直接跳出 else PARSE_TABLE[x][y] = content; } } } else { //非终结符 int len = int(CA[i].vec.size()); int p = 1; bool flag = false; do { vector &vec = FIRST_SET[find_index(temp)].vec; int s = int(vec.size()); for (int j = 0; j < s; j++) { //遍历First集 if (vec[j] == "epsilon") { flag = true; continue; } y = find_index(vec[j]); if (PARSE_TABLE[x][y] == content) return true; //同一内容写第二次,进入了递归死循环,直接跳出 else PARSE_TABLE[x][y] = content; } if (p < len && flag) { temp = CA[i].vec[p]; if (!isTerminal(temp)) { for (int j = 0; j < len_CA; j++) { if (CA[j].str == temp) { create_parse_table(j); //函数进入递归 } } p++; } else { y = find_index(temp); if (PARSE_TABLE[x][y] == content) return true; //同一内容写第二次,进入了递归死循环,直接跳出 else PARSE_TABLE[x][y] = content; flag = false; } } if (p == len && flag) { //产生式右边所有都是非终结符,且都能推出epsilon vector &vec_temp = FOLLOW_SET[x].vec; for (int j = 0; j < int(vec_temp.size()); j++) { //遍历Follow集 y = find_index(vec_temp[j]); if (PARSE_TABLE[x][y] == content) return true; //同一内容写第二次,进入了递归死循环,直接跳出 else PARSE_TABLE[x][y] = content; } } } while (flag); } return true;}void LL_1::init(){ string str; vector v; cout << "请输入若干条形如‘ STMT -> if EXPR then STMT EXPR ’的产生式:\n"; cout << "(每个token之间用空格分隔,输入0结束)\n"; while (1) { getline(cin, str); //获取一行标准输入,即字符流形式的产生式 if (str == "0") break; int l = str.length(); vector index; int begin = 0; int end = 0; //以下将每一行化成若干个最短产生式 for (int i = 0; i < l; i++) { if (str[i] == '>' && i > 0) { if (str[i - 1] == '-') begin = i; } if (str[i] == '') { index.push_back(i); } } string prefix = str.substr(0, begin + 1); int len = int(index.size()); if (len == 0) v.push_back(str); else { for (int i = 0; i < len; i++) { if (i == 0) { v.push_back(str.substr(0, index[0])); end = index[0]; continue; } begin = index[i - 1]; end = index[i]; v.push_back(prefix + str.substr(begin + 1, end - begin - 1)); } v.push_back(prefix + str.substr(end + 1, l - end)); } } //以上将每一行化成若干个最短产生式,放到向量V中 //以下给token归类,分为终结符和非终结符 vector left, right; int len = int(v.size()); for (int i = 0; i < len; i++) { istringstream temp(v[i]); string out; int cal = 0; item new_item; while (temp >> out) { if (cal == 0) { left.push_back(out); new_item.str = out; } else if (cal == 1) { cal++; continue; } else { right.push_back(out); new_item.vec.push_back(out); } cal++; } CA.push_back(new_item); } int len1 = int(left.size()); int len2 = int(right.size()); vector::iterator it; for (int k = 0; k < len1; k++) { it = find(N.begin(), N.end(), left[k]); if (it == N.end()) //不存在 { N.push_back(left[k]); //产生式左边的一定是非终结符 } } for (int i = 0; i < len2; i++) { bool flag = true; //默认是终结符 for (int j = 0; j < len1; j++) { if (right[i] == left[j]) //产生式左边出现过 { flag = false; //是非终结符 break; } } if (flag) { it = find(T.begin(), T.end(), right[i]); if (it == T.end()) //不存在 { T.push_back(right[i]); } } else { it = find(N.begin(), N.end(), right[i]); if (it == N.end()) //不存在 { N.push_back(right[i]); } } } T.push_back("$"); for (int iter = 0; iter < int(T.size()); iter++) { if (T[iter] == "epsilon") { T.erase(T.begin() + iter); break; } }}bool LL_1::isTerminal(string str){ //判断某个token是否为终结符 if (str == "epsilon") return true; vector::iterator it; it = find(T.begin(), T.end(), str); if (it == T.end()) //不存在 { return false; } return true;}bool LL_1::containEpsilon(string str){ //判断某个非终结符的first集中是否包含epsilon for (int i = 0; i < int(CA.size()); i++) { if (CA[i].str == str) { string temp = CA[i].vec[0]; if (!isTerminal(temp)) containEpsilon(temp); else { if (temp == "epsilon") return true; } } } return false;}int LL_1::find_index(string str){ int len_N = int(N.size()); int len_T = int(T.size()); for (int i = 0; i < len_N; i++) { if (N[i] == str) return i; } for (int i = 0; i < len_T; i++) { if (T[i] == str) return i; } return -1;}bool LL_1::find_first(string str, item &new_item){ //计算某个非终结符的first集 for (int i = 0; i < int(CA.size()); i++) { if (CA[i].str == str) { string temp = CA[i].vec[0]; if (!isTerminal(temp)) { //非终结符 find_first(temp, new_item); if (containEpsilon(temp)) { //能产生epsilon int p = 1; int s = int(CA[i].vec.size()); while (s > p) { if (!isTerminal(CA[i].vec[p])) { if (containEpsilon(CA[i].vec[p])) { find_first(CA[i].vec[p], new_item); for (int iter = 0; iter < int(new_item.vec.size()); iter++) { if (new_item.vec[iter] == "epsilon") { new_item.vec.erase(new_item.vec.begin() + iter); break; } } p++; } else break; } else { new_item.vec.push_back(CA[i].vec[p]); break; } } if (p == s - 1) { vector::iterator it; for (int k = 0; k < s; k++) { it = find(N.begin(), N.end(), "epsilon"); if (it == N.end()) //不存在 { N.push_back("epsilon"); } } } } } else { //是终结符,直接加入,去重 vector::iterator it; it = find(new_item.vec.begin(), new_item.vec.end(), temp); if (it == new_item.vec.end()) //不存在 { new_item.vec.push_back(temp); } } } } return true;}bool LL_1::find_follow(string str, item &new_item){ //计算某个非终结符的follow集 if (str == CA[0].str) { //是第一个产生式的左边符号,加入结束符$ vector::iterator it; it = find(new_item.vec.begin(), new_item.vec.end(), "$"); if (it == new_item.vec.end()) //不存在 { //开始符号FOLLOW集中加入结束符$ new_item.vec.push_back("$"); } } for (int i = 0; i < int(CA.size()); i++) { //遍历所有产生式,在其右边找目标非终结符 int s = int(CA[i].vec.size()); for (int j = 0; j < s; j++) { if (CA[i].vec[j] == str) { //找到 if (j + 1 < s) // B->wAt { if (!isTerminal(CA[i].vec[j + 1])) //下一个是非终结符 { if (containEpsilon(CA[i].vec[j + 1])) // first集包含epsilon//B->wA find_follow(CA[i].str, new_item); // follow(A)=follow(B)+follow(A) find_first(CA[i].vec[j + 1], new_item); // follow(A)=first(A)-epsilon for (int iter = 0; iter < int(new_item.vec.size()); iter++) { if (new_item.vec[iter] == "epsilon") { new_item.vec.erase(new_item.vec.begin() + iter); break; } } } else { //下一个是终结符,直接加入,去重 vector::iterator it; it = find(new_item.vec.begin(), new_item.vec.end(), CA[i].vec[j + 1]); if (it == new_item.vec.end()) //不存在 { new_item.vec.push_back(CA[i].vec[j + 1]); } } } else // B->wA { // follow(A)=follow(B)+follow(A) if (CA[i].str != CA[i].vec[j]) //防止A->wA形式死循环 find_follow(CA[i].str, new_item); } } } } return true;}int main(){ LL_1 tag; tag.init(); tag.display_state(); int len = int(tag.N.size()); for (int i = 0; i < len; i++) { //遍历非终结符,逐个计算其first集 item new_item; item &new_ = new_item; new_item.str = tag.N[i]; tag.find_first(tag.N[i], new_); // FIRST tag.FIRST_SET.push_back(new_); } tag.display_first(); for (int i = 0; i < len; i++) { //遍历非终结符,逐个计算其follow集 item new_item; item &new_ = new_item; new_item.str = tag.N[i]; tag.find_follow(tag.N[i], new_); // FOLLOW tag.FOLLOW_SET.push_back(new_); } tag.display_follow(); for (int i = 0; i < int(tag.CA.size()); i++) tag.create_parse_table(i); //按产生式填写table tag.display_table(); return 0;}
实验总结本次实验相比较于之前的实验难度较大,用到的算法较多,情况判别也有些复杂,其中有3处要使用递归思想计算(First集和Follow集,以非终结符为单位进行计算;Parse_table,以最短产生式为单位进行计算),否则比较难处理。实验耗时较长,但好在完成度比较高,也达到了预期效果,能够实现对几若干条字符流形式的产生式进行自动分析计算:转为token流、列出等价最短产生式、自动归类终结符和非终结符、列出每个非终结符的First集和Follow集、列出准确的预测分析表,且有令人舒适的输出格式。不足之处:多次递归使用了较大的堆栈空间、预测分析表用了100*100的静态表存储…总的来讲本次实验让我对LL(1)算法有了深刻理解,帮助很大。