ZZU编译原理实验二(语法分析)

164 阅读3分钟

ZZU编译原理实验二(语法分析)自上而下的预测分析程序

输入LL(1)文法,输出每个非终结符的FIRST集和FOLLOW集,并输出预测分析表。

输入待匹配的字符串,输出分析过程及是否匹配的信息(因为java比较方便,就用java语言写的)

版本2.修改总控程序

import java.io.BufferedInputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Scanner;

/**
 * Command-line driver for a table-driven LL(1) predictive parser.
 *
 * <p>Input on standard input: the number of grammar lines, then one line per nonterminal in the
 * form {@code A->alt1|alt2|...} (upper-case ASCII letters are nonterminals, every other character
 * is a terminal, the Greek letter epsilon denotes the empty string). The first line defines the
 * start symbol. Every following line is a sentence to be analysed.
 *
 * <p>Output: the terminal/nonterminal sets, FIRST and FOLLOW of every nonterminal, the predictive
 * parsing table, and for each sentence a step-by-step trace followed by YES or NO.
 *
 * <p>Assumes every nonterminal appears on exactly one input line; if a left-hand side is repeated,
 * the later line replaces the earlier one in the lookup index.
 */
public class Main {
	/** The empty-string symbol (U+03B5, Greek small letter epsilon). */
	private static final char EPSILON = '\u03b5';
	/** End-of-input marker appended to every sentence and placed at the bottom of the stack. */
	private static final char END_MARK = '#';

	/**
	 * Reads the grammar, prints its analysis, then parses every remaining input line.
	 *
	 * @param args unused
	 * @throws IllegalArgumentException if a nonterminal used on a right-hand side has no production
	 */
	public static void main(String[] args) {
		Scanner sc = new Scanner(new BufferedInputStream(System.in));
		ArrayList<Production> prods = new ArrayList<Production>();
		Production S = new Production(); // only used to accumulate all symbols of the grammar
		HashMap<Character, Integer> mp = new HashMap<Character, Integer>(); // nonterminal -> index in prods
		System.out.print("请输入LL(1)文法的产生式个数及产生式:");
		int cnt = sc.nextInt();
		sc.nextLine(); // discard the rest of the count line
		for (int i = 0; i < cnt; ++i) {
			Production prod = new Production(sc.nextLine());
			mp.put(prod.left, i);
			prods.add(prod);
			S.term.addAll(prod.term);
			S.notterm.addAll(prod.notterm);
			// Production only collects right-hand-side symbols; a left-hand side that never
			// occurs on a right-hand side (typically the start symbol) must be added here.
			S.notterm.add(prod.left);
		}
		// Fail with a clear message instead of a NullPointerException deep inside the analysis.
		for (Character nt : S.notterm) {
			if (!mp.containsKey(nt)) {
				throw new IllegalArgumentException("No production defined for nonterminal " + nt);
			}
		}

		// Step 1: terminals and nonterminals.
		System.out.println("终结符 与 非终结符");
		S.output();

		// Step 2: FIRST and FOLLOW of every nonterminal.
		computeFirstSets(prods, mp);
		System.out.println("\nFIRST集:");
		for (Production prod : prods) {
			printSet(prod.left, prod.first);
		}
		computeFollowSets(prods, mp);
		System.out.println("\nFOLLOW集:");
		for (Production prod : prods) {
			printSet(prod.left, prod.follow);
		}

		// Step 3: predictive parsing table. Its columns are the terminals plus the end marker;
		// epsilon is not an input symbol and therefore not a column.
		S.term.remove(EPSILON);
		S.term.add(END_MARK);
		HashMap<String, String> M = buildAndPrintTable(prods, mp, S.term);

		// Step 4: analyse every remaining input line.
		System.out.println();
		while (sc.hasNextLine()) {
			boolean accepted = parse(sc.nextLine(), prods, mp, M);
			System.out.println("\n" + (accepted ? "YES" : "NO"));
		}
		sc.close(); // on Windows, Ctrl+Z ends the input
	}

	/** Returns whether {@code c} is a nonterminal, using the same 'A'..'Z' rule as Production. */
	private static boolean isNonterminal(char c) {
		return c >= 'A' && c <= 'Z';
	}

	/** Prints one line of the form {@code A: x y z}. */
	private static void printSet(Character owner, HashSet<Character> set) {
		System.out.print(owner + ":");
		for (Character ch : set) {
			System.out.print(" " + ch);
		}
		System.out.println();
	}

	/**
	 * Computes FIRST of a symbol string from the current FIRST sets of the nonterminals.
	 * Epsilon is included only when every symbol of the string can derive the empty string
	 * (in particular for the empty string itself).
	 */
	private static HashSet<Character> firstOfSequence(String symbols, ArrayList<Production> prods,
			HashMap<Character, Integer> mp) {
		HashSet<Character> result = new HashSet<Character>();
		for (int i = 0; i < symbols.length(); ++i) {
			char sym = symbols.charAt(i);
			if (!isNonterminal(sym)) {
				result.add(sym); // a terminal (or a literal epsilon) ends the scan
				return result;
			}
			HashSet<Character> symFirst = prods.get(mp.get(sym)).first;
			for (Character c : symFirst) {
				if (c != EPSILON) result.add(c);
			}
			if (!symFirst.contains(EPSILON)) return result; // sym is not nullable
		}
		result.add(EPSILON);
		return result;
	}

	/** Grows every nonterminal's FIRST set until a full pass adds nothing. */
	private static void computeFirstSets(ArrayList<Production> prods, HashMap<Character, Integer> mp) {
		boolean changed = true;
		while (changed) {
			changed = false;
			for (Production prod : prods) {
				for (String candidate : prod.right) {
					if (prod.first.addAll(firstOfSequence(candidate, prods, mp))) changed = true;
				}
			}
		}
	}

	/**
	 * Computes FOLLOW of every nonterminal; FIRST sets must already be final.
	 * Rule 1: the end marker follows the start symbol. Rule 2: for A -> xBy, FIRST(y) without
	 * epsilon is in FOLLOW(B). Rule 3: if y is empty or nullable, FOLLOW(A) is in FOLLOW(B).
	 */
	private static void computeFollowSets(ArrayList<Production> prods, HashMap<Character, Integer> mp) {
		if (prods.size() > 0) prods.get(0).follow.add(END_MARK);
		boolean changed = true;
		while (changed) {
			changed = false;
			for (Production prod : prods) {
				for (String candidate : prod.right) {
					// Every position is inspected: a terminal in the candidate must not hide
					// the nonterminals on its left.
					for (int j = 0; j < candidate.length(); ++j) {
						char sym = candidate.charAt(j);
						if (!isNonterminal(sym)) continue;
						HashSet<Character> target = prods.get(mp.get(sym)).follow;
						HashSet<Character> rest = firstOfSequence(candidate.substring(j + 1), prods, mp);
						boolean restNullable = rest.remove(EPSILON); // epsilon never belongs to FOLLOW
						if (target.addAll(rest)) changed = true;
						if (restNullable && sym != prod.left && target.addAll(prod.follow)) changed = true;
					}
				}
			}
		}
	}

	/**
	 * Prints the predictive parsing table and returns it as a map whose key is the nonterminal
	 * followed by the lookahead character and whose value is the chosen alternative.
	 * When several alternatives compete for a cell, the first one listed wins and a FIRST-based
	 * entry takes precedence over a FOLLOW-based one; conflicts are not reported.
	 */
	private static HashMap<String, String> buildAndPrintTable(ArrayList<Production> prods,
			HashMap<Character, Integer> mp, HashSet<Character> columns) {
		HashMap<String, HashSet<Character>> candiFirst = new HashMap<String, HashSet<Character>>();
		for (Production prod : prods) {
			for (String candi : prod.right) {
				candiFirst.put(candi, firstOfSequence(candi, prods, mp));
			}
		}
		System.out.println("\n该文法的预测分析表:");
		HashMap<String, String> M = new HashMap<String, String>();
		for (Character ch : columns) {
			System.out.print("\t" + ch);
		}
		System.out.println();
		for (Production prod : prods) {
			System.out.print(prod.left);
			for (Character ch : columns) {
				boolean viaFirst = prod.first.contains(ch);
				boolean viaFollow = !viaFirst && prod.first.contains(EPSILON) && prod.follow.contains(ch);
				String chosen = null;
				if (viaFirst || viaFollow) {
					// Via FIRST: the alternative that can start with ch.
					// Via FOLLOW: the alternative that can derive the empty string.
					Character wanted = viaFirst ? ch : Character.valueOf(EPSILON);
					for (String candi : prod.right) {
						if (candiFirst.get(candi).contains(wanted)) {
							chosen = candi;
							break;
						}
					}
				}
				if (chosen == null) {
					System.out.print("\t");
				} else {
					System.out.print("\t" + prod.left + "->" + chosen);
					M.put("" + prod.left + ch, chosen);
				}
			}
			System.out.println();
		}
		return M;
	}

	/**
	 * Runs the predictive parser on one sentence, printing one trace line per step.
	 *
	 * @return true if the sentence is derivable from the start symbol
	 */
	private static boolean parse(String input, ArrayList<Production> prods,
			HashMap<Character, Integer> mp, HashMap<String, String> M) {
		ArrayList<Character> charStack = new ArrayList<Character>();
		charStack.add(END_MARK);
		if (prods.size() > 0) charStack.add(prods.get(0).left); // start symbol
		String str = input + END_MARK;
		int idx = 0;
		int stepCnt = 0;
		System.out.println("步骤\t分析栈\t\t输入串\t\t所用产生式");
		while (true) {
			System.out.print("" + stepCnt++ + "\t" + list2string(charStack) + "\t\t" + str.substring(idx) + "\t\t");
			char X = charStack.remove(charStack.size() - 1);
			char cur = str.charAt(idx);
			if (!isNonterminal(X)) {
				if (X != cur) return false; // terminal mismatch
				if (X == END_MARK) {
					// Accept only on the appended marker, not on a '#' typed inside the sentence.
					return idx == str.length() - 1;
				}
				idx++;
			} else {
				String candi = M.get("" + X + cur);
				if (candi != null) {
					System.out.print(X + "->" + candi);
					if (!candi.equals(String.valueOf(EPSILON))) {
						for (int i = candi.length() - 1; i >= 0; --i) {
							charStack.add(candi.charAt(i)); // push right-to-left so the leftmost symbol is on top
						}
					}
				} else if (prods.get(mp.get(X)).first.contains(EPSILON)) {
					// No table entry but X is nullable: erase X and let the following stack
					// symbol detect the error, if there is one.
					System.out.print(X + "->" + EPSILON);
				} else {
					return false;
				}
			}
			System.out.println();
		}
	}

	/** Concatenates the stack contents, bottom first, for the trace output. */
	static String list2string(ArrayList<Character> list) {
		StringBuilder ret = new StringBuilder(list.size());
		for (Character ch : list) ret.append(ch);
		return ret.toString();
	}
}

\

import java.util.HashSet;

/**
 * All alternatives of one nonterminal, parsed from a line of the form {@code A->alt1|alt2|...}.
 *
 * <p>Upper-case ASCII letters 'A'..'Z' are nonterminals; every other character, including the
 * Greek letter epsilon (U+03B5) that stands for the empty string, is recorded as a terminal.
 */
public class Production {
	/** Empty-string symbol, written as an escape so the class does not depend on the source encoding. */
	private static final char EPSILON = '\u03b5';

	/** Left-hand side: the first character of the rule text. */
	public Character left;
	/** Right-hand side alternatives, in input order. */
	public String[] right;
	/**
	 * {@code term} / {@code notterm}: symbols that occur in the alternatives.
	 * {@code first}: seeded with the leading terminal of each alternative.
	 * {@code follow}: starts empty; filled by the caller.
	 */
	public HashSet<Character> term, notterm, first, follow;
	/** True when one alternative is exactly the epsilon symbol. */
	public boolean epsilon;

	/** Creates an empty production: left is the NUL character, no alternatives, all sets empty. */
	public Production() {
		left = Character.valueOf('\0'); // the Character(char) constructor is deprecated for removal
		right = new String[0];
		term = new HashSet<Character>();
		notterm = new HashSet<Character>();
		first = new HashSet<Character>();
		follow = new HashSet<Character>();
		epsilon = false;
	}

	/**
	 * Parses one grammar line. Leading and trailing whitespace is ignored so that a stray blank
	 * at the end of an input line does not become a terminal symbol.
	 *
	 * @param rule text of the form {@code A->alt1|alt2|...}
	 * @throws IllegalArgumentException if the text has no left-hand side before {@code ->},
	 *         has no alternative at all, or contains an empty alternative
	 * @throws NullPointerException if {@code rule} is null
	 */
	public Production(String rule) {
		String text = rule.trim();
		int guard = text.indexOf("->");
		if (guard < 1) {
			throw new IllegalArgumentException("Malformed production (expected A->...): \"" + rule + "\"");
		}
		left = text.charAt(0);
		right = text.substring(guard + 2).split("\\|"); // the bar is a regex metacharacter, hence the escape
		if (right.length == 0) {
			throw new IllegalArgumentException("Production has no alternative: \"" + rule + "\"");
		}
		for (String candidate : right) {
			// An empty alternative would otherwise crash later on charAt(0).
			if (candidate.isEmpty()) {
				throw new IllegalArgumentException("Empty alternative in production: \"" + rule + "\"");
			}
		}
		term = new HashSet<Character>();
		notterm = new HashSet<Character>();
		epsilon = false;
		classify();
		first = initFirst();
		follow = new HashSet<Character>();
	}

	/** Prints the terminals on one line and the nonterminals on the next, space separated. */
	public void output() {
		for (Character ch : term) {
			System.out.print(ch + " ");
		}
		System.out.println();
		for (Character ch : notterm) {
			System.out.print(ch + " ");
		}
		System.out.println();
	}

	/** Sorts every right-hand-side character into term or notterm and sets the epsilon flag. */
	private void classify() {
		for (String candidate : right) {
			if (candidate.equals(String.valueOf(EPSILON))) epsilon = true;
			for (int j = 0; j < candidate.length(); ++j) {
				char ch = candidate.charAt(j);
				if (ch >= 'A' && ch <= 'Z') {
					notterm.add(ch);
				} else {
					term.add(ch);
				}
			}
		}
	}

	/** Returns the part of FIRST that is visible locally: each alternative's leading terminal. */
	private HashSet<Character> initFirst() {
		HashSet<Character> set = new HashSet<Character>();
		for (String candidate : right) {
			char ch = candidate.charAt(0); // safe: the constructor rejects empty alternatives
			if (ch < 'A' || ch > 'Z') {
				set.add(ch);
			}
		}
		return set;
	}
}

输入样例

5
E->TB
B->+TB|ε
T->FP
P->*FP|ε
F->(E)|i

i*i+i

3
S->AB
A->a+b
B->a*b

3
S->Abc
A->a|ε
B->b|ε

abc
bc

3
S->Ab
A->a|B|ε
B->b|ε

(上面最后一组样例是非LL(1)文法)



\

\

\