import java.awt.*;
import java.util.Scanner;

/**
 * A minimal lexical analyser (scanner) for a small Pascal-like language.
 *
 * <p>{@link #main(String[])} reads one line from standard input, splits it into tokens and prints
 * every token with its category code in the form {@code < code ,text >}. Scanning stops after the
 * terminator token {@code #} or at the end of the line, whichever comes first.
 *
 * <p>The category codes follow the code table that accompanies this program: keywords
 * {@code begin, if, then, while, do, end} are 1..6, identifiers 10, numbers 11, the arithmetic
 * operators {@code + - * /} are 13..16, {@code :} 17, {@code :=} 18, {@code <} 20, {@code <=} 21,
 * {@code <>} 22, {@code >} 23, {@code >=} 24, {@code =} 25, {@code ;} 26, {@code (} 27,
 * {@code )} 28 and {@code #} 0. Characters outside that table are reported with code -1.
 *
 * Created by s2002 on 2016/9/30.
 */
public class com {

    /** Keywords in category-code order: the keyword at position {@code i} has code {@code i + 1}. */
    private static final String[] KEYWORDS = {"begin", "if", "then", "while", "do", "end"};

    private static final int CODE_UNKNOWN = -1;
    private static final int CODE_TERMINATOR = 0;
    private static final int CODE_IDENTIFIER = 10;
    private static final int CODE_NUMBER = 11;
    private static final int CODE_PLUS = 13;
    private static final int CODE_MINUS = 14;
    private static final int CODE_STAR = 15;
    private static final int CODE_SLASH = 16;
    private static final int CODE_COLON = 17;
    private static final int CODE_ASSIGN = 18;
    private static final int CODE_LESS = 20;
    private static final int CODE_LESS_EQUAL = 21;
    private static final int CODE_NOT_EQUAL = 22;
    private static final int CODE_GREATER = 23;
    private static final int CODE_GREATER_EQUAL = 24;
    private static final int CODE_EQUAL = 25;
    private static final int CODE_SEMICOLON = 26;
    private static final int CODE_LEFT_PAREN = 27;
    private static final int CODE_RIGHT_PAREN = 28;

    /** Returned by {@link #charAt(String, int)} for positions outside the scanned text. */
    private static final char NO_CHAR = '\0';

    /**
     * Reads one line from standard input and prints its tokens, one per line.
     *
     * <p>Whitespace between tokens is skipped. The scan ends after printing the {@code #} token
     * or when the line is exhausted; a missing {@code #} or an empty line is not an error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // The Scanner is intentionally not closed: closing it would also close System.in.
        Scanner input = new Scanner(System.in);
        System.out.print("Please input a string <end with '#'>:");
        String uString = input.hasNextLine() ? input.nextLine() : "";

        int index = 0;
        while (index < uString.length()) {
            char current = uString.charAt(index);
            if (Character.isWhitespace(current)) {
                index++;
                continue;
            }
            Token token = scanToken(current, uString, index, KEYWORDS);
            index = token.end;
            System.out.println("< " + token.code + " ," + token.text + " >");
            if (token.code == CODE_TERMINATOR) {
                break;
            }
        }
    }

    /**
     * Scans the single token that starts with {@code temp} at position {@code index} of
     * {@code uString}.
     *
     * <p>Compatibility entry point: the signature is unchanged, and the result is still a
     * {@link java.awt.List} holding three strings, in this order: the index of the first
     * character after the token, the category code, and the token text. An empty list is
     * returned when {@code temp} is whitespace. Because {@code java.awt.List} is a GUI component,
     * constructing it throws {@link java.awt.HeadlessException} in a headless JVM; {@code main}
     * therefore does not go through this method.
     *
     * @param temp        first character of the token, normally {@code uString.charAt(index)}
     * @param analyseDate optional scratch buffer; when non-null it receives the token's
     *                    characters from position 0 (as many as fit)
     * @param keywords    keyword table; a word equal to {@code keywords[i]} gets code
     *                    {@code i + 1}; may be null, in which case every word is an identifier
     * @param uString     the text being scanned
     * @param index       position of {@code temp} inside {@code uString}
     * @param compareStr  ignored; kept only so existing callers continue to compile
     * @return a list with next index, category code and token text, or an empty list for
     *         whitespace
     */
    public static java.awt.List extactCharacters(char temp, char[] analyseDate, String[] keywords, String uString,
                                                 int index, String compareStr) {
        java.awt.List list = new java.awt.List();
        if (Character.isWhitespace(temp)) {
            return list;
        }

        Token token = scanToken(temp, uString, index, keywords);
        if (analyseDate != null) {
            // Never write past the caller's buffer, however long the token is.
            int copied = Math.min(token.text.length(), analyseDate.length);
            token.text.getChars(0, copied, analyseDate, 0);
        }
        list.add(String.valueOf(token.end));
        list.add(String.valueOf(token.code));
        list.add(token.text);
        return list;
    }

    /**
     * Recognises one token whose first character is {@code first}; following characters are read
     * from {@code source} starting at {@code index + 1}. Never reads outside {@code source}.
     */
    private static Token scanToken(char first, String source, int index, String[] keywords) {
        int next = index + 1;

        // Identifier or keyword: a letter followed by any number of letters and digits.
        if (isAsciiLetter(first)) {
            StringBuilder word = new StringBuilder().append(first);
            while (isAsciiLetter(charAt(source, next)) || isAsciiDigit(charAt(source, next))) {
                word.append(source.charAt(next++));
            }
            String text = word.toString();
            return new Token(keywordCode(text, keywords), text, next);
        }

        // Number: one or more digits.
        if (isAsciiDigit(first)) {
            StringBuilder digits = new StringBuilder().append(first);
            while (isAsciiDigit(charAt(source, next))) {
                digits.append(source.charAt(next++));
            }
            return new Token(CODE_NUMBER, digits.toString(), next);
        }

        // Operators and delimiters; look ahead exactly one character for two-character operators.
        char follow = charAt(source, next);
        switch (first) {
            case '<':
                if (follow == '=') {
                    return new Token(CODE_LESS_EQUAL, "<=", next + 1);
                }
                if (follow == '>') {
                    return new Token(CODE_NOT_EQUAL, "<>", next + 1);
                }
                return new Token(CODE_LESS, "<", next);
            case '>':
                if (follow == '=') {
                    return new Token(CODE_GREATER_EQUAL, ">=", next + 1);
                }
                return new Token(CODE_GREATER, ">", next);
            case ':':
                if (follow == '=') {
                    return new Token(CODE_ASSIGN, ":=", next + 1);
                }
                return new Token(CODE_COLON, ":", next);
            case '+':
                return new Token(CODE_PLUS, "+", next);
            case '-':
                return new Token(CODE_MINUS, "-", next);
            case '*':
                return new Token(CODE_STAR, "*", next);
            case '/':
                return new Token(CODE_SLASH, "/", next);
            case '=':
                return new Token(CODE_EQUAL, "=", next);
            case ';':
                return new Token(CODE_SEMICOLON, ";", next);
            case '(':
                return new Token(CODE_LEFT_PAREN, "(", next);
            case ')':
                return new Token(CODE_RIGHT_PAREN, ")", next);
            case '#':
                return new Token(CODE_TERMINATOR, "#", next);
            default:
                return new Token(CODE_UNKNOWN, String.valueOf(first), next);
        }
    }

    /** Returns the keyword code ({@code position + 1}) of {@code word}, or the identifier code. */
    private static int keywordCode(String word, String[] keywords) {
        if (keywords != null) {
            for (int i = 0; i < keywords.length; i++) {
                if (word.equals(keywords[i])) {
                    return i + 1;
                }
            }
        }
        return CODE_IDENTIFIER;
    }

    /** Returns the character at {@code position}, or {@link #NO_CHAR} when there is none. */
    private static char charAt(String source, int position) {
        if (source == null || position < 0 || position >= source.length()) {
            return NO_CHAR;
        }
        return source.charAt(position);
    }

    /** Tells whether {@code c} is one of {@code a-z} or {@code A-Z}. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /** Tells whether {@code c} is one of {@code 0-9}. */
    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /** One recognised token: its category code, its text, and where scanning resumes. */
    private static final class Token {
        final int code;
        final String text;
        /** Index of the first character after this token. */
        final int end;

        Token(int code, String text, int end) {
            this.code = code;
            this.text = text;
            this.end = end;
        }
    }
}

程序功能:模拟词法分析器,将用户输入的代码分解为单词符号,并逐个输出每个单词符号及其对应的种别码。

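种别码表:

| 单词符号 | 种别码 | 单词符号 | 种别码 |
| --- | --- | --- | --- |
| begin | 1 | : | 17 |
| if | 2 | := | 18 |
| then | 3 | < | 20 |
| while | 4 | <= | 21 |
| do | 5 | <> | 22 |
| end | 6 | > | 23 |
| l(l\|d)* | 10 | >= | 24 |
| dd* | 11 | = | 25 |
| + | 13 | ; | 26 |
| - | 14 | ( | 27 |
| * | 15 | ) | 28 |
| / | 16 | # | 0 |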