예제 #1
0
        /// <summary>
        /// Click handler: tokenizes the text in <c>Input</c>, writes the token stream to
        /// <c>Output</c> (one output line per non-empty input line) and appends one row to
        /// <c>ST</c> for every variable declaration it recognises.
        /// </summary>
        /// <remarks>
        /// Two declaration shapes are recognised, both requiring exactly one space between
        /// parts: <c>int alpha = 2 ;</c> (int/float/double with a numeric value) and
        /// <c>String alpha = ' Hello ' ;</c> (String/char with a single quoted word).
        /// Any other identifier is looked up among the declarations seen so far and, when
        /// found, reported with the index of its declaration; when not found no token is
        /// written for it.
        /// </remarks>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void Tokenize_Click(object sender, EventArgs e)
        {
            // Raw text typed by the user.
            String userInput = Input.Text;

            // Reserved words: a lexeme found here is reported as <keyword, ...> and is never
            // entered into the symbol table. The type names accepted by the declaration
            // patterns below (double, String, char) must be listed too; otherwise the type
            // word at the start of a declaration would itself be handled as a variable.
            List<String> keywordList = new List<String>();
            keywordList.Add("int");
            keywordList.Add("float");
            keywordList.Add("double");
            keywordList.Add("String");
            keywordList.Add("char");
            keywordList.Add("while");
            keywordList.Add("main");
            keywordList.Add("if");
            keywordList.Add("else");
            keywordList.Add("new");

            // Symbol table: one entry per declaration, laid out as
            // { index, variable name, type, value, line# }. A list is used so the number of
            // declarations is not capped.
            List<String[]> symbols = new List<String[]>();

            // 1-based id of the next declaration; used both as the variable number in the
            // token and as its symbol-table index.
            int nextId = 1;

            // Counts the lines processed so far. Empty lines are dropped during buffering,
            // so this numbers the non-empty lines only.
            int line_num = 0;

            // Identifiers: a letter or underscore followed by letters/digits.
            Regex variable_Reg = new Regex(@"^[A-Za-z_][A-Za-z0-9]*$");
            // Numeric constants with optional fraction and exponent.
            Regex constants_Reg = new Regex(@"^[0-9]+([.][0-9]+)?([e]([+-])?[0-9]+)?$");
            // Single-character operators (the pattern is a character class, so it matches
            // exactly one of - * + / > < & | =).
            Regex operators_Reg = new Regex(@"^[-*+/><&&||=]$");
            // Single-character punctuation.
            Regex Special_Reg = new Regex(@"^[.,'\[\]{}();:?]$");

            // Whole-line declaration patterns. In both, group 1 is the type, group 2 the
            // variable name and group 4 the value.
            Regex reg1 = new Regex(@"^(int|float|double)\s([A-Za-z_][A-Za-z0-9]{0,10})\s(=)\s([0-9]+([.][0-9]+)?([e][+-]?[0-9]+)?)\s(;)$"); // line of type int alpha = 2 ;
            Regex reg2 = new Regex(@"^(String|char)\s([A-Za-z_][A-Za-z0-9]{0,10})\s(=)\s[']\s([A-Za-z_][A-Za-z0-9]{0,30})\s[']\s(;)$"); // line of type String alpha = ' Hello ' ;

            // ---- Input buffering -------------------------------------------------------
            // Split the input into lines on '\n', keeping only spaces and characters that
            // one of the four single-character patterns accepts; everything else is dropped.
            List<String> lines = new List<String>();
            System.Text.StringBuilder buffer = new System.Text.StringBuilder();

            foreach (char c in userInput)
            {
                String ch = c.ToString();
                if (c == ' ' || variable_Reg.IsMatch(ch) || constants_Reg.IsMatch(ch)
                    || operators_Reg.IsMatch(ch) || Special_Reg.IsMatch(ch))
                {
                    buffer.Append(c);
                }
                else if (c == '\n' && buffer.Length > 0)
                {
                    lines.Add(buffer.ToString());
                    buffer.Length = 0;
                }
            }
            // The last line may not be terminated by '\n'.
            if (buffer.Length > 0)
            {
                lines.Add(buffer.ToString());
                buffer.Length = 0;
            }

            Output.Clear();

            ST.Clear();

            // ---- Tokenizing, line by line ----------------------------------------------
            List<String> lexemes = new List<String>();

            foreach (String line in lines)
            {
                line_num++;
                lexemes.Clear();

                // Split the line into lexemes: letters, digits and '_' accumulate into a
                // word; a space ends the word; an operator or punctuation character ends
                // the word and is a lexeme of its own.
                foreach (char c in line)
                {
                    String ch = c.ToString();
                    if (variable_Reg.IsMatch(ch) || constants_Reg.IsMatch(ch))
                    {
                        buffer.Append(c);
                    }
                    else if (c == ' ')
                    {
                        if (buffer.Length > 0)
                        {
                            lexemes.Add(buffer.ToString());
                            buffer.Length = 0;
                        }
                    }
                    else if (operators_Reg.IsMatch(ch) || Special_Reg.IsMatch(ch))
                    {
                        if (buffer.Length > 0)
                        {
                            lexemes.Add(buffer.ToString());
                            buffer.Length = 0;
                        }
                        lexemes.Add(ch);
                    }
                }
                if (buffer.Length > 0)
                {
                    lexemes.Add(buffer.ToString());
                    buffer.Length = 0;
                }

                // The declaration patterns test the whole line, so match once per line
                // rather than once per lexeme.
                Match category1 = reg1.Match(line);
                Match category2 = reg2.Match(line);

                for (int x = 0; x < lexemes.Count; x++)
                {
                    String lexeme = lexemes[x];

                    if (operators_Reg.IsMatch(lexeme))
                    {
                        // operator token, e.g. < op, => 
                        Output.AppendText("< op, " + lexeme + "> ");
                    }
                    else if (constants_Reg.IsMatch(lexeme))
                    {
                        // numeric token, e.g. < digit, 12> 
                        Output.AppendText("< digit, " + lexeme + "> ");
                    }
                    else if (Special_Reg.IsMatch(lexeme))
                    {
                        // punctuation token, e.g. < punc, ;> 
                        Output.AppendText("< punc, " + lexeme + "> ");
                    }
                    else if (variable_Reg.IsMatch(lexeme))
                    {
                        if (keywordList.Contains(lexeme))
                        {
                            Output.AppendText("<keyword, " + lexeme + "> ");
                        }
                        else if (category1.Success || category2.Success)
                        {
                            // In a String/char declaration the quoted word is the value,
                            // not a variable: it sits directly between two ' lexemes.
                            bool quotedValue = category2.Success
                                               && x > 0 && x + 1 < lexemes.Count
                                               && lexemes[x - 1].Equals("'")
                                               && lexemes[x + 1].Equals("'");

                            if (quotedValue)
                            {
                                Output.AppendText("<String" + nextId + ", " + lexeme + "> ");
                            }
                            else
                            {
                                // Type and value come from the matched pattern rather than
                                // from neighbouring lexemes, so a value such as 2.5 (which
                                // the lexer splits at '.') is recorded whole.
                                Match declaration = category1.Success ? category1 : category2;

                                String[] entry = new String[5];
                                entry[0] = nextId.ToString();           // index
                                entry[1] = lexeme;                      // variable name
                                entry[2] = declaration.Groups[1].Value; // type
                                entry[3] = declaration.Groups[4].Value; // value
                                entry[4] = line_num.ToString();         // line number
                                symbols.Add(entry);

                                Output.AppendText("<var" + nextId + ", " + nextId + "> ");
                                ST.AppendText(String.Join(" \t ", entry) + " \n ");
                                nextId++;
                            }
                        }
                        else
                        {
                            // Not a declaration line: if the variable was declared earlier,
                            // emit a token carrying the index of its first declaration.
                            // An identifier that was never declared produces no token.
                            foreach (String[] entry in symbols)
                            {
                                if (entry[1].Equals(lexeme))
                                {
                                    Output.AppendText("<var" + entry[0] + ", " + entry[0] + "> ");
                                    break;
                                }
                            }
                        }
                    }
                    // A lexeme matching none of the patterns (e.g. "1e") produces no token.
                }
                Output.AppendText("\n");
            }
            #region Display Symbol Table
            // NOTE(review): Symboltable (lower-case 't') is not declared in this method and
            // nothing here adds to it; presumably it is a class-level list of rows declared
            // elsewhere - confirm it is still needed.
            for (int j = 0; j < Symboltable.Count; j++)
            {
                for (int z = 0; z < Symboltable[j].Count; z++)
                {
                    ST.AppendText(Symboltable[j][z] + "\t");
                }
                ST.AppendText("\n");
            }
            #endregion
        }