Example #1
0
        /// <summary>
        /// Builds the backtrace table used to align a reference sequence with a hypothesis sequence.
        /// A penalty table is filled cell by cell: each cell holds the lowest cumulative penalty found for
        /// aligning the first <c>i</c> reference items with the first <c>j</c> hypothesis items, and the
        /// matching backtrace cell records which operation produced that penalty: a match (Ok), an
        /// Insertion, a Substitution or a Deletion.
        /// </summary>
        /// <typeparam name="T">the element type of both lists</typeparam>
        /// <param name="referenceItems">the ordered list of reference words (table rows)</param>
        /// <param name="hypothesisItems">the ordered list of hypothesis words (table columns)</param>
        /// <param name="comparator">decides, through <c>IsSimilar</c>, whether a reference item and a
        /// hypothesis item count as a match</param>
        /// <returns>the backtrace table, sized
        /// <c>[referenceItems.Count + 1, hypothesisItems.Count + 1]</c></returns>
        int[,] CreateBacktraceTable <T>(LinkedList <T> referenceItems, LinkedList <T> hypothesisItems, IComparator comparator)
        {
            var referenceCount  = referenceItems.Count;
            var hypothesisCount = hypothesisItems.Count;

            // LinkedList<T> has no indexer, and ElementAt() walks the list from its
            // head on every call.  Copy both lists into arrays once so that the
            // nested loop below can read any item in constant time.
            //
            var referenceWords = new T[referenceCount];
            referenceItems.CopyTo(referenceWords, 0);

            var hypothesisWords = new T[hypothesisCount];
            hypothesisItems.CopyTo(hypothesisWords, 0);

            // The rows of each table represent the words in the reference
            // string.  The columns of each table represent the words in
            // the hypothesis string.  Row 0 and column 0 stand for the
            // empty prefix of the respective string.
            //
            var penaltyTable   = new int[referenceCount + 1, hypothesisCount + 1];
            var backtraceTable = new int[referenceCount + 1, hypothesisCount + 1];

            penaltyTable[0, 0]   = 0;
            backtraceTable[0, 0] = Ok;

            // First column: the hypothesis is empty, so every reference
            // word seen so far must have been deleted.
            //
            for (var i = 1; i <= referenceCount; i++)
            {
                penaltyTable[i, 0]   = DeletionPenalty * i;
                backtraceTable[i, 0] = Deletion;
            }

            // First row: the reference is empty, so every hypothesis
            // word seen so far must have been inserted.
            //
            for (var j = 1; j <= hypothesisCount; j++)
            {
                penaltyTable[0, j]   = InsertionPenalty * j;
                backtraceTable[0, j] = Insertion;
            }

            // Row-by-row, column-by-column, fill out the tables, keeping
            // the penalty for each cell to a minimum.  The candidates are
            // tried in a fixed order (deletion, match/substitution,
            // insertion) with a strict '<' comparison, so on a tie the
            // earlier candidate wins.
            //
            for (var i = 1; i <= referenceCount; i++)
            {
                for (var j = 1; j <= hypothesisCount; j++)
                {
                    var minPenalty = MaxPenalty;

                    // Deletion: come from the cell above (one fewer
                    // reference word, same hypothesis words).
                    //
                    var penalty = penaltyTable[i - 1, j] + DeletionPenalty;
                    if (penalty < minPenalty)
                    {
                        minPenalty           = penalty;
                        penaltyTable[i, j]   = penalty;
                        backtraceTable[i, j] = Deletion;
                    }

                    // Diagonal move: if the words match it costs nothing
                    // extra (Ok); otherwise it is a substitution.
                    //
                    if (comparator.IsSimilar(referenceWords[i - 1], hypothesisWords[j - 1]))
                    {
                        penalty = penaltyTable[i - 1, j - 1];
                        if (penalty < minPenalty)
                        {
                            minPenalty           = penalty;
                            penaltyTable[i, j]   = penalty;
                            backtraceTable[i, j] = Ok;
                        }
                    }
                    else
                    {
                        penalty = penaltyTable[i - 1, j - 1] + SubstitutionPenalty;
                        if (penalty < minPenalty)
                        {
                            minPenalty           = penalty;
                            penaltyTable[i, j]   = penalty;
                            backtraceTable[i, j] = Substitution;
                        }
                    }

                    // Insertion: come from the cell to the left (same
                    // reference words, one fewer hypothesis word).
                    //
                    penalty = penaltyTable[i, j - 1] + InsertionPenalty;
                    if (penalty < minPenalty)
                    {
                        penaltyTable[i, j]   = penalty;
                        backtraceTable[i, j] = Insertion;
                    }
                }
            }

            return backtraceTable;
        }