/// <summary>
/// Builds the backtrace table for an edit-distance style alignment of the
/// hypothesis items against the reference items.
/// <para>
/// A penalty table is filled by dynamic programming: cell [i, j] holds the
/// lowest accumulated penalty for aligning the first i reference items with
/// the first j hypothesis items. In parallel, the backtrace table records
/// which operation produced that lowest penalty (Ok, Substitution, Insertion
/// or Deletion), so that a caller can later walk the alignment backwards.
/// </para>
/// </summary>
/// <typeparam name="T">The element type of both item lists.</typeparam>
/// <param name="referenceItems">The ordered list of reference words (table rows).</param>
/// <param name="hypothesisItems">The ordered list of hypothesis words (table columns).</param>
/// <param name="comparator">Decides whether a reference item and a hypothesis item count as a match.</param>
/// <returns>
/// The backtrace table, sized (referenceItems.Count + 1) by (hypothesisItems.Count + 1).
/// </returns>
int[,] CreateBacktraceTable<T>(LinkedList<T> referenceItems, LinkedList<T> hypothesisItems, IComparator comparator)
{
    var referenceCount = referenceItems.Count;
    var hypothesisCount = hypothesisItems.Count;

    // Copy both linked lists into arrays once. Indexing a LinkedList with
    // ElementAt walks the list from its head on every call, which would make
    // each cell of the table cost O(n) instead of O(1).
    var reference = new T[referenceCount];
    referenceItems.CopyTo(reference, 0);
    var hypothesis = new T[hypothesisCount];
    hypothesisItems.CopyTo(hypothesis, 0);

    // Rows correspond to reference items, columns to hypothesis items.
    // Row 0 / column 0 stand for the empty prefix of the respective list.
    var penaltyTable = new int[referenceCount + 1, hypothesisCount + 1];
    var backtraceTable = new int[referenceCount + 1, hypothesisCount + 1];

    penaltyTable[0, 0] = 0;
    backtraceTable[0, 0] = Ok;

    // First column: the hypothesis is empty, so every reference item so far
    // has been deleted.
    for (var i = 1; i <= referenceCount; i++)
    {
        penaltyTable[i, 0] = DeletionPenalty * i;
        backtraceTable[i, 0] = Deletion;
    }

    // First row: the reference is empty, so every hypothesis item so far
    // has been inserted.
    for (var j = 1; j <= hypothesisCount; j++)
    {
        penaltyTable[0, j] = InsertionPenalty * j;
        backtraceTable[0, j] = Insertion;
    }

    // Fill the remaining cells row by row. For each cell the candidates are
    // tried in the order deletion, match/substitution, insertion, and a
    // candidate only replaces the current best when it is strictly cheaper,
    // so on ties the earlier candidate wins. A cell is left untouched when
    // no candidate is below MaxPenalty.
    for (var i = 1; i <= referenceCount; i++)
    {
        for (var j = 1; j <= hypothesisCount; j++)
        {
            var minPenalty = MaxPenalty;

            // Deletion: come from the cell above (one fewer reference item).
            var penalty = penaltyTable[i - 1, j] + DeletionPenalty;
            if (penalty < minPenalty)
            {
                minPenalty = penalty;
                penaltyTable[i, j] = penalty;
                backtraceTable[i, j] = Deletion;
            }

            // Match or substitution: come from the diagonal cell. A match
            // adds no penalty; a mismatch adds the substitution penalty.
            var similar = comparator.IsSimilar(reference[i - 1], hypothesis[j - 1]);
            penalty = similar
                ? penaltyTable[i - 1, j - 1]
                : penaltyTable[i - 1, j - 1] + SubstitutionPenalty;
            if (penalty < minPenalty)
            {
                minPenalty = penalty;
                penaltyTable[i, j] = penalty;
                backtraceTable[i, j] = similar ? Ok : Substitution;
            }

            // Insertion: come from the cell to the left (one fewer
            // hypothesis item).
            penalty = penaltyTable[i, j - 1] + InsertionPenalty;
            if (penalty < minPenalty)
            {
                penaltyTable[i, j] = penalty;
                backtraceTable[i, j] = Insertion;
            }
        }
    }

    return backtraceTable;
}