} // SrSlSlSr

// Builds the delta (substitution score) table for the vertical scan pattern.
// Starting from CreateInitialTable(-1), exact Su/Su and Sd/Sd pairs are rewarded
// with +1, and every single-token insertion or deletion is set to -10.
public static Dictionary<DeltaKey, int> CreateScanVerticalDeltaTable()
{
    Dictionary<DeltaKey, int> scores = CreateInitialTable(-1);

    // Reward short ups and short downs that line up exactly.
    foreach (string direction in new[] { "Su", "Sd" })
    {
        scores[new DeltaKey(direction, direction)] = 1;
    }

    // Gaps are heavily penalised in both directions.
    foreach (Token token in tokens)
    {
        string name = Wordbook.TokenToString(token);
        scores[new DeltaKey("", name)] = -10; // insertion
        scores[new DeltaKey(name, "")] = -10; // deletion
    }

    return scores;
} // SuSdSdSu
} // SuSdSdSu

// Builds the delta (substitution score) table for the corner scan pattern.
// Starting from CreateInitialTable(-5), exact matches of the four short
// directions (Su, Sd, Sl, Sr) score +1, and every single-token insertion or
// deletion is set to -10.
public static Dictionary<DeltaKey, int> CreateScanCornerDeltaTable()
{
    Dictionary<DeltaKey, int> scores = CreateInitialTable(-5);

    // Reward ups, downs, lefts and rights that line up exactly.
    foreach (string direction in new[] { "Su", "Sd", "Sl", "Sr" })
    {
        scores[new DeltaKey(direction, direction)] = 1;
    }

    // Gaps are heavily penalised in both directions.
    foreach (Token token in tokens)
    {
        string name = Wordbook.TokenToString(token);
        scores[new DeltaKey("", name)] = -10; // insertion
        scores[new DeltaKey(name, "")] = -10; // deletion
    }

    return scores;
} // SrSlSuSd, and all variants
// Builds the delta (substitution score) table for the vertical compare pattern.
// Starting from CreateInitialTable(-2), exact Su/Su and Sd/Sd pairs score +2, so a
// perfect match of the four-token pattern scores 8. Every insertion or deletion
// costs -2, except insertions of Sl or Sr, which cost only -1.
public static Dictionary<DeltaKey, int> CreateCompareVerticalDeltaTable() // SuSdSuSd
{
    Dictionary<DeltaKey, int> scores = CreateInitialTable(-2);

    // Reward short ups and short downs that line up exactly.
    foreach (string direction in new[] { "Su", "Sd" })
    {
        scores[new DeltaKey(direction, direction)] = 2;
    }

    // Default gap penalty for every token, in both directions.
    foreach (Token token in tokens)
    {
        string name = Wordbook.TokenToString(token);
        scores[new DeltaKey("", name)] = -2; // insertion
        scores[new DeltaKey(name, "")] = -2; // deletion
    }

    // Stray short lefts and rights are tolerated: inserting one costs less than
    // the default. This must run after the loop above so that it overrides it.
    foreach (string sideways in new[] { "Sl", "Sr" })
    {
        scores[new DeltaKey("", sideways)] = -1;
    }

    // With these scores a perfect match is 8, and a single inserted Sl or Sr
    // yields 7, so a threshold of 7 or higher admits at most one such insertion.
    return scores;
}
} // SrSrSrSrLl

// Builds the delta (substitution score) table for the long-line pattern.
// Starting from CreateInitialTable(-1):
//   Sr/Sr = 5, Sr/Mr = 2, Sr/Sl = 0, Ll/Ll = 10,
//   deleting Ll = -10, and inserting any token = -10.
// Deletions of tokens other than Ll keep the value set by CreateInitialTable.
public static Dictionary<DeltaKey, int> CreateLongLineDeltaTable()
{
    Dictionary<DeltaKey, int> scores = CreateInitialTable(-1);

    // Short rights: exact match is best, a medium right is acceptable,
    // a short left is neutral.
    scores[new DeltaKey("Sr", "Sr")] = 5;
    scores[new DeltaKey("Sr", "Mr")] = 2;
    scores[new DeltaKey("Sr", "Sl")] = 0;

    // The long left scores highest when matched and is heavily penalised when dropped.
    scores[new DeltaKey("Ll", "Ll")] = 10;
    scores[new DeltaKey("Ll", "")] = -10;

    // Any inserted token is heavily penalised.
    foreach (Token token in tokens)
    {
        scores[new DeltaKey("", Wordbook.TokenToString(token))] = -10;
    }

    return scores;
} // SrSrSrSrLl
// Returns a new delta table seeded with default scores:
//   - every (token, token) pair is set to initialValue;
//   - every insertion ("", token) and every deletion (token, "") is set to -1,
//     regardless of initialValue.
public static Dictionary<DeltaKey, int> CreateInitialTable(int initialValue)
{
    var scores = new Dictionary<DeltaKey, int>();

    // All token-to-token substitutions (including a token with itself).
    foreach (Token from in tokens)
    {
        string fromName = Wordbook.TokenToString(from);
        foreach (Token to in tokens)
        {
            scores[new DeltaKey(fromName, Wordbook.TokenToString(to))] = initialValue;
        }
    }

    // Epsilon entries: insertions first ...
    foreach (Token inserted in tokens)
    {
        scores[new DeltaKey("", Wordbook.TokenToString(inserted))] = -1;
    }

    // ... then deletions.
    foreach (Token deleted in tokens)
    {
        scores[new DeltaKey(Wordbook.TokenToString(deleted), "")] = -1;
    }

    return scores;
}
// Local alignment for ATOMS via the Smith–Waterman algorithm.
//
// x         - the pattern to look for (one table column per token, plus a zero column).
// y         - the longer sequence to search in (one table row per token, plus a zero row).
// delta     - score table looked up for substitutions (xToken, yToken),
//             deletions (xToken, "") and insertions ("", yToken).
// threshold - passed to FindMatchLocationsAboveThreshold to select match end cells.
//
// Returns one (startIndex, length) tuple per selected match, where both values are
// derived from row positions, i.e. positions in y.
static public List<Tuple<int, int>> GetLocationsOfLocalMatches(List<Token> x, List<Token> y, Dictionary<DeltaKey, int> delta, int threshold)
{
    int numColumns = x.Count + 1;
    int numRows = y.Count + 1;

    // A freshly allocated int[,] is zero-filled, so the first row and first
    // column already hold the zeros the algorithm requires.
    int[,] table = new int[numColumns, numRows];

    // Fill the table row by row, left to right, starting at (1, 1).
    for (int row = 1; row < numRows; row++)
    {
        for (int column = 1; column < numColumns; column++)
        {
            string xName = Wordbook.TokenToString(x[column - 1]);
            string yName = Wordbook.TokenToString(y[row - 1]);

            int deletion = table[column - 1, row] + delta[new DeltaKey(xName, "")];
            int insertion = table[column, row - 1] + delta[new DeltaKey("", yName)];
            int substitution = table[column - 1, row - 1] + delta[new DeltaKey(xName, yName)]; // match or replace

            // Scores never drop below zero.
            table[column, row] = Max(deletion, insertion, substitution, 0);
        }
    }

    //PrintTable(table, numColumns, numRows);

    // NOTE(review): the result of this call is not used below; confirm whether
    // FindMaxInTable is still needed here.
    Tuple<int, int> maxLocation = FindMaxInTable(table, numColumns, numRows);

    // Trace every selected cell back to the start of its match.
    var matches = new List<Tuple<int, int>>();
    foreach (var endCell in FindMatchLocationsAboveThreshold(table, numColumns, numRows, threshold))
    {
        Tuple<int, int> span = FindMatchingSubstring(table, endCell, x, y, delta);
        matches.Add(Tuple.Create(span.Item1, span.Item2 - span.Item1));
    }

    return matches;
}
} // SuSuSuSu

// Builds the delta (substitution score) table for a string of short rights.
// Starting from CreateInitialTable(-1):
//   Sr/Sr = 2 and Sr/Mr = 2 (a medium right counts as much as an exact match),
//   Sr/Sl = Sr/Su = Sr/Sd = 1 (another short direction is tolerated at a lower score),
//   and every single-token insertion or deletion = -10.
//
// A perfect match of the four-token pattern scores 8. Each Sr replaced by Sl, Su
// or Sd lowers that by one (7 for one replacement, 6 for two, ...), so a threshold
// of 7 allows exactly one such replacement somewhere in the string.
//
// todo: need a way to specify that the first and last tokens in the atom cannot
// change. Perhaps something like Sr - - Sr, where Sr matching Sr scores high and
// the dummy "-" scores equally high when replaced with Sr, but slightly lower when
// replaced with Su, Sl or Sd.
public static Dictionary<DeltaKey, int> CreateStringRightDeltaTable()
{
    Dictionary<DeltaKey, int> scores = CreateInitialTable(-1);

    // Full-score replacements for a short right.
    foreach (string full in new[] { "Sr", "Mr" })
    {
        scores[new DeltaKey("Sr", full)] = 2;
    }

    // Tolerated replacements: short left, short up, short down.
    foreach (string tolerated in new[] { "Sl", "Su", "Sd" })
    {
        scores[new DeltaKey("Sr", tolerated)] = 1;
    }

    // Gaps are heavily penalised in both directions.
    foreach (Token token in tokens)
    {
        string name = Wordbook.TokenToString(token);
        scores[new DeltaKey("", name)] = -10; // insertion
        scores[new DeltaKey(name, "")] = -10; // deletion
    }

    return scores;
} // SrSrSrSr, see notes for matching information
// Traces a local alignment backwards through the score table, starting at the
// cell maxLocation (Item1 = column, Item2 = row), until a cell holding 0 is reached.
// At each step the move that reproduces the current cell's score is taken,
// preferring the diagonal (match or replace), then left (deletion), then up
// (insertion).
//
// Returns (row where the trace stopped, row where it started).
static private Tuple<int, int> FindMatchingSubstring(int[,] table, Tuple<int, int> maxLocation, List<Token> x, List<Token> y, Dictionary<DeltaKey, int> delta)
{
    int column = maxLocation.Item1;
    int endRow = maxLocation.Item2;
    int row = endRow;

    // Keep walking until we hit a 0 in the table.
    while (table[column, row] != 0)
    {
        int current = table[column, row];

        // Diagonal predecessor: match or replace.
        int diagonalBase = table[column - 1, row - 1];
        string xName = Wordbook.TokenToString(x[column - 1]);
        string yName = Wordbook.TokenToString(y[row - 1]);
        int viaDiagonal = diagonalBase + delta[new DeltaKey(xName, yName)];
        if (current == viaDiagonal)
        {
            column--;
            row--;
            continue;
        }

        // Both remaining candidates are computed before either is tested.
        int viaLeft = table[column - 1, row] + delta[new DeltaKey(xName, "")]; // deletion
        int viaTop = table[column, row - 1] + delta[new DeltaKey("", yName)];  // insertion

        if (current == viaLeft)
        {
            column--;
        }
        else if (current == viaTop)
        {
            row--;
        }
    }

    return Tuple.Create(row, endRow);
}
} // SdSdSdSd

// Builds the delta (substitution score) table for a string of short lefts.
// Starting from CreateInitialTable(-1):
//   Sl/Sl = 2 and Sl/Ml = 2 (a medium left counts as much as an exact match),
//   Sl/Sd = Sl/Sr = Sl/Su = 1 (another short direction is tolerated at a lower score),
//   and every single-token insertion or deletion = -10.
// A perfect match of the four-token pattern scores 8.
public static Dictionary<DeltaKey, int> CreateStringLeftDeltaTable()
{
    Dictionary<DeltaKey, int> scores = CreateInitialTable(-1);

    // Full-score replacements for a short left.
    foreach (string full in new[] { "Sl", "Ml" })
    {
        scores[new DeltaKey("Sl", full)] = 2;
    }

    // Tolerated replacements: short down, short right, short up.
    foreach (string tolerated in new[] { "Sd", "Sr", "Su" })
    {
        scores[new DeltaKey("Sl", tolerated)] = 1;
    }

    // Gaps are heavily penalised in both directions.
    foreach (Token token in tokens)
    {
        string name = Wordbook.TokenToString(token);
        scores[new DeltaKey("", name)] = -10; // insertion
        scores[new DeltaKey(name, "")] = -10; // deletion
    }

    return scores;
} // SlSlSlSl