/// <summary>
/// Main Edlib method. Computes the edit distance between two strings using
/// global (NW) alignment.
/// </summary>
/// <param name="originalQuery">String a</param>
/// <param name="originalTarget">String b</param>
/// <param name="config">Configuration (mode, maximum distance K, additional equalities).</param>
/// <returns>
/// Result of the alignment. <c>EditDistance</c> stays -1 when the Myers routine
/// did not report a distance for the given <c>config.K</c>. When a distance was
/// found in NW mode, <c>EndLocations</c> holds the single entry
/// <c>targetLength - 1</c>. <c>StartingLocations</c> and <c>Alignment</c> are
/// never populated by this method.
/// </returns>
/// <exception cref="NotImplementedException">
/// <c>config.Mode</c> is <c>Hw</c> or <c>Shw</c>.
/// </exception>
/// <exception cref="ArgumentNullException">
/// <paramref name="originalQuery"/> or <paramref name="originalTarget"/> is null.
/// </exception>
public static AlignmentResult Align(string originalQuery, string originalTarget, AlignmentConfig config)
{
    // NOTE: Is other mode useful? If so, why?
    if (config.Mode == AlignmentMode.Hw || config.Mode == AlignmentMode.Shw)
    {
        throw new NotImplementedException();
    }

    if (originalQuery == null)
    {
        throw new ArgumentNullException(nameof(originalQuery));
    }

    if (originalTarget == null)
    {
        throw new ArgumentNullException(nameof(originalTarget));
    }

    AlignmentResult result = new AlignmentResult
    {
        Status = (int)AlignmentStatus.Ok,
        EditDistance = -1,
        EndLocations = null,
        StartingLocations = null,
        NumberOfLocations = 0,
        Alignment = null,
        AlignmentLength = 0,
        AlphabetLength = 0,
    };

    // Transform sequences and recognize alphabet
    byte[] query, target;
    string alphabet = TransformSequences(originalQuery, originalTarget, out query, out target);
    result.AlphabetLength = alphabet.Length;

    int queryLength = originalQuery.Length;
    int targetLength = originalTarget.Length;

    if (queryLength == 0 || targetLength == 0)
    {
        // Special case: with an empty sequence the global edit distance is simply
        // the length of the other one, so the block-based calculation (which would
        // be set up with an empty Peq table for an empty query) is skipped.
        // NOTE(review): config.K is not applied on this path, which is how the
        // upstream Edlib special case behaves; confirm that this is desired.
        result.EditDistance = Math.Max(queryLength, targetLength);
    }
    else
    {
        // Initialization
        int maxBlocks = CeilDivision(queryLength, WordSize);
        // Number of padding positions after the query in its last block.
        int w = maxBlocks * WordSize - queryLength;
        EqualityDefinition equalityDefinition = new EqualityDefinition(
            alphabet, config.AdditionalEqualities, config.AdditionalEqualitiesLength);
        ulong[] peq = BuildPeq(alphabet.Length, query, queryLength, ref equalityDefinition);

        // Main Calculation
        int positionNw = -1;
        AlignmentData alignmentData = new AlignmentData();
        bool dynamicK = false;
        int k = config.K;
        if (k < 0) // If valid k is not given, auto-adjust k until solution is found.
        {
            dynamicK = true;
            k = WordSize; // Gives better results than smaller k.
        }

        do
        {
            MyersEditDistanceNw(peq, w, maxBlocks, queryLength, target, targetLength, k, false, -1,
                ref result.EditDistance, ref positionNw, ref alignmentData);
            k *= 2;
        }
        while (dynamicK && result.EditDistance == -1);
    }

    // NOTE: Do we need this block of code? We only care about the edit distance.
    // Since we only care about the edit distance score, this is where we stop.
    // Start locations are intentionally not computed (StartingLocations stays null).
    if (result.EditDistance >= 0 && config.Mode == AlignmentMode.Nw)
    {
        // If NW mode, set end location explicitly. Exactly one location is reported,
        // so the array holds exactly one element.
        result.EndLocations = new int[] { targetLength - 1 };
        result.NumberOfLocations = 1;
    }

    return result;
}
/// <summary>
/// Build Peq table for given query and alphabet.
/// Peq is table of dimensions alphabetLength+1 x maxNumBlocks.
/// Bit i of Peq[s * maxNumBlocks + b] is 1 if i-th symbol from block b of query equals symbol s, otherwise it is 0.
/// The last row (index alphabetLength) is the wildcard symbol and consists of all 1s.
/// </summary>
/// <param name="alphabetLength">Number of symbols in the alphabet (the wildcard row is added on top of this).</param>
/// <param name="query">Query sequence; each element is compared against symbol indices via <paramref name="equalityDefinition"/>.</param>
/// <param name="queryLength">Number of query symbols to use; positions at or beyond it are treated as wildcard padding.</param>
/// <param name="equalityDefinition">Decides whether a query symbol equals an alphabet symbol.</param>
/// <returns>Table of dimensions alphabetLength+1 x maxNumBlocks.</returns>
private static ulong[] BuildPeq(int alphabetLength, byte[] query, int queryLength, ref EqualityDefinition equalityDefinition)
{
    int maxBlocks = CeilDivision(queryLength, WordSize);

    // Table of dimensions alphabetLength+1 x maxNumBlocks. Last symbol is wildcard.
    ulong[] peq = new ulong[(alphabetLength + 1) * maxBlocks];

    // Build Peq (1 is match, 0 is mismatch).
    // The counter is an int on purpose: a byte counter can never exceed 255, so the
    // loop would not terminate once alphabetLength reaches 255.
    for (int symbol = 0; symbol < alphabetLength; symbol++)
    {
        byte symbolCode = (byte)symbol;
        int rowStart = symbol * maxBlocks;

        for (int b = 0; b < maxBlocks; b++)
        {
            ulong mask = 0;

            // Walk the block from its highest query position down to its lowest so
            // that, after all shifts, bit i corresponds to the i-th symbol of the block.
            for (int r = (b + 1) * WordSize - 1; r >= b * WordSize; r--)
            {
                mask <<= 1;

                // NOTE: We pretend like query is padded at the end with W wildcard symbols
                if (r >= queryLength || equalityDefinition.IsEqual(query[r], symbolCode))
                {
                    mask |= 1;
                }
            }

            peq[rowStart + b] = mask;
        }
    }

    // Wildcard row: symbol that matches anything, so every bit is 1.
    int wildcardRowStart = alphabetLength * maxBlocks;
    for (int b = 0; b < maxBlocks; b++)
    {
        peq[wildcardRowStart + b] = UInt64.MaxValue;
    }

    return peq;
}