/// <summary>
/// Finds every occurrence of <paramref name="pattern"/> in <paramref name="text"/> with a
/// bit-parallel scan: one bit of a 64-bit state word per pattern position.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="pattern">
/// Pattern to look for. The state is a single <c>long</c>, so only patterns of at most
/// 64 characters can be represented; longer patterns are not detected here.
/// </param>
/// <param name="isSaveStatisticsForEmpty">
/// When <c>false</c> and nothing was found, the statistics record created for this run is removed
/// instead of being saved.
/// </param>
/// <returns>Zero-based start indexes of all matches, in ascending order.</returns>
public List<int> FindSubstring(string text, string pattern, bool isSaveStatisticsForEmpty = true)
{
    stopwatch = new Stopwatch();
    stopwatch.Start();
    StatisticAccumulator.CreateStatistics(text, pattern);

    List<int> result = new List<int>();

    // NOTE(review): presumably fills letterVectors with one bit mask per pattern character - confirm.
    PrePreprocessString(pattern);

    long textAsNumber = 0;

    // The literal must be a long: with an int literal the shift is done in 32 bits (the shift
    // count is masked to 5 bits), which yields a wrong mask for patterns longer than 32 characters.
    long lastBit = 1L << (pattern.Length - 1);

    for (int i = 0; i < text.Length; i++)
    {
        // Extend every partial match by one position and start a new one at bit 0.
        textAsNumber <<= 1;
        textAsNumber |= 1;

        // Characters without an entry leave the mask at 0, which clears all partial matches.
        long letterVector = 0;
        letterVectors.TryGetValue(text[i], out letterVector);
        textAsNumber &= letterVector;

        // The highest pattern bit being set means the whole pattern ends at position i.
        if ((textAsNumber & lastBit) != 0)
        {
            result.Add(i - pattern.Length + 1);
        }
    }

    stopwatch.Stop();

    if (result.Count > 0 || isSaveStatisticsForEmpty)
    {
        long elapsedTicks = stopwatch.ElapsedTicks;
        long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
        _outputPresentation = string.Join(",", result.Select(p => p.ToString()));
        StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
    }
    else
    {
        StatisticAccumulator.RemoveStatisticData();
    }

    return result;
}
/// <summary>
/// Finds every occurrence of <paramref name="pattern"/> in <paramref name="text"/> by
/// preprocessing the concatenation <c>pattern + "#" + text</c> and inspecting the resulting
/// per-position values.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="pattern">Pattern to look for.</param>
/// <param name="isSaveStatisticsForEmpty">
/// When <c>false</c> and nothing was found, the statistics record created for this run is removed
/// instead of being saved.
/// </param>
/// <returns>Zero-based start indexes (relative to <paramref name="text"/>) of all matches.</returns>
public List<int> FindSubstring(string text, string pattern, bool isSaveStatisticsForEmpty = true)
{
    stopwatch = new Stopwatch();
    stopwatch.Start();
    StatisticAccumulator.CreateStatistics(text, pattern);

    List<int> result = new List<int>();

    string totalString = pattern + "#" + text;

    // NOTE(review): assumes PreprocessString returns, for each index, the length of the longest
    // substring starting there that equals a prefix of totalString (a Z-array) - confirm.
    zValue = PreprocessString(totalString);

    int lenPattern = pattern.Length;
    int textShift = lenPattern + 1; // index of text[0] inside totalString

    for (int i = lenPattern; i < totalString.Length; i++)
    {
        StatisticAccumulator.IterationCountInc();

        // i == lenPattern is the separator itself and can never be a match start inside text
        // (it used to yield position -1 for patterns beginning with '#').
        // ">=" instead of "==": when '#' also occurs in the input the common prefix can run
        // past the pattern, and such positions are still genuine matches.
        if (i >= textShift && zValue[i] >= lenPattern)
        {
            result.Add(i - textShift);
        }
    }

    stopwatch.Stop();

    if (result.Count > 0 || isSaveStatisticsForEmpty)
    {
        long elapsedTicks = stopwatch.ElapsedTicks;
        long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
        _outputPresentation = string.Join(",", result.Select(p => p.ToString()));
        StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
    }
    else
    {
        StatisticAccumulator.RemoveStatisticData();
    }

    return result;
}
/// <summary>
/// Builds a suffix tree for <paramref name="text"/> by inserting its suffixes one after another,
/// each time walking down from the root and comparing characters along the edges.
/// </summary>
/// <remarks>
/// A <c>'$'</c> sentinel is appended to the text; edge segments are index ranges into that
/// extended string and every leaf ends at the sentinel index. The sentinel is expected not to
/// occur in <paramref name="text"/>: if it does, one suffix can be a prefix of another and the
/// edge comparison can index past the end of the string.
/// Statistics and timing are recorded only when <c>StatisticAccumulator</c> is not null.
/// </remarks>
/// <param name="text">Text to index.</param>
/// <returns>The root node of the constructed tree.</returns>
public override SuffixTreeNode Execute(string text)
{
    bool hasStatistics = StatisticAccumulator != null;
    if (hasStatistics)
    {
        stopwatch = new Stopwatch();
        stopwatch.Start();
        StatisticAccumulator.CreateStatistics(text);
    }

    int lastPositionInText = text.Length; // index of the sentinel in the extended text
    text += "$";
    root = new SuffixTreeNode();

    // The suffix consisting of the sentinel alone is not inserted.
    for (int i = 0; i < text.Length - 1; i++)
    {
        int j = i; // next character of the current suffix still to be placed
        SuffixTreeNode current = root;

        while (current != null)
        {
            SuffixTreeNode next;
            if (!current.Chields.TryGetValue(text[j], out next))
            {
                // No edge starts with this character: hang the rest of the suffix as a new leaf.
                if (hasStatistics)
                {
                    StatisticAccumulator.IterationCountInc(2);
                }

                current.Chields.Add(text[j], new SuffixTreeNode()
                {
                    Parent = current,
                    StarSegment = j,
                    EndSegment = lastPositionInText,
                    StartSymbol = text[j],
                    StarPosition = i
                });
                break;
            }

            int k = next.StarSegment;
            if (hasStatistics)
            {
                StatisticAccumulator.IterationCountInc(3);
            }

            // Walk along the edge while the suffix agrees with it.
            while (k <= next.EndSegment)
            {
                if (hasStatistics)
                {
                    StatisticAccumulator.IterationCountInc();
                    StatisticAccumulator.NumberOfComparisonInc();
                }

                if (text[j] != text[k])
                {
                    break;
                }

                j++;
                k++;
            }

            if (k > next.EndSegment)
            {
                // Whole edge matched: continue from the child node.
                if (hasStatistics)
                {
                    StatisticAccumulator.IterationCountInc();
                }

                current = next;
                continue;
            }

            // Mismatch inside the edge (text[j] != text[k]): split the edge at k and attach
            // the remainder of the suffix as a new leaf below the new inner node.
            if (hasStatistics)
            {
                StatisticAccumulator.IterationCountInc(14);
            }

            SuffixTreeNode newMiddle = new SuffixTreeNode()
            {
                Parent = next.Parent,
                StarSegment = next.StarSegment,
                EndSegment = k - 1,
                StartSymbol = next.StartSymbol
            };
            SuffixTreeNode newLeaf = new SuffixTreeNode()
            {
                Parent = newMiddle,
                StarSegment = j,
                EndSegment = lastPositionInText,
                StartSymbol = text[j],
                StarPosition = i
            };

            newMiddle.Chields.Add(text[k], next);
            newMiddle.Chields.Add(text[j], newLeaf);
            newMiddle.Parent.Chields[newMiddle.StartSymbol] = newMiddle;

            // The old child keeps only the part of the edge after the split point.
            next.Parent = newMiddle;
            next.StarSegment = k;
            next.StartSymbol = text[k];
            break;
        }
    }

    string outputPresentation = NodePresentationAsString(root);

    if (hasStatistics)
    {
        stopwatch.Stop();
        long elapsedTicks = stopwatch.ElapsedTicks;
        long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
        StatisticAccumulator.SaveStatisticData(outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
    }

    return root;
}
//--------------------------------------------------------------------------------------
/// <summary>
/// Finds every occurrence of <paramref name="pattern"/> in <paramref name="text"/> by building a
/// suffix tree for the text and walking the pattern down from the root.
/// </summary>
/// <param name="text">Text to search in; it is also passed to <paramref name="suffixTreeBase"/>.</param>
/// <param name="pattern">Pattern to look for. An empty pattern yields no matches.</param>
/// <param name="suffixTreeBase">Suffix-tree builder whose <c>Execute</c> produces the tree root.</param>
/// <param name="isSaveStatisticsForEmpty">
/// When <c>false</c> and nothing was found, the statistics record created for this run is removed
/// instead of being saved.
/// </param>
/// <returns>The positions collected by <c>Collectleave</c> from the node where the pattern ends.</returns>
public List<int> FindSubstring(string text, string pattern, SuffixTreeBase suffixTreeBase, bool isSaveStatisticsForEmpty = true)
{
    stopwatch = new Stopwatch();
    stopwatch.Start();
    StatisticAccumulator.CreateStatistics(text, pattern);

    SuffixTreeNode root = suffixTreeBase.Execute(text);
    List<int> result = new List<int>();

    SuffixTreeNode lastNode = null;
    SuffixTreeNode currentNode = root;
    int patternPosition = 0;

    // The length guard also covers the empty pattern, which used to fail on pattern[0].
    while (lastNode == null && patternPosition < pattern.Length)
    {
        SuffixTreeNode nextNode = null;
        if (!currentNode.Chields.TryGetValue(pattern[patternPosition], out nextNode))
        {
            break;
        }

        bool mismatch = false;
        for (int i = nextNode.StarSegment; i <= nextNode.EndSegment; i++)
        {
            // NOTE(review): segments appear to index into text plus a trailing sentinel, so an
            // index of text.Length is the sentinel and can never match a pattern character - confirm
            // for every SuffixTreeBase implementation.
            if (i >= text.Length || text[i] != pattern[patternPosition])
            {
                mismatch = true;
                break;
            }

            patternPosition++;
            if (patternPosition == pattern.Length)
            {
                lastNode = nextNode;
                break;
            }
        }

        if (mismatch)
        {
            break;
        }

        // Edge fully matched: descend. Without this step the search restarted from the root
        // for every edge and reported wrong positions for patterns spanning several edges.
        currentNode = nextNode;
    }

    if (lastNode != null)
    {
        // NOTE(review): presumably adds the start positions of all leaves below lastNode - confirm.
        Collectleave(lastNode, result);
    }

    stopwatch.Stop();

    if (result.Count > 0 || isSaveStatisticsForEmpty)
    {
        long elapsedTicks = stopwatch.ElapsedTicks;
        long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
        _outputPresentation = string.Join(",", result.Select(p => p.ToString()));
        StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
    }
    else
    {
        StatisticAccumulator.RemoveStatisticData();
    }

    return result;
}
//--------------------------------------------------------------------------------------
/// <summary>
/// Searches <paramref name="text"/> for <paramref name="pattern"/> by comparing the pattern right
/// to left at each alignment and, after a mismatch, advancing by the larger of two shifts: one
/// derived from the mismatching text character and one derived from the already matched suffix.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="pattern">Pattern to look for.</param>
/// <param name="isSaveStatisticsForEmpty">
/// When <c>false</c> and nothing was found, the statistics record created for this run is removed
/// instead of being saved.
/// </param>
/// <returns>The shared result list holding the zero-based start indexes of all matches.</returns>
public List<int> FindSubstringByGoodSuffixBadSymbolAdv(string text, string pattern, bool isSaveStatisticsForEmpty = true)
{
    stopwatch = new Stopwatch();
    stopwatch.Start();
    StatisticAccumulator.CreateStatistics(text, pattern);
    result.Clear();

    // NOTE(review): presumably these fill llisValue, lisValue and rAdvValue respectively - confirm.
    LliPreprocessString(pattern);
    LiByNPreprocessString(pattern);
    BadSymbolAdvPreprocessString(pattern);

    int patternLength = pattern.Length;
    int lastAlignment = text.Length - patternLength;
    int alignment = 0;

    while (alignment <= lastAlignment)
    {
        int patternIndex = patternLength - 1;
        int textIndex = patternIndex + alignment;
        StatisticAccumulator.IterationCountInc(2);

        // Compare right to left until a mismatch or until the pattern is exhausted.
        while (patternIndex >= 0)
        {
            StatisticAccumulator.IterationCountInc();
            StatisticAccumulator.NumberOfComparisonInc();
            if (pattern[patternIndex] != text[textIndex])
            {
                break;
            }

            patternIndex--;
            textIndex--;
            StatisticAccumulator.IterationCountInc(2);
        }

        if (patternIndex < 0)
        {
            // Full match at this alignment.
            result.Add(alignment);
            alignment += patternLength - llisValue[1];
            StatisticAccumulator.IterationCountInc(2);
            continue;
        }

        int suffixShift = 1;
        int symbolShift;
        StatisticAccumulator.IterationCountInc(4);

        List<int> occurrences;
        if (rAdvValue.TryGetValue(text[textIndex], out occurrences))
        {
            StatisticAccumulator.IterationCountInc(3);

            // Step past every listed position that lies left of the mismatch ...
            int cursor = 0;
            while (cursor < occurrences.Count && occurrences[cursor] < patternIndex)
            {
                cursor++;
                StatisticAccumulator.IterationCountInc(3);
            }

            // ... then go back one entry, to the last position that was stepped over.
            if (cursor > 0)
            {
                cursor--;
                StatisticAccumulator.IterationCountInc();
            }

            StatisticAccumulator.IterationCountInc(3);
            int maxPos = occurrences[cursor];
            symbolShift = maxPos >= patternIndex
                ? patternIndex + 1
                : patternIndex - maxPos;
        }
        else
        {
            // No entry for this text character: move the pattern past it.
            symbolShift = textIndex + 1 - alignment;
        }

        if (patternIndex < patternLength - 1)
        {
            StatisticAccumulator.IterationCountInc(3);
            suffixShift = lisValue[patternIndex] > 0
                ? patternLength - lisValue[patternIndex] - 1
                : patternLength - llisValue[patternIndex];
        }

        StatisticAccumulator.IterationCountInc(2);
        alignment += Math.Max(symbolShift, suffixShift);
    }

    stopwatch.Stop();

    bool keepStatistics = result.Count > 0 || isSaveStatisticsForEmpty;
    if (!keepStatistics)
    {
        StatisticAccumulator.RemoveStatisticData();
        return result;
    }

    long elapsedTicks = stopwatch.ElapsedTicks;
    long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
    _outputPresentation = string.Join(",", result.Select(p => p.ToString()));
    StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
    return result;
}
//--------------------------------------------------------------------------------------
/// <summary>
/// Searches <paramref name="text"/> for <paramref name="pattern"/> by comparing the pattern right
/// to left at each alignment and advancing with shifts taken from the <c>lisValue</c> and
/// <c>llisValue</c> tables.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="pattern">Pattern to look for.</param>
/// <param name="isSaveStatisticsForEmpty">
/// When <c>false</c> and nothing was found, the statistics record created for this run is removed
/// instead of being saved.
/// </param>
/// <returns>The shared result list holding the zero-based start indexes of all matches.</returns>
public List<int> FindSubstringGoodSuffix(string text, string pattern, bool isSaveStatisticsForEmpty = true)
{
    stopwatch = new Stopwatch();
    stopwatch.Start();
    StatisticAccumulator.CreateStatistics(text, pattern);
    result.Clear();

    // NOTE(review): presumably these fill llisValue and lisValue respectively - confirm.
    LliPreprocessString(pattern);
    LiByNPreprocessString(pattern);

    int patternLength = pattern.Length;
    int lastAlignment = text.Length - patternLength;
    int alignment = 0;

    while (alignment <= lastAlignment)
    {
        int patternIndex = patternLength - 1;
        int textIndex = patternIndex + alignment;
        StatisticAccumulator.IterationCountInc(2);

        // Compare right to left until a mismatch or until the pattern is exhausted.
        while (patternIndex >= 0)
        {
            StatisticAccumulator.IterationCountInc();
            StatisticAccumulator.NumberOfComparisonInc();
            if (pattern[patternIndex] != text[textIndex])
            {
                break;
            }

            StatisticAccumulator.IterationCountInc(2);
            patternIndex--;
            textIndex--;
        }

        if (patternIndex < 0)
        {
            // Full match at this alignment.
            result.Add(alignment);
            alignment += patternLength - llisValue[1];
            StatisticAccumulator.IterationCountInc(2);
            continue;
        }

        StatisticAccumulator.IterationCountInc();

        if (patternIndex >= patternLength - 1)
        {
            // Mismatch on the very last pattern character: nothing matched, move by one.
            alignment++;
            continue;
        }

        StatisticAccumulator.IterationCountInc();
        int tableShift = lisValue[patternIndex] > 0
            ? patternLength - lisValue[patternIndex] - 1
            : patternLength - llisValue[patternIndex];
        alignment += tableShift;
    }

    stopwatch.Stop();

    bool keepStatistics = result.Count > 0 || isSaveStatisticsForEmpty;
    if (!keepStatistics)
    {
        StatisticAccumulator.RemoveStatisticData();
        return result;
    }

    long elapsedTicks = stopwatch.ElapsedTicks;
    long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
    _outputPresentation = string.Join(",", result.Select(p => p.ToString()));
    StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
    return result;
}
//--------------------------------------------------------------------------------------
/// <summary>
/// Searches <paramref name="text"/> for <paramref name="pattern"/> by comparing the pattern right
/// to left at each alignment and, after a mismatch, shifting according to the positions listed
/// in <c>rAdvValue</c> for the mismatching text character.
/// </summary>
/// <remarks>
/// In DEBUG builds additional counters and intermediate stopwatch readings are recorded.
/// <c>ReturnToPool</c> is called before returning.
/// </remarks>
/// <param name="text">Text to scan.</param>
/// <param name="pattern">Pattern to look for.</param>
/// <param name="isSaveStatisticsForEmpty">
/// When <c>false</c> and nothing was found, the statistics record created for this run is removed
/// instead of being saved.
/// </param>
/// <returns>The shared result list holding the zero-based start indexes of all matches.</returns>
public List<int> FindSubstringBadSymbolAdv(string text, string pattern, bool isSaveStatisticsForEmpty = true)
{
    stopwatch = new Stopwatch();
    stopwatch.Start();
    StatisticAccumulator.CreateStatistics(text, pattern);
#if (DEBUG)
    elapsedTicksList.Clear();
    coreProcess = 0;
    dictionaryProcess = 0;
    outerLoop = 0;
#endif
    result.Clear();
#if (DEBUG)
    elapsedTicksList.Add(stopwatch.ElapsedTicks);
#endif
    // NOTE(review): presumably fills rAdvValue with, per character, its positions in the pattern
    // in ascending order - confirm.
    BadSymbolAdvPreprocessString(pattern);
    int i = 0;
#if (DEBUG)
    elapsedTicksList.Add(stopwatch.ElapsedTicks);
#endif
    while (i <= text.Length - pattern.Length)
    {
        int j = pattern.Length - 1; // index in the pattern
        int j0 = j + i;             // matching index in the text
        StatisticAccumulator.IterationCountInc(2);
#if (DEBUG)
        outerLoop++;
#endif
        // Compare right to left until a mismatch or until the pattern is exhausted.
        while (j >= 0)
        {
            StatisticAccumulator.NumberOfComparisonInc();
            StatisticAccumulator.IterationCountInc();
            if (pattern[j] != text[j0])
            {
                break;
            }

            j--;
            j0--;
            StatisticAccumulator.IterationCountInc(2);
#if (DEBUG)
            coreProcess++;
#endif
        }

        if (j < 0)
        {
            // Full match at this alignment.
            result.Add(i);
            i++;
            StatisticAccumulator.IterationCountInc(2);
        }
        else
        {
            StatisticAccumulator.IterationCountInc();
            List<int> rAdv = null;
            if (rAdvValue.TryGetValue(text[j0], out rAdv))
            {
                int l = 0;
                StatisticAccumulator.IterationCountInc(3);

                // Step past every listed position that lies left of the mismatch ...
                while (l < rAdv.Count && rAdv[l] < j)
                {
#if (DEBUG)
                    dictionaryProcess++;
#endif
                    l++;
                    StatisticAccumulator.IterationCountInc(3);
                }

                // ... then go back one entry, to the last position that was stepped over.
                if (l > 0)
                {
                    l--;
                    StatisticAccumulator.IterationCountInc();
                }

                StatisticAccumulator.IterationCountInc(2);
                int maxPos = rAdv[l];
                if (maxPos >= j)
                {
                    i += j + 1;
                }
                else
                {
                    i += j - maxPos;
                }
            }
            else
            {
                // No entry for this text character: restart right after it.
                StatisticAccumulator.IterationCountInc();
                i = j0 + 1;
            }
        }
#if (DEBUG)
        elapsedTicksList.Add(stopwatch.ElapsedTicks);
#endif
    }

    stopwatch.Stop();

    if (result.Count > 0 || isSaveStatisticsForEmpty)
    {
        elapsedTicks = stopwatch.ElapsedTicks;
        durationMilliSeconds = stopwatch.ElapsedMilliseconds;
        _outputPresentation = string.Join(",", result.Select(p => p.ToString()));
        StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
    }
    else
    {
        StatisticAccumulator.RemoveStatisticData();
    }

    ReturnToPool();
    return result;
}