Exemple #1
0
        /// <summary>
        /// Finds occurrences of <paramref name="pattern"/> in <paramref name="text"/> with a bit-vector scan:
        /// a 64-bit state is shifted left once per text character and masked with that character's vector.
        /// </summary>
        /// <param name="text">Text to scan.</param>
        /// <param name="pattern">Pattern to look for.</param>
        /// <param name="isSaveStatisticsForEmpty">
        /// When false and nothing is found, the statistics record created for this call is removed instead of saved.
        /// </param>
        /// <returns>Start indexes in <paramref name="text"/> at which a match was detected.</returns>
        /// <remarks>
        /// The state is a single <c>long</c>, so patterns longer than 64 characters cannot be tracked correctly.
        /// </remarks>
        public List <int> FindSubstring(string text, string pattern, bool isSaveStatisticsForEmpty = true)
        {
            stopwatch = new Stopwatch();
            stopwatch.Start();
            StatisticAccumulator.CreateStatistics(text, pattern);

            List <int> result = new List <int>();

            // Presumably fills letterVectors with one bit mask per pattern character - confirm.
            PrePreprocessString(pattern);
            long textAsNumber = 0;
            // The literal must be a long: "1 << n" is evaluated as a 32-bit shift (count masked to 5 bits),
            // which produced a wrong mask for every pattern longer than 32 characters.
            long lastBit      = 1L << (pattern.Length - 1);

            for (int i = 0; i < text.Length; i++)
            {
                // Extend every partial match by one position and start a new one at bit 0.
                textAsNumber <<= 1;
                textAsNumber  |= 1;
                // TryGetValue leaves the vector at 0 for a character without an entry, which clears the state.
                long letterVector = 0;
                letterVectors.TryGetValue(text[i], out letterVector);
                textAsNumber &= letterVector;
                if ((textAsNumber & lastBit) != 0)
                {
                    // The match ends at i, so it starts pattern.Length - 1 characters earlier.
                    result.Add(i - pattern.Length + 1);
                }
            }
            stopwatch.Stop();
            if (result.Count > 0 || isSaveStatisticsForEmpty)
            {
                long elapsedTicks         = stopwatch.ElapsedTicks;
                long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
                _outputPresentation = string.Join(",", result.Select(p => p.ToString()));

                StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
            }
            else
            {
                StatisticAccumulator.RemoveStatisticData();
            }
            return(result);
        }
Exemple #2
0
        //private string _outputPresentation;
        //public string OutputPresentation
        //{
        //    get
        //    {
        //        return _outputPresentation;
        //    }
        //}

        /// <summary>
        /// Finds occurrences of <paramref name="pattern"/> in <paramref name="text"/> by preprocessing the
        /// concatenation <c>pattern + "#" + text</c> and inspecting the value computed for each position.
        /// </summary>
        /// <param name="text">Text to scan.</param>
        /// <param name="pattern">Pattern to look for.</param>
        /// <param name="isSaveStatisticsForEmpty">
        /// When false and nothing is found, the statistics record created for this call is removed instead of saved.
        /// </param>
        /// <returns>Start indexes of the matches, relative to <paramref name="text"/>.</returns>
        public List <int> FindSubstring(string text, string pattern, bool isSaveStatisticsForEmpty = true)
        {
            stopwatch = new Stopwatch();
            stopwatch.Start();
            StatisticAccumulator.CreateStatistics(text, pattern);

            List <int> result      = new List <int>();
            string     totalString = pattern + "#" + text;

            // Assumes PreprocessString returns, for every index, the length of the longest prefix of
            // totalString that also starts at that index (Z-values) - confirm.
            zValue = PreprocessString(totalString);
            int lenPattern = pattern.Length;
            // Offset of the first text character inside totalString (pattern plus the separator).
            int textShift  = lenPattern + 1;

            for (int i = lenPattern; i < totalString.Length; i++)
            {
                StatisticAccumulator.IterationCountInc();
                // Index lenPattern is the separator itself, not a text position, so it can never be a match start.
                // ">=" instead of "==": when text contains '#', the common prefix can run past the separator
                // and exceed the pattern length even though the pattern does occur at this position.
                if (i >= textShift && zValue[i] >= lenPattern)
                {
                    result.Add(i - textShift);
                }
            }

            stopwatch.Stop();
            if (result.Count > 0 || isSaveStatisticsForEmpty)
            {
                long elapsedTicks         = stopwatch.ElapsedTicks;
                long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
                _outputPresentation = string.Join(",", result.Select(p => p.ToString()));

                StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
            }
            else
            {
                StatisticAccumulator.RemoveStatisticData();
            }

            return(result);
        }
        /// <summary>
        /// Builds a suffix tree for <paramref name="text"/> by inserting its suffixes one at a time,
        /// each insertion starting at the root, and returns the root node.
        /// </summary>
        /// <param name="text">Text to index; "$" is appended to the local copy before the tree is built.</param>
        /// <returns>The root of the newly built tree (also stored in <c>root</c>).</returns>
        /// <remarks>
        /// Timing and counters are recorded only when <c>StatisticAccumulator</c> is not null.
        /// NOTE(review): the numeric arguments passed to IterationCountInc look like estimated
        /// operation counts for the branch taken - confirm.
        /// </remarks>
        public override SuffixTreeNode Execute(string text)
        {
            if (StatisticAccumulator != null)
            {
                stopwatch = new Stopwatch();
                stopwatch.Start();
                StatisticAccumulator.CreateStatistics(text);
            }

            // Index that the appended "$" will occupy; used as EndSegment of every new leaf.
            int lastPositionInText = text.Length;

            text += "$";
            root  = new SuffixTreeNode();
            // text now ends with "$", so i visits every position of the original text.
            for (int i = 0; i < text.Length - 1; i++)
            {
                // j is the position in text that still has to be placed in the tree for suffix i.
                int            j       = i;
                SuffixTreeNode current = root;
                while (current != null)
                {
                    if (current.Chields.ContainsKey(text[j]))
                    {
                        SuffixTreeNode next = current.Chields[text[j]];
                        // NOTE(review): j0 is assigned but never read.
                        int            j0   = j;
                        int            k    = next.StarSegment;
                        if (StatisticAccumulator != null)
                        {
                            StatisticAccumulator.IterationCountInc(3);
                        }
                        // Compare the suffix against the child's segment [StarSegment..EndSegment].
                        while (k <= next.EndSegment)
                        {
                            if (StatisticAccumulator != null)
                            {
                                StatisticAccumulator.IterationCountInc();
                                StatisticAccumulator.NumberOfComparisonInc();
                            }
                            // j is advanced even on a mismatch, so the mismatching text position is j - 1 below.
                            if (text[j++] != text[k])
                            {
                                break;
                            }
                            k++;
                        }
                        if (k > next.EndSegment)
                        {
                            // The whole segment matched: continue the walk from the child.
                            if (StatisticAccumulator != null)
                            {
                                StatisticAccumulator.IterationCountInc();
                            }
                            current = next;
                        }
                        else
                        {
                            // Mismatch inside the segment at k: split the edge there.
                            if (StatisticAccumulator != null)
                            {
                                StatisticAccumulator.IterationCountInc(14);
                            }
                            // newMiddle takes over the matched part [StarSegment..k - 1] of the old edge.
                            SuffixTreeNode newMiddle = new SuffixTreeNode()
                            {
                                Parent      = next.Parent,
                                StarSegment = next.StarSegment,
                                EndSegment  = k - 1,
                                StartSymbol = next.StartSymbol
                            };
                            // newLeaf holds the rest of suffix i, from the mismatching position to the "$".
                            SuffixTreeNode newLeaf = new SuffixTreeNode()
                            {
                                Parent       = newMiddle,
                                StarSegment  = j - 1,
                                EndSegment   = lastPositionInText,
                                StartSymbol  = text[j - 1],
                                StarPosition = i
                            };
                            newMiddle.Chields.Add(text[k], next);
                            newMiddle.Chields.Add(text[j - 1], newLeaf);
                            // Replace the old child in the parent's map with newMiddle.
                            newMiddle.Parent.Chields[newMiddle.StartSymbol] = newMiddle;
                            // The old child keeps only the unmatched tail, starting at k.
                            next.Parent      = newMiddle;
                            next.StarSegment = k;
                            next.StartSymbol = text[k];
                            break;
                        }
                    }
                    else
                    {
                        // No child starts with text[j]: attach the rest of suffix i as a new leaf.
                        if (StatisticAccumulator != null)
                        {
                            StatisticAccumulator.IterationCountInc(2);
                        }
                        current.Chields.Add(text[j], new SuffixTreeNode()
                        {
                            Parent       = current,
                            StarSegment  = j,
                            EndSegment   = lastPositionInText,
                            StartSymbol  = text[j],
                            StarPosition = i
                        });
                        break;
                    }
                }
            }

            // Computed even when statistics are disabled; the string is only used for SaveStatisticData.
            string outputPresentation = NodePresentationAsString(root);

            if (StatisticAccumulator != null)
            {
                stopwatch.Stop();
                long elapsedTicks         = stopwatch.ElapsedTicks;
                long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
                StatisticAccumulator.SaveStatisticData(outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
            }


            return(root);
        }
Exemple #4
0
        //--------------------------------------------------------------------------------------

        /// <summary>
        /// Finds occurrences of <paramref name="pattern"/> in <paramref name="text"/> by building a suffix tree
        /// with <paramref name="suffixTreeBase"/> and walking it from the root along the pattern.
        /// </summary>
        /// <param name="text">Text to index and search.</param>
        /// <param name="pattern">Pattern to look for; an empty pattern yields no matches.</param>
        /// <param name="suffixTreeBase">Builder whose Execute method returns the root of the tree for the text.</param>
        /// <param name="isSaveStatisticsForEmpty">
        /// When false and nothing is found, the statistics record created for this call is removed instead of saved.
        /// </param>
        /// <returns>The positions collected by Collectleave from the node at which the pattern ends.</returns>
        public List <int> FindSubstring(string text, string pattern, SuffixTreeBase suffixTreeBase, bool isSaveStatisticsForEmpty = true)
        {
            stopwatch = new Stopwatch();
            stopwatch.Start();
            StatisticAccumulator.CreateStatistics(text, pattern);

            SuffixTreeNode root = suffixTreeBase.Execute(text);

            List <int> result = new List <int>();

            SuffixTreeNode lastNode        = null;
            SuffixTreeNode currentNode     = root;
            int            patternPosition = 0;

            // The bound on patternPosition guards the pattern indexer against an empty pattern.
            while (lastNode == null && patternPosition < pattern.Length)
            {
                SuffixTreeNode nextNode = null;
                if (!currentNode.Chields.TryGetValue(pattern[patternPosition], out nextNode))
                {
                    break;
                }

                bool isMismatch = false;
                for (int i = nextNode.StarSegment; i <= nextNode.EndSegment; i++)
                {
                    // An edge may end one position past the text (presumably the terminator appended by the
                    // tree builder - confirm); that position can never match a pattern character.
                    if (i >= text.Length || text[i] != pattern[patternPosition])
                    {
                        isMismatch = true;
                        break;
                    }
                    patternPosition++;
                    if (patternPosition == pattern.Length)
                    {
                        // The whole pattern has been consumed on the edge that leads to nextNode.
                        lastNode = nextNode;
                        break;
                    }
                }
                if (isMismatch)
                {
                    break;
                }

                // The edge matched completely: descend, so the next lookup is made from this child.
                // Previously the walk stayed on the starting node and looked the next character up there.
                currentNode = nextNode;
            }

            if (lastNode != null)
            {
                Collectleave(lastNode, result);
            }

            stopwatch.Stop();
            if (result.Count > 0 || isSaveStatisticsForEmpty)
            {
                long elapsedTicks         = stopwatch.ElapsedTicks;
                long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
                _outputPresentation = string.Join(",", result.Select(p => p.ToString()));

                StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
            }
            else
            {
                StatisticAccumulator.RemoveStatisticData();
            }

            return(result);
        }
        //--------------------------------------------------------------------------------------
        /// <summary>
        /// Searches <paramref name="text"/> for <paramref name="pattern"/>, comparing each alignment from right
        /// to left and, on a mismatch, advancing by the larger of a character-based and a suffix-based shift.
        /// </summary>
        /// <param name="text">Text to scan.</param>
        /// <param name="pattern">Pattern to look for.</param>
        /// <param name="isSaveStatisticsForEmpty">
        /// When false and nothing is found, the statistics record created for this call is removed instead of saved.
        /// </param>
        /// <returns>
        /// The <c>result</c> list, which is not declared in this method and is cleared at the start of every call.
        /// </returns>
        /// <remarks>
        /// NOTE(review): "stiff" in the local names presumably means "shift".
        /// NOTE(review): llisValue, lisValue and rAdvValue are presumably filled by the three preprocessing calls - confirm.
        /// </remarks>
        public List <int> FindSubstringByGoodSuffixBadSymbolAdv(string text, string pattern, bool isSaveStatisticsForEmpty = true)
        {
            stopwatch = new Stopwatch();
            stopwatch.Start();
            StatisticAccumulator.CreateStatistics(text, pattern);

            result.Clear();
            LliPreprocessString(pattern);
            LiByNPreprocessString(pattern);
            BadSymbolAdvPreprocessString(pattern);
            int lenPattern = pattern.Length;
            // i is the text index aligned with pattern[0].
            int i          = 0;

            while (i <= text.Length - pattern.Length)
            {
                // j walks the pattern and j0 the text, both from the right end of the current alignment.
                int j  = pattern.Length - 1;
                int j0 = j + i;
                StatisticAccumulator.IterationCountInc(2);
                while (j >= 0)
                {
                    StatisticAccumulator.IterationCountInc();
                    StatisticAccumulator.NumberOfComparisonInc();
                    if (pattern[j] != text[j0])
                    {
                        break;
                    }
                    j--;
                    j0--;
                    StatisticAccumulator.IterationCountInc(2);
                }
                if (j < 0)
                {
                    // Every character matched: record the alignment and shift using llisValue[1].
                    result.Add(i);
                    i += lenPattern - llisValue[1];
                    StatisticAccumulator.IterationCountInc(2);
                }
                else
                {
                    // Mismatch at pattern[j] / text[j0]; both candidate shifts default to 1.
                    int suffixStiff = 1;
                    int symbolStiff = 1;

                    StatisticAccumulator.IterationCountInc(4);
                    List <int> rAdv = null;

                    if (rAdvValue.TryGetValue(text[j0], out rAdv))
                    {
                        StatisticAccumulator.IterationCountInc(3);
                        // Skip the entries below j, then step back to the last of them (if any).
                        // Assumes rAdv is non-empty and sorted ascending - TODO confirm.
                        int l = 0;
                        while (l < rAdv.Count && rAdv[l] < j)
                        {
                            l++;
                            StatisticAccumulator.IterationCountInc(3);
                        }
                        if (l > 0)
                        {
                            l--;
                            StatisticAccumulator.IterationCountInc();
                        }
                        StatisticAccumulator.IterationCountInc(3);
                        int maxPos = rAdv[l];
                        if (maxPos >= j)
                        {
                            // No entry lies below j: move pattern[0] just past the mismatching text character.
                            symbolStiff = j + 1;
                        }
                        else
                        {
                            // Align the entry at maxPos with the mismatching text character.
                            symbolStiff = j - maxPos;
                        }
                    }
                    else
                    {
                        // No entry for this text character. j0 - i equals j here, so this is j + 1.
                        symbolStiff = j0 + 1 - i;
                    }

                    // Only taken when at least one character matched before the mismatch.
                    if (j < pattern.Length - 1)
                    {
                        StatisticAccumulator.IterationCountInc(3);
//                        j++;
                        if (lisValue[j] > 0)
                        {
                            suffixStiff = lenPattern - lisValue[j] - 1;
                        }
                        else
                        {
                            suffixStiff = lenPattern - llisValue[j];
                        }
                    }

                    StatisticAccumulator.IterationCountInc(2);
                    // Advance by the larger of the two shifts.
                    int stiff = Math.Max(symbolStiff, suffixStiff);
                    i += stiff;
                }
            }

            stopwatch.Stop();
            if (result.Count > 0 || isSaveStatisticsForEmpty)
            {
                long elapsedTicks         = stopwatch.ElapsedTicks;
                long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
                _outputPresentation = string.Join(",", result.Select(p => p.ToString()));

                StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
            }
            else
            {
                StatisticAccumulator.RemoveStatisticData();
            }

            return(result);
        }
        //--------------------------------------------------------------------------------------
        /// <summary>
        /// Searches <paramref name="text"/> for <paramref name="pattern"/>, comparing each alignment from right
        /// to left and advancing by a suffix-based shift taken from lisValue / llisValue.
        /// </summary>
        /// <param name="text">Text to scan.</param>
        /// <param name="pattern">Pattern to look for.</param>
        /// <param name="isSaveStatisticsForEmpty">
        /// When false and nothing is found, the statistics record created for this call is removed instead of saved.
        /// </param>
        /// <returns>The <c>result</c> list, cleared at the start of the call and filled with match positions.</returns>
        public List <int> FindSubstringGoodSuffix(string text, string pattern, bool isSaveStatisticsForEmpty = true)
        {
            stopwatch = new Stopwatch();
            stopwatch.Start();
            StatisticAccumulator.CreateStatistics(text, pattern);

            result.Clear();
            LliPreprocessString(pattern);
            LiByNPreprocessString(pattern);

            int patternLength    = pattern.Length;
            int lastPatternIndex = patternLength - 1;
            int lastAlignment    = text.Length - patternLength;

            for (int alignment = 0; alignment <= lastAlignment;)
            {
                int patternIndex = lastPatternIndex;
                int textIndex    = alignment + lastPatternIndex;
                StatisticAccumulator.IterationCountInc(2);

                // Right-to-left comparison; both indexes step back together after every matching character.
                for (; patternIndex >= 0; patternIndex--, textIndex--)
                {
                    StatisticAccumulator.IterationCountInc();
                    StatisticAccumulator.NumberOfComparisonInc();
                    if (pattern[patternIndex] != text[textIndex])
                    {
                        break;
                    }
                    StatisticAccumulator.IterationCountInc(2);
                }

                if (patternIndex < 0)
                {
                    // Whole pattern matched at this alignment.
                    result.Add(alignment);
                    alignment += patternLength - llisValue[1];
                    StatisticAccumulator.IterationCountInc(2);
                    continue;
                }

                StatisticAccumulator.IterationCountInc();
                if (patternIndex >= lastPatternIndex)
                {
                    // The very first comparison failed: move on by one position.
                    alignment++;
                    continue;
                }

                StatisticAccumulator.IterationCountInc();
                int suffixValue = lisValue[patternIndex];
                alignment += suffixValue > 0
                    ? patternLength - suffixValue - 1
                    : patternLength - llisValue[patternIndex];
            }

            stopwatch.Stop();
            if (result.Count == 0 && !isSaveStatisticsForEmpty)
            {
                StatisticAccumulator.RemoveStatisticData();
                return(result);
            }

            long ticks        = stopwatch.ElapsedTicks;
            long milliseconds = stopwatch.ElapsedMilliseconds;
            _outputPresentation = string.Join(",", result.Select(position => position.ToString()));
            StatisticAccumulator.SaveStatisticData(_outputPresentation, ticks, milliseconds, DateTime.Now, null);

            return(result);
        }
        //--------------------------------------------------------------------------------------
//        public List<int> FindSubstringBadSymbolAdv(string text, string pattern)
//        {
//#if (DEBUG)
//            elapsedTicksList.Clear();
//#endif
//            stopwatch = new Stopwatch();
//            stopwatch.Start();

//#if (DEBUG)
//            elapsedTicksList.Add(stopwatch.ElapsedTicks);
//            coreProcess = 0;
//            dictionaryProcess = 0;
//            outerLoop = 0;
//#endif

//        int dictAppl = 0;

//            StatisticAccumulator.CreateStatistics(text, pattern);
//#if (DEBUG)
//            elapsedTicksList.Add(stopwatch.ElapsedTicks);
//#endif
//            result.Clear();
//#if (DEBUG)
//            elapsedTicksList.Add(stopwatch.ElapsedTicks);
//#endif
//            BadSymbolAdvPreprocessString(pattern);
//            int lenPattern = pattern.Length;
//            int i = 0;
//#if (DEBUG)
//            elapsedTicksList.Add(stopwatch.ElapsedTicks);
//#endif
//            while (i <= text.Length - pattern.Length)
//            {
//                int j = pattern.Length - 1;
//                int j0 = j + i;
//                StatisticAccumulator.IterationCountInc(2);
//#if (DEBUG)
//                outerLoop++;
//#endif
//                while (j >= 0)
//                {
//                    StatisticAccumulator.NumberOfComparisonInc();
//                    StatisticAccumulator.IterationCountInc();
//                    if (pattern[j] != text[j0])
//                        break;
//                    j--;
//                    j0--;
//                    StatisticAccumulator.IterationCountInc(2);
//#if (DEBUG)
//                    coreProcess++;
//#endif
//                }
//                if (j < 0)
//                {
//                    result.Add(i);
//                    i++;
//                    StatisticAccumulator.IterationCountInc(2);
//                }
//                else
//                {
//                    StatisticAccumulator.IterationCountInc();
//                    if (rAdvValue.ContainsKey(text[j0]))
//                    {
//                        int l = 0;
//                        StatisticAccumulator.IterationCountInc(3);
//                        dictAppl++;
//                        while (l < rAdvValue[text[j0]].Count && rAdvValue[text[j0]][l] < j)
//                        {
//#if (DEBUG)
//                            dictionaryProcess++;
//#endif
//                            dictAppl++;
//                            dictAppl++;
//                            l++;
//                            StatisticAccumulator.IterationCountInc(3);
//                        }
//                        if (l > 0)
//                        {
//                            l--;
//                            StatisticAccumulator.IterationCountInc();
//                        }
//                        StatisticAccumulator.IterationCountInc(2);
//                        dictAppl++;
//                        dictAppl++;
//                        int maxPos = rAdvValue[text[j0]][l];
//                        if (rAdvValue[text[j0]][l] >= j)
//                        {
//                            i += j + 1;
//                        }
//                        else
//                        {
//                            i += j - maxPos;
//                        }
//                    }
//                    else
//                    {
//                        StatisticAccumulator.IterationCountInc();
//                        i = j0 + 1;
//                    }
//                }
//#if (DEBUG)
//                elapsedTicksList.Add(stopwatch.ElapsedTicks);
//#endif
//            }

//            stopwatch.Stop();
//            elapsedTicks = stopwatch.ElapsedTicks;
//            durationMilliSeconds = stopwatch.ElapsedMilliseconds;
//            _outputPresentation = string.Join(",", result.Select(p => p.ToString()));
//            coreElapsedTicks = durationMilliSeconds - preprocessingElapsedTicks;

//            additionalInfo = $"Pre({PreprocessingElapsedTicks}) Core({CoreElapsedTicks}) ({string.Join(",", elapsedTicksList.Select(p => p.ToString()))})";

//            StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, additionalInfo);
//            ReturnToPool();
//            return result;
//        }
        //--------------------------------------------------------------------------------------
        /// <summary>
        /// Searches <paramref name="text"/> for <paramref name="pattern"/>, comparing each alignment from right
        /// to left and, on a mismatch, shifting according to the positions stored in rAdvValue for the
        /// mismatching text character.
        /// </summary>
        /// <param name="text">Text to scan.</param>
        /// <param name="pattern">Pattern to look for.</param>
        /// <param name="isSaveStatisticsForEmpty">
        /// When false and nothing is found, the statistics record created for this call is removed instead of saved.
        /// </param>
        /// <returns>The <c>result</c> list, cleared at the start of the call and filled with match positions.</returns>
        /// <remarks>
        /// In DEBUG builds additional counters and tick samples are collected.
        /// NOTE(review): the list is returned after ReturnToPool() has been called - confirm that
        /// ReturnToPool does not recycle <c>result</c>.
        /// </remarks>
        public List <int> FindSubstringBadSymbolAdv(string text, string pattern, bool isSaveStatisticsForEmpty = true)
        {
            stopwatch = new Stopwatch();
            stopwatch.Start();
            StatisticAccumulator.CreateStatistics(text, pattern);

#if (DEBUG)
            elapsedTicksList.Clear();
            coreProcess       = 0;
            dictionaryProcess = 0;
            outerLoop         = 0;
#endif

            result.Clear();
#if (DEBUG)
            elapsedTicksList.Add(stopwatch.ElapsedTicks);
#endif
            BadSymbolAdvPreprocessString(pattern);
            // i is the text index aligned with pattern[0].
            int i = 0;
#if (DEBUG)
            elapsedTicksList.Add(stopwatch.ElapsedTicks);
#endif
            while (i <= text.Length - pattern.Length)
            {
                // j walks the pattern and j0 the text, both from the right end of the current alignment.
                int j  = pattern.Length - 1;
                int j0 = j + i;
                StatisticAccumulator.IterationCountInc(2);
#if (DEBUG)
                outerLoop++;
#endif
                while (j >= 0)
                {
                    StatisticAccumulator.NumberOfComparisonInc();
                    StatisticAccumulator.IterationCountInc();
                    if (pattern[j] != text[j0])
                    {
                        break;
                    }
                    j--;
                    j0--;
                    StatisticAccumulator.IterationCountInc(2);
#if (DEBUG)
                    coreProcess++;
#endif
                }
                if (j < 0)
                {
                    // Every character matched: record the alignment and try the next position.
                    result.Add(i);
                    i++;
                    StatisticAccumulator.IterationCountInc(2);
                }
                else
                {
                    StatisticAccumulator.IterationCountInc();
                    List <int> rAdv = null;

                    if (rAdvValue.TryGetValue(text[j0], out rAdv))
                    {
                        // Skip the entries below j, then step back to the last of them (if any).
                        // Assumes rAdv is non-empty and sorted ascending - TODO confirm.
                        int l = 0;
                        StatisticAccumulator.IterationCountInc(3);
                        while (l < rAdv.Count && rAdv[l] < j)
                        {
#if (DEBUG)
                            dictionaryProcess++;
#endif
                            l++;
                            StatisticAccumulator.IterationCountInc(3);
                        }
                        if (l > 0)
                        {
                            l--;
                            StatisticAccumulator.IterationCountInc();
                        }
                        StatisticAccumulator.IterationCountInc(2);
                        int maxPos = rAdv[l];
                        if (maxPos >= j)
                        {
                            // No entry lies below j: move pattern[0] just past the mismatching text character.
                            i += j + 1;
                        }
                        else
                        {
                            // Align the entry at maxPos with the mismatching text character.
                            i += j - maxPos;
                        }
                    }
                    else
                    {
                        // No entry for this text character: restart right after it.
                        StatisticAccumulator.IterationCountInc();
                        i = j0 + 1;
                    }
                }
#if (DEBUG)
                elapsedTicksList.Add(stopwatch.ElapsedTicks);
#endif
            }

            stopwatch.Stop();
            if (result.Count > 0 || isSaveStatisticsForEmpty)
            {
                // elapsedTicks and durationMilliSeconds are not declared in this method, unlike in the sibling searches.
                elapsedTicks         = stopwatch.ElapsedTicks;
                durationMilliSeconds = stopwatch.ElapsedMilliseconds;
                _outputPresentation  = string.Join(",", result.Select(p => p.ToString()));

                StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
            }
            else
            {
                StatisticAccumulator.RemoveStatisticData();
            }

            ReturnToPool();
            return(result);
        }