Пример #1
0
        /// <summary>
        /// Builds a suffix tree for <paramref name="text"/> by inserting its suffixes one at a
        /// time, each insertion walking down from the root.
        /// </summary>
        /// <param name="text">
        /// Text to index. A "$" terminator is appended internally before the tree is built.
        /// NOTE(review): the walk appears to rely on "$" not occurring inside the text itself
        /// (otherwise an edge comparison can run past the end of the string) - confirm with callers.
        /// </param>
        /// <returns>The root node of the constructed tree (also stored in <c>root</c>).</returns>
        public override SuffixTreeNode Execute(string text)
        {
            // Timing and statistics are collected only when an accumulator is attached.
            if (StatisticAccumulator != null)
            {
                stopwatch = new Stopwatch();
                stopwatch.Start();
                StatisticAccumulator.CreateStatistics(text);
            }

            // Index of the terminator appended below; every leaf edge ends at this position.
            int lastPositionInText = text.Length;

            text += "$";
            root  = new SuffixTreeNode();
            // The suffix that consists of the terminator alone (the last index) is not inserted.
            for (int i = 0; i < text.Length - 1; i++)
            {
                int            j       = i;
                SuffixTreeNode current = root;
                while (current != null)
                {
                    // Single dictionary lookup instead of ContainsKey followed by the indexer.
                    SuffixTreeNode next;
                    if (current.Chields.TryGetValue(text[j], out next))
                    {
                        int k = next.StarSegment;
                        if (StatisticAccumulator != null)
                        {
                            StatisticAccumulator.IterationCountInc(3);
                        }
                        // Compare the suffix against the label of the edge leading to 'next'.
                        while (k <= next.EndSegment)
                        {
                            if (StatisticAccumulator != null)
                            {
                                StatisticAccumulator.IterationCountInc();
                                StatisticAccumulator.NumberOfComparisonInc();
                            }
                            // j advances even on a mismatch, so the mismatching
                            // character of the suffix is text[j - 1] after the break.
                            if (text[j++] != text[k])
                            {
                                break;
                            }
                            k++;
                        }
                        if (k > next.EndSegment)
                        {
                            // The whole edge label matched: continue the walk from the child.
                            if (StatisticAccumulator != null)
                            {
                                StatisticAccumulator.IterationCountInc();
                            }
                            current = next;
                        }
                        else
                        {
                            // Mismatch inside the edge: split it at k with a new internal
                            // node and hang a new leaf for the current suffix off that node.
                            if (StatisticAccumulator != null)
                            {
                                StatisticAccumulator.IterationCountInc(14);
                            }
                            SuffixTreeNode newMiddle = new SuffixTreeNode()
                            {
                                Parent      = next.Parent,
                                StarSegment = next.StarSegment,
                                EndSegment  = k - 1,
                                StartSymbol = next.StartSymbol
                            };
                            SuffixTreeNode newLeaf = new SuffixTreeNode()
                            {
                                Parent       = newMiddle,
                                StarSegment  = j - 1,
                                EndSegment   = lastPositionInText,
                                StartSymbol  = text[j - 1],
                                StarPosition = i
                            };
                            newMiddle.Chields.Add(text[k], next);
                            newMiddle.Chields.Add(text[j - 1], newLeaf);
                            // Replace 'next' with the new internal node in the parent's child map.
                            newMiddle.Parent.Chields[newMiddle.StartSymbol] = newMiddle;
                            next.Parent      = newMiddle;
                            next.StarSegment = k;
                            next.StartSymbol = text[k];
                            break;
                        }
                    }
                    else
                    {
                        // No edge starts with this character: the rest of the suffix becomes a leaf.
                        if (StatisticAccumulator != null)
                        {
                            StatisticAccumulator.IterationCountInc(2);
                        }
                        current.Chields.Add(text[j], new SuffixTreeNode()
                        {
                            Parent       = current,
                            StarSegment  = j,
                            EndSegment   = lastPositionInText,
                            StartSymbol  = text[j],
                            StarPosition = i
                        });
                        break;
                    }
                }
            }

            string outputPresentation = NodePresentationAsString(root);

            if (StatisticAccumulator != null)
            {
                stopwatch.Stop();
                long elapsedTicks         = stopwatch.ElapsedTicks;
                long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
                StatisticAccumulator.SaveStatisticData(outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
            }


            return(root);
        }
Пример #2
0
        //--------------------------------------------------------------------------------------
        /// <summary>
        /// Computes, for every position i of <paramref name="line"/>, the length of the longest
        /// substring that ends at i and is also a suffix of the whole string. The scan runs from
        /// right to left and reuses previously computed values through a "box" [ri, li] of
        /// characters already known to match a suffix of the string.
        /// </summary>
        /// <param name="line">String to preprocess.</param>
        /// <returns>
        /// An array with one entry per character of <paramref name="line"/>. The entry for the
        /// last position is never assigned and therefore stays 0.
        /// </returns>
        public int[] PreprocessNValueString(string line)
        {
            int len  = line.Length;
            int last = len - 1;
            int li   = last; // right end of the current box (the position it was computed for)
            int ri   = last; // left end of the current box

            int[] nvalue = new int[line.Length];
            StatisticAccumulator.IterationCountInc(5);
            for (int i = last - 1; i >= 0; i--)
            {
                StatisticAccumulator.IterationCountInc();
                if (i < ri)
                {
                    // i lies to the left of the box: compare explicitly against the suffix.
                    StatisticAccumulator.IterationCountInc(2);
                    if (line[i] == line[last])
                    {
                        int j = 0;
                        StatisticAccumulator.IterationCountInc();
                        StatisticAccumulator.NumberOfComparisonInc();
                        // j counts matched characters; "j <= i" keeps i - j inside the string.
                        while (j <= i && line[last - j] == line[i - j])
                        {
                            j++;
                            StatisticAccumulator.NumberOfComparisonInc();
                            StatisticAccumulator.IterationCountInc(2);
                        }
                        li        = i;
                        ri        = i - j + 1;
                        nvalue[i] = j;
                        StatisticAccumulator.IterationCountInc(3);
                    }
                    else
                    {
                        nvalue[i] = 0;
                    }
                }
                else
                {
                    // i lies inside the box [ri, li]; i0 is the position that corresponds
                    // to i inside the suffix the box is a copy of.
                    StatisticAccumulator.IterationCountInc(2);
                    int i0 = last - (li - i);
                    if (nvalue[i0] + li - i < nvalue[li])
                    {
                        // The match at i0 ends strictly inside the box, so it carries over as is.
                        StatisticAccumulator.IterationCountInc(2);
                        nvalue[i] = nvalue[i0];
                    }
                    else
                    {
                        // The match reaches the left edge of the box: extend it explicitly,
                        // starting at the edge itself.
                        int j  = ri;
                        int j2 = last - (i - ri);
                        StatisticAccumulator.IterationCountInc(3);
                        StatisticAccumulator.NumberOfComparisonInc();
                        // "j >= 0" stops the extension at the start of the string.
                        while (j >= 0 && line[j] == line[j2])
                        {
                            j--;
                            j2--;
                            StatisticAccumulator.NumberOfComparisonInc();
                            StatisticAccumulator.IterationCountInc(2);
                        }
                        li        = i;
                        ri        = j + 1;
                        nvalue[i] = li - ri + 1;
                        StatisticAccumulator.IterationCountInc(3);
                    }
                }
            }
            return(nvalue);
        }
Пример #3
0
        //--------------------------------------------------------------------------------------
        /// <summary>
        /// Computes, for every position of <paramref name="line"/> except the first, the length
        /// of the longest substring starting there that matches a prefix of the string. A window
        /// [boxLeft, boxRight] of characters already known to match a prefix is maintained so
        /// that earlier results can be reused.
        /// </summary>
        /// <param name="line">String to preprocess.</param>
        /// <returns>
        /// An array with one entry per character of <paramref name="line"/>; entry 0 is never
        /// assigned and stays 0.
        /// </returns>
        public int[] PreprocessString(string line)
        {
            int   length   = line.Length;
            int[] zvalue   = new int[length];
            int   boxLeft  = 0;
            int   boxRight = 0;

            for (int pos = 1; pos < length; pos++)
            {
                StatisticAccumulator.IterationCountInc();

                if (boxRight >= pos)
                {
                    // pos is covered by the window: look at its counterpart in the prefix.
                    StatisticAccumulator.IterationCountInc();
                    int counterpart = pos - boxLeft;
                    if (zvalue[counterpart] + counterpart < zvalue[boxLeft])
                    {
                        // The counterpart's match ends inside the window, so it is copied as is.
                        StatisticAccumulator.IterationCountInc();
                        zvalue[pos] = zvalue[counterpart];
                        continue;
                    }

                    // The match reaches the window's right edge: keep comparing past it.
                    StatisticAccumulator.IterationCountInc(2);
                    int scan       = boxRight + 1;
                    int prefixScan = scan - pos;
                    StatisticAccumulator.NumberOfComparisonInc();
                    while (scan < length && line[prefixScan] == line[scan])
                    {
                        scan++;
                        prefixScan++;
                        StatisticAccumulator.NumberOfComparisonInc();
                        StatisticAccumulator.IterationCountInc();
                    }
                    boxLeft     = pos;
                    boxRight    = scan - 1;
                    zvalue[pos] = boxRight - boxLeft + 1;
                    StatisticAccumulator.IterationCountInc(3);
                    continue;
                }

                // pos is to the right of the window: compare against the prefix from scratch.
                StatisticAccumulator.IterationCountInc();
                StatisticAccumulator.NumberOfComparisonInc();
                if (line[pos] != line[0])
                {
                    StatisticAccumulator.IterationCountInc();
                    zvalue[pos] = 0;
                    continue;
                }

                int matched = 0;
                StatisticAccumulator.IterationCountInc();
                StatisticAccumulator.NumberOfComparisonInc();
                while (pos + matched < length && line[matched] == line[pos + matched])
                {
                    matched++;
                    StatisticAccumulator.NumberOfComparisonInc();
                    StatisticAccumulator.IterationCountInc();
                }
                StatisticAccumulator.IterationCountInc(3);
                boxRight    = pos + matched - 1;
                boxLeft     = pos;
                zvalue[pos] = matched;
            }

            return zvalue;
        }
Пример #4
0
        //--------------------------------------------------------------------------------------
        /// <summary>
        /// Finds the start positions of <paramref name="pattern"/> in <paramref name="text"/>.
        /// Each alignment is compared right to left; after a mismatch the pattern is moved by the
        /// larger of two shifts, one derived from the mismatching text character (via
        /// <c>rAdvValue</c>) and one derived from the already matched suffix (via
        /// <c>lisValue</c> / <c>llisValue</c>).
        /// </summary>
        /// <param name="text">Text to search in.</param>
        /// <param name="pattern">Pattern to search for.</param>
        /// <param name="isSaveStatisticsForEmpty">
        /// When false and nothing was found, the statistics record of this run is removed
        /// instead of being saved.
        /// </param>
        /// <returns>The shared <c>result</c> list, cleared at the start of the call.</returns>
        public List <int> FindSubstringByGoodSuffixBadSymbolAdv(string text, string pattern, bool isSaveStatisticsForEmpty = true)
        {
            stopwatch = new Stopwatch();
            stopwatch.Start();
            StatisticAccumulator.CreateStatistics(text, pattern);

            result.Clear();
            // NOTE(review): these calls presumably fill llisValue, lisValue and rAdvValue - confirm.
            LliPreprocessString(pattern);
            LiByNPreprocessString(pattern);
            BadSymbolAdvPreprocessString(pattern);

            int patternLength    = pattern.Length;
            int lastPatternIndex = patternLength - 1;
            int lastAlignment    = text.Length - patternLength;
            int alignment        = 0;

            while (alignment <= lastAlignment)
            {
                int patternPos = lastPatternIndex;
                int textPos    = patternPos + alignment;
                StatisticAccumulator.IterationCountInc(2);

                // Compare right to left until a mismatch or until the pattern is exhausted.
                for (; patternPos >= 0; patternPos--, textPos--)
                {
                    StatisticAccumulator.IterationCountInc();
                    StatisticAccumulator.NumberOfComparisonInc();
                    if (pattern[patternPos] != text[textPos])
                    {
                        break;
                    }
                    StatisticAccumulator.IterationCountInc(2);
                }

                if (patternPos < 0)
                {
                    // Full match at this alignment.
                    result.Add(alignment);
                    alignment += patternLength - llisValue[1];
                    StatisticAccumulator.IterationCountInc(2);
                    continue;
                }

                StatisticAccumulator.IterationCountInc(4);

                // Shift suggested by the mismatching text character.
                int        symbolShift;
                List <int> positions;
                if (rAdvValue.TryGetValue(text[textPos], out positions))
                {
                    StatisticAccumulator.IterationCountInc(3);
                    // Skip the entries smaller than the mismatch position, then step back one.
                    int index = 0;
                    while (index < positions.Count && positions[index] < patternPos)
                    {
                        index++;
                        StatisticAccumulator.IterationCountInc(3);
                    }
                    if (index > 0)
                    {
                        index--;
                        StatisticAccumulator.IterationCountInc();
                    }
                    StatisticAccumulator.IterationCountInc(3);
                    int candidate = positions[index];
                    symbolShift = candidate >= patternPos
                        ? patternPos + 1
                        : patternPos - candidate;
                }
                else
                {
                    // The character has no entry in the table: move past the mismatch.
                    symbolShift = textPos + 1 - alignment;
                }

                // Shift suggested by the matched suffix; stays 1 when nothing matched.
                int suffixShift = 1;
                if (patternPos < lastPatternIndex)
                {
                    StatisticAccumulator.IterationCountInc(3);
                    suffixShift = lisValue[patternPos] > 0
                        ? patternLength - lisValue[patternPos] - 1
                        : patternLength - llisValue[patternPos];
                }

                StatisticAccumulator.IterationCountInc(2);
                alignment += Math.Max(symbolShift, suffixShift);
            }

            stopwatch.Stop();
            if (result.Count == 0 && !isSaveStatisticsForEmpty)
            {
                StatisticAccumulator.RemoveStatisticData();
                return result;
            }

            long ticks        = stopwatch.ElapsedTicks;
            long milliseconds = stopwatch.ElapsedMilliseconds;
            _outputPresentation = string.Join(",", result.Select(p => p.ToString()));
            StatisticAccumulator.SaveStatisticData(_outputPresentation, ticks, milliseconds, DateTime.Now, null);

            return result;
        }
Пример #5
0
        //--------------------------------------------------------------------------------------
        /// <summary>
        /// Finds the start positions of <paramref name="pattern"/> in <paramref name="text"/>.
        /// Each alignment is compared right to left; after a mismatch the pattern is moved by a
        /// shift derived from the already matched suffix (via <c>lisValue</c> /
        /// <c>llisValue</c>), or by one position when the very last character mismatched.
        /// </summary>
        /// <param name="text">Text to search in.</param>
        /// <param name="pattern">Pattern to search for.</param>
        /// <param name="isSaveStatisticsForEmpty">
        /// When false and nothing was found, the statistics record of this run is removed
        /// instead of being saved.
        /// </param>
        /// <returns>The shared <c>result</c> list, cleared at the start of the call.</returns>
        public List <int> FindSubstringGoodSuffix(string text, string pattern, bool isSaveStatisticsForEmpty = true)
        {
            stopwatch = new Stopwatch();
            stopwatch.Start();
            StatisticAccumulator.CreateStatistics(text, pattern);

            result.Clear();
            // NOTE(review): these calls presumably fill llisValue and lisValue - confirm.
            LliPreprocessString(pattern);
            LiByNPreprocessString(pattern);

            int patternLength    = pattern.Length;
            int lastPatternIndex = patternLength - 1;
            int lastAlignment    = text.Length - patternLength;
            int alignment        = 0;

            while (alignment <= lastAlignment)
            {
                int patternPos = lastPatternIndex;
                int textPos    = patternPos + alignment;
                StatisticAccumulator.IterationCountInc(2);

                // Compare right to left until a mismatch or until the pattern is exhausted.
                for (; patternPos >= 0; patternPos--, textPos--)
                {
                    StatisticAccumulator.IterationCountInc();
                    StatisticAccumulator.NumberOfComparisonInc();
                    if (pattern[patternPos] != text[textPos])
                    {
                        break;
                    }
                    StatisticAccumulator.IterationCountInc(2);
                }

                if (patternPos < 0)
                {
                    // Full match at this alignment.
                    result.Add(alignment);
                    alignment += patternLength - llisValue[1];
                    StatisticAccumulator.IterationCountInc(2);
                    continue;
                }

                StatisticAccumulator.IterationCountInc();
                if (patternPos >= lastPatternIndex)
                {
                    // The last pattern character mismatched: nothing matched, move by one.
                    alignment++;
                    continue;
                }

                StatisticAccumulator.IterationCountInc();
                alignment += lisValue[patternPos] > 0
                    ? patternLength - lisValue[patternPos] - 1
                    : patternLength - llisValue[patternPos];
            }

            stopwatch.Stop();
            if (result.Count == 0 && !isSaveStatisticsForEmpty)
            {
                StatisticAccumulator.RemoveStatisticData();
                return result;
            }

            long ticks        = stopwatch.ElapsedTicks;
            long milliseconds = stopwatch.ElapsedMilliseconds;
            _outputPresentation = string.Join(",", result.Select(p => p.ToString()));
            StatisticAccumulator.SaveStatisticData(_outputPresentation, ticks, milliseconds, DateTime.Now, null);

            return result;
        }
Пример #6
0
        //--------------------------------------------------------------------------------------
//        public List<int> FindSubstringBadSymbolAdv(string text, string pattern)
//        {
//#if (DEBUG)
//            elapsedTicksList.Clear();
//#endif
//            stopwatch = new Stopwatch();
//            stopwatch.Start();

//#if (DEBUG)
//            elapsedTicksList.Add(stopwatch.ElapsedTicks);
//            coreProcess = 0;
//            dictionaryProcess = 0;
//            outerLoop = 0;
//#endif

//        int dictAppl = 0;

//            StatisticAccumulator.CreateStatistics(text, pattern);
//#if (DEBUG)
//            elapsedTicksList.Add(stopwatch.ElapsedTicks);
//#endif
//            result.Clear();
//#if (DEBUG)
//            elapsedTicksList.Add(stopwatch.ElapsedTicks);
//#endif
//            BadSymbolAdvPreprocessString(pattern);
//            int lenPattern = pattern.Length;
//            int i = 0;
//#if (DEBUG)
//            elapsedTicksList.Add(stopwatch.ElapsedTicks);
//#endif
//            while (i <= text.Length - pattern.Length)
//            {
//                int j = pattern.Length - 1;
//                int j0 = j + i;
//                StatisticAccumulator.IterationCountInc(2);
//#if (DEBUG)
//                outerLoop++;
//#endif
//                while (j >= 0)
//                {
//                    StatisticAccumulator.NumberOfComparisonInc();
//                    StatisticAccumulator.IterationCountInc();
//                    if (pattern[j] != text[j0])
//                        break;
//                    j--;
//                    j0--;
//                    StatisticAccumulator.IterationCountInc(2);
//#if (DEBUG)
//                    coreProcess++;
//#endif
//                }
//                if (j < 0)
//                {
//                    result.Add(i);
//                    i++;
//                    StatisticAccumulator.IterationCountInc(2);
//                }
//                else
//                {
//                    StatisticAccumulator.IterationCountInc();
//                    if (rAdvValue.ContainsKey(text[j0]))
//                    {
//                        int l = 0;
//                        StatisticAccumulator.IterationCountInc(3);
//                        dictAppl++;
//                        while (l < rAdvValue[text[j0]].Count && rAdvValue[text[j0]][l] < j)
//                        {
//#if (DEBUG)
//                            dictionaryProcess++;
//#endif
//                            dictAppl++;
//                            dictAppl++;
//                            l++;
//                            StatisticAccumulator.IterationCountInc(3);
//                        }
//                        if (l > 0)
//                        {
//                            l--;
//                            StatisticAccumulator.IterationCountInc();
//                        }
//                        StatisticAccumulator.IterationCountInc(2);
//                        dictAppl++;
//                        dictAppl++;
//                        int maxPos = rAdvValue[text[j0]][l];
//                        if (rAdvValue[text[j0]][l] >= j)
//                        {
//                            i += j + 1;
//                        }
//                        else
//                        {
//                            i += j - maxPos;
//                        }
//                    }
//                    else
//                    {
//                        StatisticAccumulator.IterationCountInc();
//                        i = j0 + 1;
//                    }
//                }
//#if (DEBUG)
//                elapsedTicksList.Add(stopwatch.ElapsedTicks);
//#endif
//            }

//            stopwatch.Stop();
//            elapsedTicks = stopwatch.ElapsedTicks;
//            durationMilliSeconds = stopwatch.ElapsedMilliseconds;
//            _outputPresentation = string.Join(",", result.Select(p => p.ToString()));
//            coreElapsedTicks = durationMilliSeconds - preprocessingElapsedTicks;

//            additionalInfo = $"Pre({PreprocessingElapsedTicks}) Core({CoreElapsedTicks}) ({string.Join(",", elapsedTicksList.Select(p => p.ToString()))})";

//            StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, additionalInfo);
//            ReturnToPool();
//            return result;
//        }
        //--------------------------------------------------------------------------------------
        /// <summary>
        /// Finds the start positions of <paramref name="pattern"/> in <paramref name="text"/>.
        /// Each alignment is compared right to left; after a mismatch the pattern is shifted
        /// according to the positions recorded in <c>rAdvValue</c> for the mismatching text
        /// character. After a full match the pattern is moved by one position.
        /// </summary>
        /// <param name="text">Text to search in.</param>
        /// <param name="pattern">Pattern to search for.</param>
        /// <param name="isSaveStatisticsForEmpty">
        /// When false and nothing was found, the statistics record of this run is removed
        /// instead of being saved.
        /// </param>
        /// <returns>The shared <c>result</c> list, cleared at the start of the call.</returns>
        public List <int> FindSubstringBadSymbolAdv(string text, string pattern, bool isSaveStatisticsForEmpty = true)
        {
            stopwatch = new Stopwatch();
            stopwatch.Start();
            StatisticAccumulator.CreateStatistics(text, pattern);

#if (DEBUG)
            elapsedTicksList.Clear();
            coreProcess       = 0;
            dictionaryProcess = 0;
            outerLoop         = 0;
#endif

            result.Clear();
#if (DEBUG)
            elapsedTicksList.Add(stopwatch.ElapsedTicks);
#endif
            // NOTE(review): presumably fills rAdvValue with the pattern positions of each
            // character, in ascending order - confirm against BadSymbolAdvPreprocessString.
            BadSymbolAdvPreprocessString(pattern);
            int i = 0;
#if (DEBUG)
            elapsedTicksList.Add(stopwatch.ElapsedTicks);
#endif
            while (i <= text.Length - pattern.Length)
            {
                int j  = pattern.Length - 1; // position in the pattern
                int j0 = j + i;              // matching position in the text
                StatisticAccumulator.IterationCountInc(2);
#if (DEBUG)
                outerLoop++;
#endif
                // Compare right to left until a mismatch or until the pattern is exhausted.
                while (j >= 0)
                {
                    StatisticAccumulator.NumberOfComparisonInc();
                    StatisticAccumulator.IterationCountInc();
                    if (pattern[j] != text[j0])
                    {
                        break;
                    }
                    j--;
                    j0--;
                    StatisticAccumulator.IterationCountInc(2);
#if (DEBUG)
                    coreProcess++;
#endif
                }
                if (j < 0)
                {
                    // Full match at this alignment.
                    result.Add(i);
                    i++;
                    StatisticAccumulator.IterationCountInc(2);
                }
                else
                {
                    StatisticAccumulator.IterationCountInc();
                    List <int> rAdv;

                    if (rAdvValue.TryGetValue(text[j0], out rAdv))
                    {
                        int l = 0;
                        StatisticAccumulator.IterationCountInc(3);
                        // Skip the entries smaller than the mismatch position ...
                        while (l < rAdv.Count && rAdv[l] < j)
                        {
#if (DEBUG)
                            dictionaryProcess++;
#endif
                            l++;
                            StatisticAccumulator.IterationCountInc(3);
                        }
                        // ... then step back to the last skipped entry, if there was one.
                        if (l > 0)
                        {
                            l--;
                            StatisticAccumulator.IterationCountInc();
                        }
                        StatisticAccumulator.IterationCountInc(2);
                        int maxPos = rAdv[l];
                        if (maxPos >= j)
                        {
                            // No recorded position lies left of the mismatch: move past it.
                            i += j + 1;
                        }
                        else
                        {
                            // Bring the recorded position maxPos under the mismatching character.
                            i += j - maxPos;
                        }
                    }
                    else
                    {
                        // The character has no entry in the table: move past the mismatch.
                        StatisticAccumulator.IterationCountInc();
                        i = j0 + 1;
                    }
                }
#if (DEBUG)
                elapsedTicksList.Add(stopwatch.ElapsedTicks);
#endif
            }

            stopwatch.Stop();
            if (result.Count > 0 || isSaveStatisticsForEmpty)
            {
                elapsedTicks         = stopwatch.ElapsedTicks;
                durationMilliSeconds = stopwatch.ElapsedMilliseconds;
                _outputPresentation  = string.Join(",", result.Select(p => p.ToString()));

                StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
            }
            else
            {
                StatisticAccumulator.RemoveStatisticData();
            }

            ReturnToPool();
            return(result);
        }