Beispiel #1
0
        /// <summary>
        /// Builds the array of suffix tuples for the objects of <paramref name="objs"/>
        /// located in the range [<paramref name="index"/>, <paramref name="index"/> + <paramref name="length"/>).
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. every object's string is obtained through <paramref name="stringValueGetter"/> and its
        ///    distinct suffixes (as produced by <c>GetSuffixes_v2</c>) are collected;
        /// 2. the collected suffixes are sorted with <c>suffixComparison</c>;
        /// 3. the sorted suffixes are walked once: a suffix joins the current group while the text that
        ///    opened the group starts with it (ordinal comparison), otherwise it opens a new group;
        /// 4. the resulting groups are reversed.
        /// <c>CreateIsLetterOrDigitArray</c> is called on entry and <c>DestroyIsLetterOrDigitArray</c>
        /// is always called on exit, including when an exception propagates.
        /// </remarks>
        /// <param name="objs">Source objects.</param>
        /// <param name="index">Index in <paramref name="objs"/> of the first object to process.</param>
        /// <param name="length">Number of consecutive objects to process.</param>
        /// <param name="stringValueGetter">Supplies the string whose suffixes are indexed for each object.</param>
        /// <returns>
        /// One tuple per suffix group, in the reverse of the sorted order; an empty array when the
        /// processed range produces no suffixes at all.
        /// </returns>
        public static SuffixArray<T>.tuple_t[] Build(
            IList<T> objs, int index, int length, IStringValueGetter<T> stringValueGetter)
        {
            CreateIsLetterOrDigitArray();
            try
            {
                // Upper bound used to size the buffer; the real count may be lower because
                // duplicates are dropped per object below.
                var totalSuffixCount = objs.Skip(index)
                                           .Take(length)
                                           .Select(value => GetSuffixCount(stringValueGetter.GetStringValue(value)))
                                           .Sum();
                var suffixes    = new suffix_t[totalSuffixCount];
                var suffixCount = 0;

                for (int i = index, end = index + length; i < end; i++)
                {
                    var str = stringValueGetter.GetStringValue(objs[i]);
                    foreach (var wordSuffix in GetSuffixes_v2(i, str).Distinct())
                    {
                        suffixes[suffixCount++] = wordSuffix;
                    }
                }

                // Nothing to index: return an empty result instead of reading suffixes[0] below.
                if (suffixCount == 0)
                {
                    return new SuffixArray<T>.tuple_t[0];
                }

                Array.Resize(ref suffixes, suffixCount);
                Array.Sort<suffix_t>(suffixes, suffixComparison);

                var tuples     = new SuffixArray<T>.tuple_t[suffixes.Length];
                var tupleCount = 0;

                // Seed the first group with the first sorted suffix.
                var current   = suffixes[0];
                var groupText = current.Suffix;
                var groupData = new SimplyLinkedList<SuffixArray<T>.data_t>();
                tuples[tupleCount++] = new SuffixArray<T>.tuple_t()
                {
                    Suffix = groupText, Data = groupData
                };
                groupData.Add(new SuffixArray<T>.data_t(current.SuffixIndex, current.WordIndex));

                for (int i = 1, len = suffixes.Length; i < len; i++)
                {
                    current = suffixes[i];

                    // Ordinal on purpose: the array was sorted with an ordinal comparison, so the
                    // prefix test must use the same rules (the default overload is culture-sensitive).
                    if (!groupText.StartsWith(current.Suffix, StringComparison.Ordinal))
                    {
                        groupText = current.Suffix;
                        groupData = new SimplyLinkedList<SuffixArray<T>.data_t>();
                        tuples[tupleCount++] = new SuffixArray<T>.tuple_t()
                        {
                            Suffix = groupText, Data = groupData
                        };
                    }
                    groupData.Add(new SuffixArray<T>.data_t(current.SuffixIndex, current.WordIndex));
                }

                // The flat suffix buffer is not needed past this point.
                suffixes = null;

                Array.Resize(ref tuples, tupleCount);
                Array.Reverse(tuples);

                return tuples;
            }
            finally
            {
                // Runs on the normal path, the empty-result early return and on exceptions.
                DestroyIsLetterOrDigitArray();
            }
        }
Beispiel #2
0
        /*private static IEnumerable< TSource > Distinct< TSource >( IEnumerable< TSource > source )
         * {
         *      var set = new Set< TSource >();
         *      foreach ( TSource current in source )
         *      {
         *      if ( set.Add( current ) )
         *              {
         *                      yield return (current);
         *              }
         *      }
         *      yield break;
         * }*/

        /// <summary>
        /// Compares two suffix entries by their <c>Suffix</c> text using ordinal
        /// comparison, with the operands reversed so that sorting yields descending order.
        /// </summary>
        /// <param name="x">First entry to compare.</param>
        /// <param name="y">Second entry to compare.</param>
        /// <returns>
        /// The value of <c>string.CompareOrdinal(y.Suffix, x.Suffix)</c>: negative when
        /// <paramref name="y"/> sorts before <paramref name="x"/> ordinally, zero when the
        /// texts are equal, positive otherwise.
        /// </returns>
        private static int suffixComparison(suffix_t x, suffix_t y)
        {
            // The arguments are deliberately passed as (y, x) to invert the natural order.
            var leftOperand  = y.Suffix;
            var rightOperand = x.Suffix;

            return string.CompareOrdinal(leftOperand, rightOperand);
        }