コード例 #1
0
ファイル: SuffixArray.cs プロジェクト: zamgi/SuffixArray
        /// <summary>
        /// Builds the grouped suffix tuples for the string values of the objects in
        /// <paramref name="objs"/> at positions [index, index + length).
        /// </summary>
        /// <param name="objs">Source objects.</param>
        /// <param name="index">Zero-based position of the first object to process.</param>
        /// <param name="length">Number of objects to process.</param>
        /// <param name="stringValueGetter">Supplies the string value of each object.</param>
        /// <returns>
        /// The grouped tuples, in the reverse of the sorted suffix order;
        /// an empty array when no suffix was produced at all.
        /// </returns>
        public static SuffixArray<T>.tuple_t[] Build(
            IList<T> objs, int index, int length, IStringValueGetter<T> stringValueGetter)
        {
            CreateIsLetterOrDigitArray();
            try
            {
                // Size of the working buffer. Presumably an upper bound on what the loop
                // below stores (it keeps only the distinct suffixes of each object) - TODO confirm.
                var totalSuffixCount = objs.Skip(index).Take(length)
                                           .Select(value => GetSuffixCount(stringValueGetter.GetStringValue(value)))
                                           .Sum();
                var suffixes    = new suffix_t[totalSuffixCount];
                var suffixCount = 0;

                for (int i = index, end = index + length; i < end; i++)
                {
                    var str = stringValueGetter.GetStringValue(objs[i]);
                    foreach (var distinctSuffix in GetSuffixes_v2(i, str).Distinct())
                    {
                        suffixes[suffixCount++] = distinctSuffix;
                    }
                }
                Array.Resize<suffix_t>(ref suffixes, suffixCount);
                Array.Sort<suffix_t>(suffixes, suffixComparison);

                // Nothing to group: without this guard the read of suffixes[0] below
                // would throw IndexOutOfRangeException.
                if (suffixes.Length == 0)
                {
                    return new SuffixArray<T>.tuple_t[0];
                }

                var tuples     = new SuffixArray<T>.tuple_t[suffixes.Length];
                var tupleCount = 0;

                // The first sorted suffix opens the first group.
                var suffix     = suffixes[0];
                var suffixText = suffix.Suffix;
                var data       = new SimplyLinkedList<SuffixArray<T>.data_t>();

                tuples[tupleCount++] = new SuffixArray<T>.tuple_t()
                {
                    Suffix = suffixText, Data = data
                };
                data.Add(new SuffixArray<T>.data_t(suffix.SuffixIndex, suffix.WordIndex));

                for (int i = 1, len = suffixes.Length; i < len; i++)
                {
                    suffix = suffixes[i];

                    // A suffix that is a prefix of the current group's text joins that group;
                    // any other suffix opens a new group.
                    // NOTE(review): this StartsWith overload is culture-sensitive - confirm
                    // that this agrees with the ordering imposed by suffixComparison.
                    if (!suffixText.StartsWith(suffix.Suffix))
                    {
                        suffixText           = suffix.Suffix;
                        data                 = new SimplyLinkedList<SuffixArray<T>.data_t>();
                        tuples[tupleCount++] = new SuffixArray<T>.tuple_t()
                        {
                            Suffix = suffixText, Data = data
                        };
                    }
                    data.Add(new SuffixArray<T>.data_t(suffix.SuffixIndex, suffix.WordIndex));
                }

                // Drop the reference to the sorted buffer before the result is resized.
                suffixes = null;
                Array.Resize<SuffixArray<T>.tuple_t>(ref tuples, tupleCount);
                Array.Reverse(tuples);

                return tuples;
            }
            finally
            {
                // Always release the helper array, including when building fails part-way.
                DestroyIsLetterOrDigitArray();
            }
        }
コード例 #2
0
ファイル: SuffixArray_v2.cs プロジェクト: zamgi/SuffixArray
        /// <summary>
        /// Initializes a new instance built from the objects of <paramref name="objs"/>
        /// at positions [index, index + length).
        /// </summary>
        /// <param name="objs">Source objects; must not be null.</param>
        /// <param name="index">Zero-based position of the first object to index.</param>
        /// <param name="length">Number of objects to index; must be positive.</param>
        /// <param name="stringValueGetter">Supplies the string value of each object; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="objs"/> or <paramref name="stringValueGetter"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="index"/> is negative, <paramref name="length"/> is not positive,
        /// or the requested window does not fit inside <paramref name="objs"/>.
        /// </exception>
        public SuffixArray_v2(IList<T> objs, int index, int length, IStringValueGetter<T> stringValueGetter)
        {
            if (objs == null)
            {
                throw (new ArgumentNullException(nameof(objs)));
            }
            // The window [index, index + length) must be non-empty and lie inside objs.
            // Written as a subtraction so that index + length cannot overflow.
            if ((index < 0) || (length <= 0) || (objs.Count - index < length))
            {
                throw (new ArgumentException("index-or-length"));
            }
            if (stringValueGetter == null)
            {
                throw (new ArgumentNullException(nameof(stringValueGetter)));
            }
            // Individual string values are not validated here.

            _Objects           = objs;
            _StringValueGetter = stringValueGetter;
            _BPT = SuffixArrayBuilder.Build(objs, index, length, stringValueGetter);
        }
コード例 #3
0
ファイル: SuffixArray_v2.cs プロジェクト: zamgi/SuffixArray
 /// <summary>
 /// Initializes a new instance built from every object in <paramref name="objs"/>.
 /// Delegates to the (objs, index, length, stringValueGetter) constructor with
 /// index 0 and length equal to the number of objects.
 /// </summary>
 /// <param name="objs">Source objects; must not be null.</param>
 /// <param name="stringValueGetter">Supplies the string value of each object.</param>
 /// <exception cref="ArgumentNullException"><paramref name="objs"/> is null.</exception>
 public SuffixArray_v2(IList <T> objs, IStringValueGetter <T> stringValueGetter)
     // A null objs is passed through with length 0 so that the chained constructor
     // reports it as ArgumentNullException instead of failing here on objs.Count.
     : this(objs, 0, objs?.Count ?? 0, stringValueGetter)
 {
 }
コード例 #4
0
ファイル: SuffixArray_v2.cs プロジェクト: zamgi/SuffixArray
            /// <summary>
            /// Builds a <c>BPlusTreeList</c> of suffix tuples for the string values of the
            /// objects in <paramref name="objs"/> at positions [index, index + length).
            /// </summary>
            /// <param name="objs">Source objects.</param>
            /// <param name="index">Zero-based position of the first object to process.</param>
            /// <param name="length">Number of objects to process.</param>
            /// <param name="stringValueGetter">Supplies the string value of each object.</param>
            /// <returns>
            /// A new list in which every tuple whose suffix is a prefix of the preceding
            /// kept tuple's suffix has been merged into that tuple.
            /// </returns>
            public static BPlusTreeList<tuple_t> Build(
                IList<T> objs, int index, int length, IStringValueGetter<T> stringValueGetter)
            {
                const int BlockCapacity = 512;

                BPlusTreeList<tuple_t> bpt;

                CreateMapArrays();
                try
                {
                    var totalSuffixCount = objs.Skip(index).Take(length)
                                               .Select(value => GetSuffixCount(stringValueGetter.GetStringValue(value)))
                                               .Sum();

                    // NOTE(review): the division is performed on integers before the "* 1.0",
                    // so the "+ 0.5" never rounds up; kept as-is to preserve the value passed on.
                    bpt = new BPlusTreeList<tuple_t>(default(tuple_t), ((int)(totalSuffixCount / BlockCapacity * 1.0 + 0.5) + 25), BlockCapacity);

                    // Filters out repeated suffix_t values within one object; cleared after each object.
                    var seen = new Set<suffix_t>(new suffix_t_IEqualityComparer());

                    for (int i = index, end = index + length; i < end; i++)
                    {
                        var str = stringValueGetter.GetStringValue(objs[i]);

                        // Candidate tuple: reused across suffixes until the tree actually takes it.
                        var tuple = new tuple_t()
                        {
                            Data = new SimplyLinkedList<data_t>()
                        };
                        var tupleExists = default(tuple_t);

                        foreach (var suff_t in GetSuffixes(i, str))
                        {
                            if (!seen.Add(suff_t))
                            {
                                continue;
                            }

                            var data = new data_t(suff_t.SuffixIndex, suff_t.WordIndex);
                            tuple.Suffix = suff_t.Suffix;
                            if (bpt.AddOrGetExistsValue(tuple, out tupleExists))
                            {
                                // The candidate was taken: record the occurrence on it and
                                // prepare a fresh candidate for the next suffix.
                                tuple.Data.Add(data);
                                tuple = new tuple_t()
                                {
                                    Data = new SimplyLinkedList<data_t>()
                                };
                            }
                            else
                            {
                                // Otherwise record the occurrence on the tuple handed back by the tree.
                                tupleExists.Data.Add(data);
                            }
                        }
                        seen.Clear();
                    }
                }
                finally
                {
                    // Always release the map arrays, including when the first phase fails.
                    DestroyMapArrays();
                }

                // NOTE(review): if bpt.BlockCount can be 0 for an empty tree, this division
                // throws DivideByZeroException - confirm against BPlusTreeList.
                var bpt_out = new BPlusTreeList<tuple_t>(default(tuple_t), bpt.Count / bpt.BlockCount, bpt.BlockCount);
                using (var e = bpt.GetEnumerator())
                {
                    if (e.MoveNext())
                    {
                        var root_tuple = e.Current;
                        bpt_out.Add(root_tuple);
                        while (e.MoveNext())
                        {
                            var current = e.Current;

                            // A tuple whose suffix is a prefix of the current root's suffix
                            // is folded into the root; any other tuple becomes the new root.
                            // NOTE(review): this StartsWith overload is culture-sensitive -
                            // confirm that this matches the ordering used by the tree.
                            if (root_tuple.Suffix.StartsWith(current.Suffix))
                            {
                                foreach (var data in current.Data)
                                {
                                    root_tuple.Data.Add(data);
                                }
                            }
                            else
                            {
                                root_tuple = current;
                                bpt_out.Add(root_tuple);
                            }
                        }
                    }
                }

                return bpt_out;
            }