Ejemplo n.º 1
0
        /// <summary>
        /// Creates a suffix array over the window of <paramref name="length"/> items of
        /// <paramref name="objs"/> that starts at <paramref name="index"/>.
        /// </summary>
        /// <param name="objs">Source items; stored as-is (not copied).</param>
        /// <param name="index">Zero-based position of the first item to index.</param>
        /// <param name="length">Number of items to index, starting at <paramref name="index"/>.</param>
        /// <param name="stringValueGetter">Provides the string value of each item.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="objs"/> or <paramref name="stringValueGetter"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="index"/> is negative, <paramref name="length"/> is not positive,
        /// or the window [index, index + length) does not fit inside <paramref name="objs"/>.
        /// </exception>
        public SuffixArray_v2(IList <T> objs, int index, int length, IStringValueGetter <T> stringValueGetter)
        {
            if (objs == null)
            {
                throw (new ArgumentNullException(nameof(objs)));
            }
            // The builder walks items index .. index + length - 1, so 'length' is a count, not an end position.
            // 'objs.Count - index' is used instead of 'index + length' so the check cannot overflow.
            if ((index < 0) || (length <= 0) || (objs.Count - index < length))
            {
                throw (new ArgumentException("index-or-length"));
            }
            if (stringValueGetter == null)
            {
                throw (new ArgumentNullException(nameof(stringValueGetter)));
            }

            _Objects           = objs;
            _StringValueGetter = stringValueGetter;
            _BPT = SuffixArrayBuilder.Build(objs, index, length, stringValueGetter);
        }
Ejemplo n.º 2
0
            /// <summary>
            /// Builds the tree of suffix tuples for the <paramref name="length"/> items of
            /// <paramref name="objs"/> starting at <paramref name="index"/>.
            /// </summary>
            /// <param name="objs">Source items.</param>
            /// <param name="index">Zero-based position of the first item to process.</param>
            /// <param name="length">Number of items to process.</param>
            /// <param name="stringValueGetter">Provides the string value of each item.</param>
            /// <returns>
            /// A new tree built from the intermediate one: while enumerating it, every tuple whose
            /// suffix the currently retained tuple's suffix starts with is merged into that retained
            /// tuple; any other tuple becomes the new retained tuple and is added to the result.
            /// </returns>
            public static BPlusTreeList <tuple_t> Build(
                IList <T> objs, int index, int length, IStringValueGetter <T> stringValueGetter)
            {
                const int BLOCK_CAPACITY_4_LST = 512;

                BPlusTreeList <tuple_t> bpt;

                CreateMapArrays();
                try
                {
                    var totalSuffixCount = (from value in objs.Skip(index).Take(length)
                                            select GetSuffixCount(stringValueGetter.GetStringValue(value))
                                            ).Sum();

                    // Divide in floating point so that '+ 0.5' really rounds to the nearest block count;
                    // dividing the integers first truncates and makes the rounding a no-op.
                    var blockCountEstimate = (int)(totalSuffixCount / (double)BLOCK_CAPACITY_4_LST + 0.5) + 25;
                    bpt = new BPlusTreeList <tuple_t>(default(tuple_t), blockCountEstimate, BLOCK_CAPACITY_4_LST);

                    // Reused for every word: filters out suffixes that repeat within a single word.
                    var set = new Set <suffix_t>(new suffix_t_IEqualityComparer());

                    for (int i = index, end = index + length; i < end; i++)
                    {
                        var str = stringValueGetter.GetStringValue(objs[i]);

                        var tuple = new tuple_t()
                        {
                            Data = new SimplyLinkedList <data_t>()
                        };
                        var tupleExists = default(tuple_t);

                        foreach (var suff_t in GetSuffixes(i, str))
                        {
                            if (!set.Add(suff_t))
                            {
                                continue;
                            }

                            var data = new data_t(suff_t.SuffixIndex, suff_t.WordIndex);
                            tuple.Suffix = suff_t.Suffix;
                            if (bpt.AddOrGetExistsValue(tuple, out tupleExists))
                            {
                                // The candidate tuple went into the tree (presumably 'true' means "added"):
                                // record the occurrence on it and prepare a fresh candidate for the next suffix.
                                tuple.Data.Add(data);
                                tuple = new tuple_t()
                                {
                                    Data = new SimplyLinkedList <data_t>()
                                };
                            }
                            else
                            {
                                // An equal tuple is already in the tree: record the occurrence on that one.
                                tupleExists.Data.Add(data);
                            }
                        }
                        set.Clear();
                    }
                }
                finally
                {
                    // Always pair CreateMapArrays() with DestroyMapArrays(), even when building throws.
                    DestroyMapArrays();
                }

                // NOTE(review): if 'bpt.BlockCount' can be 0 (e.g. no suffixes were produced) this division throws - confirm.
                var bpt_out = new BPlusTreeList <tuple_t>(default(tuple_t), bpt.Count / bpt.BlockCount, bpt.BlockCount);
                using (var e = bpt.GetEnumerator())
                {
                    if (e.MoveNext())
                    {
                        var root_tuple = e.Current;
                        bpt_out.Add(root_tuple);
                        while (e.MoveNext())
                        {
                            var tuple = e.Current;
                            // NOTE(review): if 'Suffix' is a System.String, this StartsWith overload is
                            // culture-sensitive - confirm whether an ordinal comparison is intended.
                            if (root_tuple.Suffix.StartsWith(tuple.Suffix))
                            {
                                // Fold this tuple's occurrences into the retained tuple instead of emitting it.
                                foreach (var data in tuple.Data)
                                {
                                    root_tuple.Data.Add(data);
                                }
                            }
                            else
                            {
                                root_tuple = tuple;
                                bpt_out.Add(root_tuple);
                            }
                        }
                    }
                }

                return(bpt_out);
            }