/// <summary>
/// Creates the suffix array over the window of <paramref name="objs"/> that starts at
/// <paramref name="index"/> and spans <paramref name="length"/> items.
/// Stores <paramref name="objs"/> and <paramref name="stringValueGetter"/> and builds the
/// underlying tree through <c>SuffixArrayBuilder.Build</c>.
/// </summary>
/// <param name="objs">Source items; must not be <c>null</c>.</param>
/// <param name="index">Zero-based position of the first item to index.</param>
/// <param name="length">Number of items to index, starting at <paramref name="index"/>.</param>
/// <param name="stringValueGetter">Extracts the string to index from each item; must not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="objs"/> or <paramref name="stringValueGetter"/> is <c>null</c>.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="index"/> is negative, <paramref name="length"/> is not positive, or the window
/// <c>[index, index + length)</c> does not fit inside <paramref name="objs"/>.
/// </exception>
public SuffixArray_v2(IList<T> objs, int index, int length, IStringValueGetter<T> stringValueGetter)
{
    if (objs == null)
    {
        throw (new ArgumentNullException(nameof(objs)));
    }

    // The builder walks objs[index] .. objs[index + length - 1], so "length" is a count, not an
    // end position. Validate the whole window here; "objs.Count - index" is used instead of
    // "index + length" so that very large arguments cannot overflow.
    if ((index < 0) || (length <= 0) || (objs.Count - index < length))
    {
        throw (new ArgumentException("index-or-length"));
    }

    if (stringValueGetter == null)
    {
        throw (new ArgumentNullException(nameof(stringValueGetter)));
    }

    _Objects = objs;
    _StringValueGetter = stringValueGetter;
    _BPT = SuffixArrayBuilder.Build(objs, index, length, stringValueGetter);
}
/// <summary>
/// Builds the suffix tree list for the items <c>objs[index] .. objs[index + length - 1]</c>.
/// </summary>
/// <remarks>
/// Works in two passes:
/// 1. every suffix produced by <c>GetSuffixes</c> for each item's string is inserted into a
///    temporary tree, collecting one <c>data_t</c> (suffix index, word index) per distinct
///    suffix occurrence within a word;
/// 2. the temporary tree is enumerated in its own order and consecutive tuples are folded into
///    the current "root" tuple whenever the root's suffix starts with the tuple's suffix; only
///    root tuples are copied into the returned tree.
/// Arguments are not validated here; callers are expected to pass a valid window.
/// </remarks>
/// <param name="objs">Source items.</param>
/// <param name="index">Zero-based position of the first item to process.</param>
/// <param name="length">Number of items to process.</param>
/// <param name="stringValueGetter">Extracts the string to index from each item.</param>
/// <returns>The tree containing the merged root tuples.</returns>
public static BPlusTreeList<tuple_t> Build(IList<T> objs, int index, int length, IStringValueGetter<T> stringValueGetter)
{
    const int BlockCapacity = 512;

    BPlusTreeList<tuple_t> bpt;

    CreateMapArrays();
    try
    {
        var totalSuffixCount = (from value in objs.Skip(index).Take(length)
                                select GetSuffixCount(stringValueGetter.GetStringValue(value))
                               ).Sum();

        // Divide in floating point so that the "+ 0.5" really rounds to the nearest block count;
        // dividing two integral values first would truncate and make the rounding a no-op.
        var initialCapacity = (int)(totalSuffixCount / (double)BlockCapacity + 0.5) + 25;
        bpt = new BPlusTreeList<tuple_t>(default(tuple_t), initialCapacity, BlockCapacity);

        // Used to drop repeated suffixes of a single word; emptied after every word.
        var set = new Set<suffix_t>(new suffix_t_IEqualityComparer());

        for (int i = index, end = index + length; i < end; i++)
        {
            var str = stringValueGetter.GetStringValue(objs[i]);

            // Spare tuple used as the insertion candidate; it is replaced by a fresh instance
            // only after the tree has accepted it, so rejected candidates are reused.
            var tuple = new tuple_t() { Data = new SimplyLinkedList<data_t>() };
            var tupleExists = default(tuple_t);

            foreach (var suff_t in GetSuffixes(i, str))
            {
                if (!set.Add(suff_t))
                {
                    continue;
                }

                var data = new data_t(suff_t.SuffixIndex, suff_t.WordIndex);
                tuple.Suffix = suff_t.Suffix;

                if (bpt.AddOrGetExistsValue(tuple, out tupleExists))
                {
                    // The candidate was taken: record the occurrence on it and allocate a new spare.
                    tuple.Data.Add(data);
                    tuple = new tuple_t() { Data = new SimplyLinkedList<data_t>() };
                }
                else
                {
                    // A tuple for this suffix is already stored: append the occurrence to it.
                    tupleExists.Data.Add(data);
                }
            }

            set.Clear();
        }
    }
    finally
    {
        // Always undo CreateMapArrays(), including when a getter or an indexer throws mid-build.
        DestroyMapArrays();
    }

    // NOTE(review): the constructor arguments are kept exactly as before; this division assumes
    // bpt.BlockCount is non-zero - confirm for inputs that produce no suffixes at all.
    var bpt_out = new BPlusTreeList<tuple_t>(default(tuple_t), bpt.Count / bpt.BlockCount, bpt.BlockCount);

    using (var e = bpt.GetEnumerator())
    {
        if (e.MoveNext())
        {
            var root_tuple = e.Current;
            bpt_out.Add(root_tuple);

            while (e.MoveNext())
            {
                var tuple = e.Current;

                // NOTE(review): StartsWith is called without an explicit comparison; if Suffix is
                // a System.String this is culture-sensitive - confirm it agrees with the tree order.
                if (root_tuple.Suffix.StartsWith(tuple.Suffix))
                {
                    // Fold the occurrences of the covered suffix into the current root.
                    foreach (var data in tuple.Data)
                    {
                        root_tuple.Data.Add(data);
                    }
                }
                else
                {
                    root_tuple = tuple;
                    bpt_out.Add(root_tuple);
                }
            }
        }
    }

    return (bpt_out);
}