// Counts the indexed suffix occurrences that match the given search suffix.
//   suffix   - text to look for; it is first passed through CorrectFindSuffix together with findMode
//              (presumably case normalization - confirm against CorrectFindSuffix).
//   findMode - matching mode forwarded to CorrectFindSuffix.
// Returns the number of stored occurrences whose offset plus the search length still fits
// inside the string value of the object they point to.
FindCount(string suffix, FindModeEnum findMode = FindModeEnum.IgnoreCase)
{
    suffix = CorrectFindSuffix(suffix, findMode);

    var matches = 0;
    var probe = new tuple_t() { Suffix = suffix };

    // The tree is queried with the starts-with comparer, using the probe's Suffix as the key.
    foreach (var node in _BPT.GetValues(probe, StartsWithStringComparer.Inst))
    {
        foreach (var occurrence in node.Data)
        {
            var text = _StringValueGetter.GetStringValue(_Objects[occurrence.ObjIndex]);
            var matchEnd = occurrence.SuffixIndex + suffix.Length;

            // Skip occurrences where the match would run past the end of the object's string.
            if (matchEnd > text.Length)
            {
                continue;
            }

            matches++;
        }
    }

    return matches;
}
// Looks up the indexed suffix occurrences that match the given search suffix.
//   suffix         - text to look for; it is first passed through CorrectFindSuffix together with findMode.
//   maxCount       - upper bound on the number of results that are materialized.
//   findTotalCount - receives the total number of matching occurrences, including those
//                    beyond maxCount that are counted but not returned.
//   findMode       - matching mode forwarded to CorrectFindSuffix.
// Returns the collected results as an array (each new hit is inserted at the front, so the
// array is in reverse discovery order), or find_result_t.EMPTY when nothing was collected.
Find(string suffix, int maxCount, out int findTotalCount, FindModeEnum findMode = FindModeEnum.IgnoreCase)
{
    findTotalCount = 0;
    suffix = CorrectFindSuffix(suffix, findMode);

    var probe = new tuple_t() { Suffix = suffix };
    var nodes = _BPT.GetValues(probe, StartsWithStringComparer.Inst);
    var results = new LinkedList<find_result_t>();

    foreach (var node in nodes)
    {
        foreach (var occurrence in node.Data)
        {
            var text = _StringValueGetter.GetStringValue(_Objects[occurrence.ObjIndex]);
            var matchEnd = occurrence.SuffixIndex + suffix.Length;

            // Skip occurrences where the match would run past the end of the object's string.
            if (matchEnd > text.Length)
            {
                continue;
            }

            // Every valid hit is counted, but only the first maxCount of them are stored.
            findTotalCount++;
            if (findTotalCount > maxCount)
            {
                continue;
            }

            results.AddFirst(find_result_t.Create(occurrence.ObjIndex, text, occurrence.SuffixIndex, suffix.Length));
        }
    }

    return results.Count == 0 ? find_result_t.EMPTY : results.ToArray();
}
// Reports whether the tree query for the given search suffix yields at least one tuple.
//   suffix   - text to look for; it is first passed through CorrectFindSuffix together with findMode.
//   findMode - matching mode forwarded to CorrectFindSuffix.
// Unlike FindCount/Find, this does not check the match against the length of the stored strings.
ContainsKey(string suffix, FindModeEnum findMode = FindModeEnum.IgnoreCase)
{
    var probe = new tuple_t() { Suffix = CorrectFindSuffix(suffix, findMode) };

    return _BPT.GetValues(probe, StartsWithStringComparer.Inst).Any();
}
/// <summary>
/// Builds the suffix index for the objects in <paramref name="objs"/> within the window
/// [<paramref name="index"/>, <paramref name="index"/> + <paramref name="length"/>).
/// </summary>
/// <remarks>
/// The work is done in two passes:
/// 1. every suffix produced by GetSuffixes for every object is inserted into a temporary tree,
///    with one tuple per distinct suffix string and a list of occurrences attached to it;
/// 2. the temporary tree is enumerated in its own order and copied into the result tree, folding
///    each tuple whose suffix is a prefix of the current "root" tuple's suffix into that root.
/// </remarks>
/// <param name="objs">Source objects; each is converted to a string by <paramref name="stringValueGetter"/>.</param>
/// <param name="index">Position in <paramref name="objs"/> of the first object to index.</param>
/// <param name="length">Number of objects to index.</param>
/// <param name="stringValueGetter">Extracts the string to be indexed from an object.</param>
/// <returns>The compacted tree of suffix tuples.</returns>
public static BPlusTreeList <tuple_t> Build( IList <T> objs, int index, int length, IStringValueGetter <T> stringValueGetter)
{
    const int BLOCK_CAPACITY_4_LST = 512;

    BPlusTreeList<tuple_t> bpt;

    CreateMapArrays();
    try
    {
        var totalSuffixCount = (from value in objs.Skip(index).Take(length)
                                select GetSuffixCount(stringValueGetter.GetStringValue(value))
                               ).Sum();

        // Round the estimated number of blocks to nearest. The division has to happen in floating
        // point: dividing two integers first would truncate and make the "+ 0.5" a no-op.
        var estimatedBlockCount = (int)(totalSuffixCount / (double)BLOCK_CAPACITY_4_LST + 0.5) + 25;
        bpt = new BPlusTreeList<tuple_t>(default(tuple_t), estimatedBlockCount, BLOCK_CAPACITY_4_LST);

        // Per-object filter: the same suffix entry of one object is inserted only once.
        var set = new Set<suffix_t>(new suffix_t_IEqualityComparer());

        for (int i = index, end = index + length; i < end; i++)
        {
            var str = stringValueGetter.GetStringValue(objs[i]);

            // A spare tuple is kept ready for insertion and replaced only after the tree has taken it.
            var tuple = new tuple_t() { Data = new SimplyLinkedList<data_t>() };
            var tupleExists = default(tuple_t);

            foreach (var suff_t in GetSuffixes(i, str))
            {
                if (!set.Add(suff_t))
                {
                    continue;
                }

                var data = new data_t(suff_t.SuffixIndex, suff_t.WordIndex);
                tuple.Suffix = suff_t.Suffix;

                if (bpt.AddOrGetExistsValue(tuple, out tupleExists))
                {
                    // The spare tuple went into the tree: attach the occurrence and prepare a new spare.
                    tuple.Data.Add(data);
                    tuple = new tuple_t() { Data = new SimplyLinkedList<data_t>() };
                }
                else
                {
                    // A tuple for this suffix is already stored: append the occurrence to it.
                    tupleExists.Data.Add(data);
                }
            }

            set.Clear();
        }
    }
    finally
    {
        // Always paired with CreateMapArrays above, even when indexing fails part-way.
        DestroyMapArrays();
    }

    // NOTE(review): here the second argument is the average item count per block and the third is
    // the block count, while the temporary tree above received (block count, block capacity).
    // Left as in the original - confirm against the BPlusTreeList constructor.
    var bpt_out = new BPlusTreeList<tuple_t>(default(tuple_t), bpt.Count / bpt.BlockCount, bpt.BlockCount);

    using (var e = bpt.GetEnumerator())
    {
        if (e.MoveNext())
        {
            var root_tuple = e.Current;
            bpt_out.Add(root_tuple);

            while (e.MoveNext())
            {
                var tuple = e.Current;

                // NOTE(review): StartsWith(string) is culture-sensitive; an ordinal comparison may be
                // what is intended here - confirm against the tree's comparer before changing it.
                if (root_tuple.Suffix.StartsWith(tuple.Suffix))
                {
                    // The tuple's suffix is a prefix of the root's suffix: merge its occurrences into the root.
                    foreach (var data in tuple.Data)
                    {
                        root_tuple.Data.Add(data);
                    }
                }
                else
                {
                    // Otherwise the tuple becomes the new root and is stored in the result tree.
                    root_tuple = tuple;
                    bpt_out.Add(root_tuple);
                }
            }
        }
    }

    return bpt_out;
}