/// <summary>
/// Tallies every key in <paramref name="keys"/> inside the symbol table and
/// returns the key holding the largest count.
/// </summary>
/// <typeparam name="TKey">The key type.</typeparam>
/// <param name="st">Symbol table used for counting; it is modified by this call.</param>
/// <param name="keys">All keys to tally.</param>
/// <returns>
/// The key with the highest count stored in <paramref name="st"/> after tallying,
/// starting the comparison from the first element of <paramref name="keys"/>.
/// </returns>
public static TKey MostFrequentlyKey<TKey>(IST<TKey, int> st, TKey[] keys)
{
    // Counting pass: a key seen before gets its stored count bumped, a new key starts at 1.
    foreach (var key in keys)
    {
        var updated = st.Contains(key) ? st.Get(key) + 1 : 1;
        st.Put(key, updated);
    }

    // Selection pass: only a strictly larger count replaces the current winner.
    var best = keys[0];
    foreach (var candidate in st.Keys())
    {
        if (st.Get(candidate) <= st.Get(best))
        {
            continue;
        }

        best = candidate;
    }

    return best;
}
/// <summary>
/// Runs the most-frequent-word computation on the given text file while recording,
/// after every step, the cumulative number of
/// <see cref="IST{TKey, TValue}.Get(TKey)"/> and
/// <see cref="IST{TKey, TValue}.Put(TKey, TValue)"/>
/// calls together with the elapsed time at that moment.
/// </summary>
/// <param name="filename">Path of the text file to read.</param>
/// <param name="minLength">Minimum length a string must have to be counted.</param>
/// <param name="st">Symbol table used for counting; it is modified by this call.</param>
/// <param name="callIndex">Cumulative Get/Put call count at each recorded step.</param>
/// <param name="timeRecord">Elapsed milliseconds at each recorded step, parallel to <paramref name="callIndex"/>.</param>
public static void MostFrequentlyWordAnalysis(string filename, int minLength, IST<string, int> st, out int[] callIndex, out long[] timeRecord)
{
    var call = new List<int>();
    var time = new List<long>();
    // The stopwatch starts before the file is read, so read time is part of every sample.
    var sw = Stopwatch.StartNew();
    var callTime = 0;

    // Dispose the reader (and with it the underlying file stream) as soon as the text is loaded.
    string[] inputs;
    using (var sr = new StreamReader(File.OpenRead(filename)))
    {
        inputs = sr
            .ReadToEnd()
            .Split(new char[] { ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    }

    foreach (var word in inputs)
    {
        if (word.Length < minLength)
        {
            continue;
        }

        if (st.Contains(word))
        {
            // One Get plus one Put.
            st.Put(word, st.Get(word) + 1);
            callTime += 2;
        }
        else
        {
            // A single Put.
            st.Put(word, 1);
            callTime++;
        }

        time.Add(sw.ElapsedMilliseconds);
        call.Add(callTime);
    }

    // The empty string with count 0 serves as the initial maximum.
    var max = "";
    st.Put(max, 0);
    callTime++;
    time.Add(sw.ElapsedMilliseconds);
    call.Add(callTime);

    // The winner is not returned; this pass exists so that its two Get calls
    // per key are included in the recorded samples.
    foreach (var s in st.Keys())
    {
        if (st.Get(s) > st.Get(max))
        {
            max = s;
        }

        callTime += 2;
        time.Add(sw.ElapsedMilliseconds);
        call.Add(callTime);
    }

    callIndex = call.ToArray();
    timeRecord = time.ToArray();
}
/// <summary>
/// Gets all strings that share the highest occurrence count in the given text file.
/// </summary>
/// <remarks>
/// The empty string is inserted into <paramref name="st"/> with count 0 to act as
/// the initial maximum, and it remains in the table afterwards.
/// </remarks>
/// <param name="filename">Path of the text file to read.</param>
/// <param name="minLength">Minimum length a string must have to be counted.</param>
/// <param name="st">Symbol table used for counting; it is modified by this call.</param>
/// <returns>The strings tied for the highest count stored in <paramref name="st"/>.</returns>
public static string[] MostFrequentlyWords(string filename, int minLength, IST<string, int> st)
{
    // Dispose the reader (and with it the underlying file stream) as soon as the text is loaded.
    string[] inputs;
    using (var sr = new StreamReader(File.OpenRead(filename)))
    {
        inputs = sr
            .ReadToEnd()
            .Split(new char[] { ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    }

    foreach (var s in inputs)
    {
        if (s.Length < minLength)
        {
            continue;
        }

        if (st.Contains(s))
        {
            st.Put(s, st.Get(s) + 1);
        }
        else
        {
            st.Put(s, 1);
        }
    }

    var max = "";
    var queue = new Queue<string>();
    st.Put(max, 0);
    foreach (var s in st.Keys())
    {
        if (st.Get(s) > st.Get(max))
        {
            // A strictly larger count invalidates every tie collected so far.
            max = s;
            queue.Clear();
            queue.Enqueue(s);
        }
        else if (st.Get(s) == st.Get(max))
        {
            queue.Enqueue(s);
        }
    }

    return queue.ToArray();
}
/// <summary>
/// Gets the string with the highest occurrence count in the given text file,
/// and prints the last string put into the table together with the number of counted words.
/// </summary>
/// <remarks>
/// The empty string is inserted into <paramref name="st"/> with count 0 to act as
/// the initial maximum, and it remains in the table afterwards.
/// </remarks>
/// <param name="filename">Path of the text file to read.</param>
/// <param name="minLength">Minimum length a string must have to be counted.</param>
/// <param name="st">Symbol table used for counting; it is modified by this call.</param>
/// <returns>The string with the highest count stored in <paramref name="st"/>.</returns>
public static string MostFrequentlyWord(string filename, int minLength, IST<string, int> st)
{
    // Dispose the reader (and with it the underlying file stream) as soon as the text is loaded.
    string[] inputs;
    using (var sr = new StreamReader(File.OpenRead(filename)))
    {
        inputs = sr
            .ReadToEnd()
            .Split(new char[] { ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    }

    var words = 0;
    var lastPut = "";
    foreach (var s in inputs)
    {
        if (s.Length < minLength)
        {
            continue;
        }

        words++;
        // Every counted word results in exactly one Put, so it is always the latest one.
        lastPut = s;
        if (st.Contains(s))
        {
            st.Put(s, st.Get(s) + 1);
        }
        else
        {
            st.Put(s, 1);
        }
    }

    Console.WriteLine("Last Put: " + lastPut + "\t words count: " + words);

    var max = "";
    st.Put(max, 0);
    foreach (var s in st.Keys())
    {
        if (st.Get(s) > st.Get(max))
        {
            max = s;
        }
    }

    return max;
}
/// <summary>
/// Gets the string with the highest occurrence count among the first
/// <paramref name="counts"/> sufficiently long words of the given text file.
/// </summary>
/// <remarks>
/// The empty string is inserted into <paramref name="st"/> with count 0 to act as
/// the initial maximum, and it remains in the table afterwards.
/// </remarks>
/// <param name="filename">Path of the text file to read.</param>
/// <param name="counts">
/// Maximum number of words to count; words shorter than <paramref name="minLength"/>
/// are skipped and do not consume this budget.
/// </param>
/// <param name="minLength">Minimum length a string must have to be counted.</param>
/// <param name="st">Symbol table used for counting; it is modified by this call.</param>
/// <returns>The string with the highest count stored in <paramref name="st"/>.</returns>
public static string MostFrequentlyWord(string filename, int counts, int minLength, IST<string, int> st)
{
    // Dispose the reader (and with it the underlying file stream) as soon as the text is loaded.
    string[] inputs;
    using (var sr = new StreamReader(File.OpenRead(filename)))
    {
        inputs = sr
            .ReadToEnd()
            .Split(new char[] { ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    }

    // Bound the loop by the number of accepted words instead of incrementing
    // the limit for every skipped word, which could overflow for large limits.
    var words = 0;
    for (var i = 0; i < inputs.Length && words < counts; i++)
    {
        var word = inputs[i];
        if (word.Length < minLength)
        {
            continue;
        }

        words++;
        if (st.Contains(word))
        {
            st.Put(word, st.Get(word) + 1);
        }
        else
        {
            st.Put(word, 1);
        }
    }

    var max = "";
    st.Put(max, 0);
    foreach (var s in st.Keys())
    {
        if (st.Get(s) > st.Get(max))
        {
            max = s;
        }
    }

    return max;
}
/// <summary>
/// Counts the elements of an array that were not yet present in the symbol table,
/// inserting each of them on first sight.
/// </summary>
/// <typeparam name="TKey">The element type of the array.</typeparam>
/// <param name="keys">Array that may contain duplicate elements.</param>
/// <param name="st">Symbol table used to remember the elements already seen; it is modified by this call.</param>
/// <returns>The number of elements of <paramref name="keys"/> newly inserted into <paramref name="st"/>.</returns>
public static int CountDistinct<TKey>(TKey[] keys, IST<TKey, int> st)
{
    var seen = 0;
    foreach (var key in keys)
    {
        if (st.Contains(key))
        {
            continue;
        }

        // The stored value is the 1-based order in which the key was first encountered.
        seen++;
        st.Put(key, seen);
    }

    return seen;
}
/// <summary>
/// Benchmarks a symbol table: first performs <paramref name="n"/>
/// <see cref="IST{TKey, TValue}.Put(TKey, TValue)"/> calls with generated strings,
/// then issues <see cref="IST{TKey, TValue}.Get(TKey)"/> calls so that every inserted
/// key is looked up <paramref name="averageHit"/> times, paired with an equal number
/// of lookups for a separately generated key.
/// </summary>
/// <param name="st">The symbol table to benchmark.</param>
/// <param name="n">Number of strings to insert into the symbol table.</param>
/// <param name="averageHit">Number of times each inserted key is queried.</param>
/// <returns>Elapsed time of the insert and query phases, in milliseconds.</returns>
public static long Performance(IST<string, int> st, int n, int averageHit)
{
    // NOTE(review): the arguments (2, 50) and (51, 52) look like length ranges chosen so the
    // second key can never equal an inserted one -- confirm against the generator helpers.
    var inserted = GetRandomArrayString(n, 2, 50);
    var absent = GetRandomString(51, 52);

    var timer = new Stopwatch();
    timer.Start();

    // Build phase.
    for (var index = 0; index < n; index++)
    {
        st.Put(inserted[index], index);
    }

    // Query phase: one lookup of an inserted key followed by one lookup of the other key.
    for (var round = 0; round < averageHit; round++)
    {
        for (var index = 0; index < n; index++)
        {
            st.Get(inserted[index]);
            st.Get(absent);
        }
    }

    timer.Stop();
    return timer.ElapsedMilliseconds;
}
/// <summary>
/// Times inserting, looking up and deleting the integers 0..n-1 in a symbol table,
/// reshuffling the order before each phase, and prints each phase's duration.
/// </summary>
/// <param name="n">Number of integer keys to use.</param>
/// <param name="st">The symbol table under test; it is modified by this call.</param>
static void Test(int n, IST<int, int> st)
{
    var data = new int[n];
    for (var value = 0; value < data.Length; value++)
    {
        data[value] = value;
    }

    // Shuffling always happens before the timer starts, so it is not measured.
    Shuffle(data);
    var putTimer = Stopwatch.StartNew();
    for (var k = 0; k < data.Length; k++)
    {
        st.Put(data[k], data[k]);
    }
    putTimer.Stop();
    Console.WriteLine("Random Put " + n + ":" + putTimer.ElapsedMilliseconds + "ms");

    Shuffle(data);
    var getTimer = Stopwatch.StartNew();
    for (var k = 0; k < data.Length; k++)
    {
        st.Get(data[k]);
    }
    getTimer.Stop();
    Console.WriteLine("Random Get " + n + ":" + getTimer.ElapsedMilliseconds + "ms");

    Shuffle(data);
    var deleteTimer = Stopwatch.StartNew();
    for (var k = 0; k < data.Length; k++)
    {
        st.Delete(data[k]);
    }
    deleteTimer.Stop();
    Console.WriteLine("Random Delete " + n + ":" + deleteTimer.ElapsedMilliseconds + "ms");
}