コード例 #1
0
        /// <summary>
        /// 获得指定键中出现频率最高的键。
        /// </summary>
        /// <typeparam name="TKey">键类型。</typeparam>
        /// <param name="st">用于计算的符号表。</param>
        /// <param name="keys">所有的键。</param>
        /// <returns><paramref name="keys"/> 中出现频率最高的键。</returns>
        public static TKey MostFrequentlyKey <TKey>(IST <TKey, int> st, TKey[] keys)
        {
            foreach (var s in keys)
            {
                if (st.Contains(s))
                {
                    st.Put(s, st.Get(s) + 1);
                }
                else
                {
                    st.Put(s, 1);
                }
            }

            var max = keys[0];

            foreach (var s in st.Keys())
            {
                if (st.Get(s) > st.Get(max))
                {
                    max = s;
                }
            }

            return(max);
        }
コード例 #2
0
        /// <summary>
        /// 计算指定文本文档中出现频率最高的字符串,
        /// 保存 <see cref="IST{TKey, TValue}.Get(TKey)"/>
        /// 和 <see cref="IST{TKey, TValue}.Put(TKey, TValue)"/>
        /// 的调用次数以及对应的耗时。
        /// </summary>
        /// <param name="filename">文件名。</param>
        /// <param name="minLength">字符串最小长度。</param>
        /// <param name="st">用于计算的符号表。</param>
        /// <param name="callIndex">调用次数。</param>
        /// <param name="timeRecord">对应耗时。</param>
        public static void MostFrequentlyWordAnalysis(string filename, int minLength, IST <string, int> st, out int[] callIndex, out long[] timeRecord)
        {
            var call = new List <int>();
            var time = new List <long>();
            var sw   = Stopwatch.StartNew();

            var callTime = 0;
            int distinct = 0, words = 0;
            var sr = new StreamReader(File.OpenRead(filename));

            var inputs =
                sr
                .ReadToEnd()
                .Split(new char[] { ' ', '\r', '\n' },
                       StringSplitOptions.RemoveEmptyEntries);

            for (var i = 0; i < inputs.Length; i++)
            {
                if (inputs[i].Length < minLength)
                {
                    continue;
                }
                words++;
                if (st.Contains(inputs[i]))
                {
                    st.Put(inputs[i], st.Get(inputs[i]) + 1);
                    callTime += 2;
                    time.Add(sw.ElapsedMilliseconds);
                    call.Add(callTime);
                }
                else
                {
                    st.Put(inputs[i], 1);
                    callTime++;
                    time.Add(sw.ElapsedMilliseconds);
                    call.Add(callTime);
                    distinct++;
                }
            }

            var max = "";

            st.Put(max, 0);
            callTime++;
            time.Add(sw.ElapsedMilliseconds);
            call.Add(callTime);
            foreach (var s in st.Keys())
            {
                if (st.Get(s) > st.Get(max))
                {
                    max = s;
                }
                callTime += 2;
                time.Add(sw.ElapsedMilliseconds);
                call.Add(callTime);
            }

            callIndex  = call.ToArray();
            timeRecord = time.ToArray();
        }
コード例 #3
0
        /// <summary>
        /// 计算数组中不重复元素的数量。
        /// </summary>
        /// <typeparam name="TKey">数组元素的类型。</typeparam>
        /// <param name="keys">包含重复元素的数组。</param>
        /// <param name="st">用于计算的符号表。</param>
        /// <returns><paramref name="keys"/> 中的不重复元素数量。</returns>
        public static int CountDistinct <TKey>(TKey[] keys, IST <TKey, int> st)
        {
            var distinct = 0;

            for (var i = 0; i < keys.Length; i++)
            {
                if (!st.Contains(keys[i]))
                {
                    st.Put(keys[i], ++distinct);
                }
            }
            return(distinct);
        }
コード例 #4
0
        /// <summary>
        /// 获得指定文本文档中出现频率最高的所有字符串。
        /// </summary>
        /// <param name="filename">文件名。</param>
        /// <param name="minLength">字符串最小长度。</param>
        /// <param name="st">用于计算的符号表。</param>
        /// <returns>文本文档出现频率最高的字符串数组。</returns>
        public static string[] MostFrequentlyWords(string filename, int minLength, IST <string, int> st)
        {
            int distinct = 0, words = 0;
            var sr = new StreamReader(File.OpenRead(filename));

            var inputs =
                sr
                .ReadToEnd()
                .Split(new char[] { ' ', '\r', '\n' },
                       StringSplitOptions.RemoveEmptyEntries);

            foreach (var s in inputs)
            {
                if (s.Length < minLength)
                {
                    continue;
                }
                words++;
                if (st.Contains(s))
                {
                    st.Put(s, st.Get(s) + 1);
                }
                else
                {
                    st.Put(s, 1);
                    distinct++;
                }
            }

            var max   = "";
            var queue = new Queue <string>();

            st.Put(max, 0);
            foreach (var s in st.Keys())
            {
                if (st.Get(s) > st.Get(max))
                {
                    max = s;
                    queue.Clear();
                    queue.Enqueue(s);
                }
                else if (st.Get(s) == st.Get(max))
                {
                    queue.Enqueue(s);
                }
            }

            return(queue.ToArray());
        }
コード例 #5
0
        /// <summary>
        /// 获得指定文本文档中出现频率最高的字符串。
        /// </summary>
        /// <param name="filename">文件名。</param>
        /// <param name="minLength">字符串最小长度。</param>
        /// <param name="st">用于计算的符号表。</param>
        /// <returns>文本文档出现频率最高的字符串。</returns>
        public static string MostFrequentlyWord(string filename, int minLength, IST <string, int> st)
        {
            int distinct = 0, words = 0;
            var sr = new StreamReader(File.OpenRead(filename));

            var inputs =
                sr
                .ReadToEnd()
                .Split(new char[] { ' ', '\r', '\n' },
                       StringSplitOptions.RemoveEmptyEntries);

            var lastPut = "";

            foreach (var s in inputs)
            {
                if (s.Length < minLength)
                {
                    continue;
                }
                words++;
                if (st.Contains(s))
                {
                    lastPut = s;
                    st.Put(s, st.Get(s) + 1);
                }
                else
                {
                    lastPut = s;
                    st.Put(s, 1);
                    distinct++;
                }
            }

            Console.WriteLine("Last Put: " + lastPut + "\t words count: " + words);

            var max = "";

            st.Put(max, 0);
            foreach (var s in st.Keys())
            {
                if (st.Get(s) > st.Get(max))
                {
                    max = s;
                }
            }

            return(max);
        }
コード例 #6
0
        /// <summary>
        /// 获得指定文本文档中出现频率最高的字符串。
        /// </summary>
        /// <param name="filename">文件名。</param>
        /// <param name="counts">从文件读入的单词数目。</param>
        /// <param name="minLength">字符串最小长度。</param>
        /// <param name="st">用于计算的符号表。</param>
        /// <returns>文本文档出现频率最高的字符串。</returns>
        public static string MostFrequentlyWord(string filename, int counts, int minLength, IST <string, int> st)
        {
            int distinct = 0, words = 0;
            var sr = new StreamReader(File.OpenRead(filename));

            var inputs =
                sr
                .ReadToEnd()
                .Split(new char[] { ' ', '\r', '\n' },
                       StringSplitOptions.RemoveEmptyEntries);

            for (var i = 0; i < counts && i < inputs.Length; i++)
            {
                if (inputs[i].Length < minLength)
                {
                    counts++;
                    continue;
                }
                words++;
                if (st.Contains(inputs[i]))
                {
                    st.Put(inputs[i], st.Get(inputs[i]) + 1);
                }
                else
                {
                    st.Put(inputs[i], 1);
                    distinct++;
                }
            }

            var max = "";

            st.Put(max, 0);
            foreach (var s in st.Keys())
            {
                if (st.Get(s) > st.Get(max))
                {
                    max = s;
                }
            }

            return(max);
        }