Exemplo n.º 1
0
        /// <summary>
        /// Reads the text file <paramref name="filename"/> line by line, splits every line
        /// into words and counts each word that passes both the HTML filter
        /// (<c>WantedHTMLWord</c>) and the language filter (<c>WantedWord</c>) in the
        /// sorted list <paramref name="sc"/>.
        /// </summary>
        /// <remarks>
        /// Before the file is touched, the include/exclude lists are rebuilt via
        /// <c>FillExcludeIncludeList</c>: first cleared and filled for
        /// <paramref name="language"/>, then extended with the HTML entries. This happens
        /// even when the file turns out not to exist.
        /// </remarks>
        /// <param name="sc">
        /// Always assigned a new list. Each accepted word is a key; its value is a
        /// <c>StringItem</c> built from the word, its occurrence count so far, <c>true</c>
        /// and a timestamp shared by all entries of this call. When the method returns
        /// <c>false</c> the list is empty or holds whatever was counted before the failure.
        /// </param>
        /// <param name="filename">Path of the text file to read.</param>
        /// <param name="language">Selects the language specific include/exclude file.</param>
        /// <returns>
        /// <c>true</c> if the file was read to the end; <c>false</c> if it does not exist
        /// or any exception occurred while reading or filtering.
        /// </returns>
        public static bool ReadHTMLFile(out SortedList <string, TagCloudControl.StringItem> sc, string filename, TagCloud.TagCloudControl.TextLanguage language)
        {
            // give all entries the same timestamp
            long last = DateTime.Now.Ticks;

            // language list first (clearing old content), then the HTML list appended to it
            FillExcludeIncludeList(language, true);
            FillExcludeIncludeList(TagCloud.TagCloudControl.TextLanguage.HTML, false);
            sc = new SortedList <string, TagCloudControl.StringItem>();
            try
            {
                if (!File.Exists(filename))
                {
                    return(false);
                }

                // characters that separate words, and characters stripped from line/word ends
                string delimStr  = "\t#$&'=/<>+:“„;,. !?(){}[]\\–-_\"»«";
                char[] delimiter = delimStr.ToCharArray();
                string trimStr   = "–-";
                char[] trimmer   = trimStr.ToCharArray();

                // using blocks release the file handle even when reading or filtering throws
                using (FileStream objFStreamRead = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
                using (StreamReader objSReader = new StreamReader(objFStreamRead, System.Text.Encoding.Default))
                {
                    string line;
                    while ((line = objSReader.ReadLine()) != null)
                    {
                        line = line.Trim(trimmer);
                        string[] words = line.Split(delimiter);
                        foreach (string word in words)
                        {
                            string newword = word.Trim(trimmer);

                            // HTML filter first, language filter only for words that passed it
                            if (!WantedHTMLWord(newword) || !WantedWord(newword))
                            {
                                continue;
                            }

                            TagCloudControl.StringItem previous;
                            if (sc.TryGetValue(newword, out previous))
                            {
                                // replace the entry in place with an incremented occurrence
                                double occ = previous.Occurrence;
                                sc[newword] = new TagCloudControl.StringItem(newword, occ + 1, true, last);
                            }
                            else
                            {
                                sc.Add(newword, new TagCloudControl.StringItem(newword, 1, true, last));
                            }
                        } // foreach
                    }     // while
                }
                return(true);
            }
            catch
            {
                // best effort by contract: every failure is reported as false
                return(false);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Fills the exclude list and the include list from a word file that is chosen by
        /// <paramref name="language"/> and located in the directory of the executing
        /// assembly: <c>ExcludeList-de.txt</c> for German, <c>ExcludeList-html.txt</c> for
        /// HTML and <c>ExcludeList-en.txt</c> for every other value.
        /// </summary>
        /// <remarks>
        /// A line starting with <c>Exclude:</c> switches to the exclude list, a line starting
        /// with <c>Include:</c> switches to the include list. Every line read while a list is
        /// selected (the marker line included) is split at commas and each item not yet
        /// present is added to that list; an item equal to the bare marker is skipped.
        /// Lines before the first marker are ignored. Items are stored exactly as split:
        /// they are not trimmed and empty items are kept.
        /// NOTE(review): confirm with the filter helpers whether untrimmed/empty items are intended.
        /// </remarks>
        /// <param name="language">Selects which word file is read.</param>
        /// <param name="clear">
        /// If <c>true</c> both lists are cleared first (also when the file is missing or
        /// unreadable); otherwise new items are appended to the existing content.
        /// </param>
        /// <returns>
        /// The total number of entries in the exclude list after reading, or 0 if the file
        /// does not exist or an exception occurred.
        /// </returns>
        private static int FillExcludeIncludeList(TagCloud.TagCloudControl.TextLanguage language, bool clear)
        {
            if (clear)
            {
                ExcludeList.Clear();
                IncludeList.Clear();
            }
            try
            {
                string txtfile = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
                switch (language)
                {
                case TagCloudControl.TextLanguage.German:
                    txtfile = Path.Combine(txtfile, "ExcludeList-de.txt");
                    break;

                case TagCloudControl.TextLanguage.HTML:
                    txtfile = Path.Combine(txtfile, "ExcludeList-html.txt");
                    break;

                default:   // default is english
                    txtfile = Path.Combine(txtfile, "ExcludeList-en.txt");
                    break;
                }
                if (!File.Exists(txtfile))
                {
                    return(0);
                }

                // using blocks release the file handle even when reading throws
                using (FileStream objFStreamRead = new FileStream(txtfile, FileMode.Open, FileAccess.Read, FileShare.Read))
                using (StreamReader objSReader = new StreamReader(objFStreamRead, System.Text.Encoding.Default))
                {
                    // which list the current line belongs to; both false until a marker is seen
                    bool   exclude = false;
                    bool   include = false;
                    string line;
                    while ((line = objSReader.ReadLine()) != null)
                    {
                        line = line.Trim();

                        // markers are fixed tokens, so compare them ordinally
                        if (line.StartsWith("Exclude:", StringComparison.Ordinal))
                        {
                            exclude = true;
                            include = false;
                        }
                        if (line.StartsWith("Include:", StringComparison.Ordinal))
                        {
                            exclude = false;
                            include = true;
                        }

                        string[] items = line.Split(',');
                        if (exclude)
                        {
                            foreach (string item in items)
                            {
                                // skip duplicates and the bare section marker
                                if ((!ExcludeList.Contains(item)) && (item != "Exclude:"))
                                {
                                    ExcludeList.Add(item);
                                }
                            }
                        }
                        if (include)
                        {
                            foreach (string item in items)
                            {
                                // skip duplicates and the bare section marker
                                if ((!IncludeList.Contains(item)) && (item != "Include:"))
                                {
                                    IncludeList.Add(item);
                                }
                            }
                        }
                    }
                }
                return(ExcludeList.Count);
            }
            catch
            {
                // best effort by contract: every failure is reported as 0
                return(0);
            }
        }