/// <summary>
/// Reads the HTML file <paramref name="filename"/> line by line, splits every line into
/// words and collects the wanted words in the sorted list <paramref name="sc"/>.
/// A word is kept only if it passes both the HTML filter (WantedHTMLWord) and the
/// language filter (WantedWord); each repeated word increases its stored occurrence by one.
/// </summary>
/// <param name="sc">
/// Receives a newly created list keyed by word. It is always assigned; when the method
/// returns false it may be empty or only partially filled.
/// </param>
/// <param name="filename">Path of the file to read.</param>
/// <param name="language">
/// Language whose exclude/include lists are loaded before reading; the HTML lists are
/// appended afterwards.
/// </param>
/// <returns>
/// true if the file was read to its end; false if the file does not exist or an
/// exception occurred while reading.
/// </returns>
public static bool ReadHTMLFile(out SortedList<string, TagCloudControl.StringItem> sc, string filename, TagCloud.TagCloudControl.TextLanguage language)
{
    // Give all entries the same timestamp.
    long last = DateTime.Now.Ticks;

    // Load the lists for the requested language (clearing any previous content),
    // then append the lists for HTML files.
    FillExcludeIncludeList(language, true);
    FillExcludeIncludeList(TagCloud.TagCloudControl.TextLanguage.HTML, false);

    sc = new SortedList<string, TagCloudControl.StringItem>();

    try
    {
        if (!File.Exists(filename))
        {
            return false;
        }

        // NOTE(review): in the reviewed source this literal contained an unescaped
        // line-break character between ". " and "!", which is not legal inside a regular
        // string literal. It is written as an explicit '\n' here; confirm which
        // character was originally intended as a delimiter.
        string delimStr = "\t#$&'=/<>+:“„;,. \n!?(){}[]\\–-_\"»«";
        char[] delimiter = delimStr.ToCharArray();

        // Dashes that are stripped from both ends of every line and every word.
        string trimStr = "–-";
        char[] trimmer = trimStr.ToCharArray();

        // The using blocks guarantee the file handle is released even when a filter
        // or the reader throws in the middle of the loop.
        using (FileStream objFStreamRead = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
        using (StreamReader objSReader = new StreamReader(objFStreamRead, System.Text.Encoding.Default))
        {
            string line;
            while ((line = objSReader.ReadLine()) != null)
            {
                line = line.Trim(trimmer);
                string[] words = line.Split(delimiter);

                foreach (string word in words)
                {
                    string newword = word.Trim(trimmer);

                    // HTML filter first, then the language filter.
                    if (!WantedHTMLWord(newword) || !WantedWord(newword))
                    {
                        continue;
                    }

                    TagCloudControl.StringItem existing;
                    if (sc.TryGetValue(newword, out existing))
                    {
                        // Known word: replace its entry with one whose occurrence is one higher.
                        double occ = existing.Occurrence;
                        sc[newword] = new TagCloudControl.StringItem(newword, occ + 1, true, last);
                    }
                    else
                    {
                        // First time this word is seen.
                        sc.Add(newword, new TagCloudControl.StringItem(newword, 1, true, last));
                    }
                }
            }
        }

        return true;
    }
    catch (Exception)
    {
        // Best effort by contract: any failure while opening or reading the file is
        // reported to the caller as false instead of being propagated.
        return false;
    }
}
/// <summary>
/// Fills ExcludeList and IncludeList from a text file located next to the executing assembly.
/// The file is chosen by <paramref name="language"/>: "ExcludeList-de.txt" for German,
/// "ExcludeList-html.txt" for HTML and "ExcludeList-en.txt" for everything else.
/// A line starting with "Exclude:" switches to the exclude section and a line starting with
/// "Include:" switches to the include section. Every line is split at commas and each item
/// that is not yet contained in the active list is added to it. Lines before the first
/// section marker are ignored.
/// </summary>
/// <param name="language">Selects which list file is read.</param>
/// <param name="clear">
/// If true both lists are cleared before being filled; otherwise the new strings are
/// appended to the existing lists.
/// </param>
/// <returns>
/// The number of entries in ExcludeList after loading, or 0 if the file does not exist
/// or an exception occurred while reading it.
/// </returns>
private static int FillExcludeIncludeList(TagCloud.TagCloudControl.TextLanguage language, bool clear)
{
    if (clear)
    {
        ExcludeList.Clear();
        IncludeList.Clear();
    }

    try
    {
        // The list files are expected in the directory of the executing assembly.
        string txtfile = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
        switch (language)
        {
            case TagCloudControl.TextLanguage.German:
                txtfile = Path.Combine(txtfile, "ExcludeList-de.txt");
                break;
            case TagCloudControl.TextLanguage.HTML:
                txtfile = Path.Combine(txtfile, "ExcludeList-html.txt");
                break;
            default:
                // default is english
                txtfile = Path.Combine(txtfile, "ExcludeList-en.txt");
                break;
        }

        if (!File.Exists(txtfile))
        {
            return 0;
        }

        // The using blocks release the file handle even if reading fails part-way.
        using (FileStream objFStreamRead = new FileStream(txtfile, FileMode.Open, FileAccess.Read, FileShare.Read))
        using (StreamReader objSReader = new StreamReader(objFStreamRead, System.Text.Encoding.Default))
        {
            bool exclude = false;
            bool include = false;
            string line;

            while ((line = objSReader.ReadLine()) != null)
            {
                line = line.Trim();

                // Section markers are fixed ASCII tokens, so compare them ordinally.
                if (line.StartsWith("Exclude:", StringComparison.Ordinal))
                {
                    exclude = true;
                    include = false;
                }

                if (line.StartsWith("Include:", StringComparison.Ordinal))
                {
                    exclude = false;
                    include = true;
                }

                // Items are taken exactly as split; only the whole line was trimmed above.
                string[] items = line.Split(',');

                if (exclude)
                {
                    foreach (string item in items)
                    {
                        // Skip duplicates and the bare section marker itself.
                        if ((!ExcludeList.Contains(item)) && (item != "Exclude:"))
                        {
                            ExcludeList.Add(item);
                        }
                    }
                }

                if (include)
                {
                    foreach (string item in items)
                    {
                        // Skip duplicates and the bare section marker itself.
                        if ((!IncludeList.Contains(item)) && (item != "Include:"))
                        {
                            IncludeList.Add(item);
                        }
                    }
                }
            }
        }

        return ExcludeList.Count;
    }
    catch (Exception)
    {
        // Best effort by contract: a failure while locating or reading the list file
        // is reported as 0; entries added before the failure stay in the lists.
        return 0;
    }
}