/// <summary>
/// Pre-processes a sentence into a form that is easier to analyze.
/// </summary>
/// <remarks>
/// Splits the text into words, filters out invalid/stop words via
/// EnumerableValidWords, stems each remaining word, and joins the
/// results back together with single spaces.
/// </remarks>
/// <param name="text">Raw input sentence.</param>
/// <returns>Space-separated cleaned, stemmed words.</returns>
private string CleanText(string text)
{
    string[] splitWords = text.Split(' ');
    var stemmed = new List<string>();
    foreach (string word in EnumerableValidWords(splitWords))
    {
        stemmed.Add(_stemmer.StemWord(word));
    }
    return string.Join(" ", stemmed.ToArray());
}
/// <summary>
/// Builds a crawler link record: stores the outgoing links, tokenizes the
/// page contents, drops short and stop-word tokens, computes shingle hashes
/// for near-duplicate detection, stems the tokens, and counts how often each
/// stem occurs.
/// </summary>
/// <param name="address">URL of the crawled page.</param>
/// <param name="contents">Raw page text to tokenize.</param>
/// <param name="links">Outgoing links found on the page.</param>
public CrawlerLink(string address, string contents, IEnumerable <string> links)
{
    Console.WriteLine("Processing contents...");
    this.Address = address;

    // Keep the set of outgoing links.
    this.Links = new HashSet <string>(links);

    // Split raw contents on spaces.
    Console.Write("Tokenizing... ");
    var tokens = new List <string>(contents.Split(' '));
    Console.WriteLine("Done!");

    // Drop single-character tokens and stop words.
    Console.Write("Removing short and stop word tokens... ");
    tokens.RemoveAll(t => t.Length <= 1 || StopWords.StopWordsList.Contains(t));
    Console.WriteLine("Done!");

    // Shingle hashes are computed over the unstemmed token stream.
    Console.Write("Generating shingle hashes... ");
    this.ShingleHashes = new LinkedList <ulong>(new Jaccard().HashedShinglifyDocument(tokens.ToArray()));
    Console.WriteLine("Done!");

    // Reduce every token to its Porter stem.
    Console.Write("Stemming tokens... ");
    var porter = new PorterStemmer();
    var stems = tokens.Select(t => porter.StemWord(t)).ToList();
    this.Tokens = new HashSet <string>(stems);
    Console.WriteLine("Done!");

    // Sorting places identical stems next to each other for the count below.
    Console.Write("Sorting stemmed tokens... ");
    stems.Sort();
    Console.WriteLine("Done!");

    // Count occurrences of each stem (duplicates are adjacent after the sort).
    Console.Write("Adding stemmed tokens to dictionary... ");
    var counts = new Dictionary <string, int>();
    var previous = "";
    foreach (var stem in stems)
    {
        if (stem.Equals(previous))
        {
            counts[stem] += 1;
        }
        else
        {
            previous = stem;
            counts[stem] = 1;
        }
    }
    this.Keywords = counts;
    Console.WriteLine("Done!");
}
/// <summary>
/// Tokenizes a free-text query, removes short tokens and stop words, stems
/// the remainder, and delegates to the token-list overload of Execute.
/// </summary>
/// <param name="query">Raw user query.</param>
/// <param name="registry">Index registry to search.</param>
/// <param name="maxResults">Maximum number of results to return.</param>
/// <param name="usePageRank">Whether to factor PageRank into scoring.</param>
/// <returns>Matching index entries from the delegated overload.</returns>
public LinkedList <IndexEntry> Execute(string query, CrawlerRegistry registry, int maxResults = 25, bool usePageRank = false)
{
    var tokens = new List <string>(query.ToLower().Split(' '));
    tokens.RemoveAll(token => token.Length <= 1 || StopWords.StopWordsList.Contains(token));

    var stemmer = new PorterStemmer();
    // Tokens are already lower-cased above, so the redundant per-token
    // ToLower() was removed. The HashSet de-duplicates stems before lookup.
    var stemmedTokens = new HashSet <string>(tokens.Select(token => stemmer.StemWord(token)));

    return this.Execute(stemmedTokens.ToList(), registry, maxResults, usePageRank);
}
/// <summary>
/// Builds an inverted index mapping each stemmed term to the files that
/// contain it and the number of occurrences per file.
/// </summary>
/// <param name="folder">Root folder whose files are indexed.</param>
/// <returns>The populated inverted index (also stored in internalIndex).</returns>
public Dictionary <string, Dictionary <string, double> > InvertedIndex(string folder)
{
    converter = new Converter();
    if (internalIndex != null)
    {
        // Release the memory held by any previously built index.
        internalIndex.Clear();
    }
    internalIndex = new Dictionary <string, Dictionary <string, double> >();
    indexCount = 0; // number of distinct terms in the index

    // One stemmer reused for every word in every file.
    PorterStemmer stemmer = new PorterStemmer();

    foreach (string file in IndexingFolders(folder))
    {
        foreach (string word in ScanFiles.scanFiles(file))
        {
            string stemmedWord = stemmer.StemWord(word);
            if (internalIndex.ContainsKey(stemmedWord))
            {
                // fileList is a reference to the stored dictionary, so
                // mutating it updates the index directly — no re-assignment
                // needed. BUG FIX: increment the count arithmetically; the
                // original round-tripped it through ToString()/double.Parse,
                // which is slow and culture-sensitive.
                Dictionary <string, double> fileList = internalIndex[stemmedWord];
                if (fileList.ContainsKey(file))
                {
                    fileList[file] = fileList[file] + 1;
                }
                else
                {
                    fileList.Add(file, 1.0);
                }
            }
            else
            {
                // First sighting of this term: start its posting list.
                internalIndex.Add(stemmedWord, new Dictionary <string, double> { { file, 1.0 } });
                indexCount++;
            }
        }
    }
    return internalIndex;
}
/// <summary>
/// Stems every word in RawWords.txt and checks the results match the
/// expected stems recorded in OutputWords.txt, line for line.
/// </summary>
public void Test_StemWordOutPut_Matches_StaticOutput()
{
    // Walk three directories up from the test binary to the project folder.
    string filepath = Path.GetDirectoryName(System.AppDomain.CurrentDomain.BaseDirectory);
    filepath = Directory.GetParent(Directory.GetParent(Directory.GetParent(filepath).FullName).FullName).FullName;

    List <string> StaticOutput = new List <string>();
    List <string> TestOutput = new List <string>();

    // Shared reader: feeds each line of the file to `consume`. IO failures
    // are reported to the console (the final assert will then fail).
    Action <string, Action <string> > readLines = (path, consume) =>
    {
        try
        {
            using (var stream = new StreamReader(path))
            {
                for (string line = stream.ReadLine(); line != null; line = stream.ReadLine())
                {
                    consume(line);
                }
            }
        }
        catch (IOException e)
        {
            Console.WriteLine("The file could not be read:");
            Console.WriteLine(e.Message);
        }
    };

    // Expected stems.
    readLines(filepath + @"\StemmerTestFiles\OutputWords.txt", line => StaticOutput.Add(line));

    // Actual stems produced from the raw word list.
    PorterStemmer ps = new PorterStemmer();
    readLines(filepath + @"\StemmerTestFiles\RawWords.txt", line => TestOutput.Add(ps.StemWord(line)));

    Assert.IsTrue(StaticOutput.SequenceEqual(TestOutput));
}
/// <summary>
/// Transforms the list of terms into their root form.
/// Uses the Porter Stemming algorithm.
/// </summary>
/// <param name="wordList">List of terms to edit</param>
/// <returns>A new list with each term stemmed, in the original order.</returns>
public static List <string> StemWords(List <string> wordList)
{
    var stemmer = new PorterStemmer();
    var stemmedList = new List <string>(wordList.Count);
    for (int i = 0; i < wordList.Count; i++)
    {
        stemmedList.Add(stemmer.StemWord(wordList[i]));
    }
    return stemmedList;
}
/// <summary>
/// 52. Stemming.
/// Takes the output of problem 51 as input, applies the Porter stemming
/// algorithm, and prints each word and its stem separated by a tab.
/// (In Python, the `stemming` module provides a Porter implementation.)
/// </summary>
public void Answer52()
{
    var stemmer = new PorterStemmer();
    foreach (var sentence in SplitSentence())
    {
        foreach (var token in SplitWords(sentence))
        {
            Console.WriteLine($"{token}\t{stemmer.StemWord(token)}");
        }
        // Blank line between sentences.
        Console.WriteLine();
    }
}
/// <summary>
/// Click handler for the search button: stems the query word, looks it up
/// in the in-memory inverted index, and lists every file that contains it.
/// </summary>
private void searchButton_Click(object sender, EventArgs e)
{
    PorterStemmer stemmer = new PorterStemmer(); // instantiate a PorterStemmer object to stem words from files
    string stemmedWord = stemmer.StemWord(searchWord.Text); // stem the query so it can match the stemmed index keys
    fileList.Text = " ";
    filesFound.Text = " ";
    List <string> files = new List <string>();
    bool found = false;
    if (thread.IsAlive)
    {
        // Index construction runs on a background thread; refuse to search while it is building.
        MessageBox.Show("The index is currently busy. Please try again later");
    }
    else
    {
        // Linear scan over every index entry, comparing each key to the query stem.
        foreach (var item in index.internalIndex)
        {
            // NOTE(review): stop-word stripping is applied to the stored key at
            // search time — presumably to skip stop-word entries; confirm this
            // is intended rather than filtering when the index is built.
            string newItem = StopWords.RemoveStopwords(item.Key);
            if (newItem == stemmedWord)
            {
                found = true;
                // item.Value maps file path -> count; show how many files matched.
                filesFound.Text = item.Value.Count.ToString();
                foreach (var folderName in item.Value.Keys)
                {
                    files.Add(folderName);
                }
            }
        }
        foreach (var file in files)
        {
            fileList.Text += file + "\r\n";
        }
        if (!found)
        {
            filesFound.Text = "0";
            fileList.Text = "No results found";
        }
    }
}
/// <summary>
/// Scans the current directory for .docx/.pdf/.rtf documents, extracts and
/// stems their text, writes one stemmed-token file per document, and writes
/// an aggregate stem-frequency dictionary to dictionary.txt.
/// Pass "verbose" as the first argument for extra output.
/// </summary>
static void Main(string[] args)
{
    debug = false; // If set, increases output of info
    string usage = "Usage options: \"verbose\" for additional output ";
    if (args.Length != 0)
    {
        switch (args[0])
        {
            case "verbose":
                debug = true;
                break;
            default:
                Console.WriteLine(usage);
                break;
        }
    }

    Console.WriteLine("Getting list of files for processing");

    //*************************************
    // Get files from the current directory
    //*************************************
    // FIX: the original fetched *.pdf twice (once into an unused local) —
    // build the combined list in one pass instead.
    string path = Directory.GetCurrentDirectory();
    string[] docArray = Directory.GetFiles(path, "*.docx")
                        .Concat(Directory.GetFiles(path, "*.pdf"))
                        .Concat(Directory.GetFiles(path, "*.rtf"))
                        .ToArray();
    Array.Sort(docArray); // Ensure sort order is maintained across the processing apps

    // List all docs found when verbose.
    if (debug == true)
    {
        Console.WriteLine("We found the following list of files: ");
        foreach (var file in docArray)
        {
            Console.WriteLine(file);
        }
    }
    Console.WriteLine("Total Files found:{0}", docArray.Length);

    // Token separators for the extracted document text.
    char[] separators = { '_', ' ', ',', '.', '-', ':', ';', '{', '}', '|', '\n', '\t', '\u2029', '\r' };
    var stemmer = new PorterStemmer();
    Dictionary <string, int> dictionary = new Dictionary <string, int>();

    // FIX: writers are wrapped in using blocks so they are always flushed and
    // closed, even on early exit (the original leaked outfile when no file
    // was ever opened).
    using (StreamWriter outfile = new StreamWriter("dictionary.txt"))
    {
        foreach (var file in docArray)
        {
            string output = processdocument(file);
            if (fileopen == false)
            {
                continue; // extraction failed; skip this document
            }

            // Output file: same name with the extension swapped to .txt.
            string fname = file.Replace(".docx", ".txt").Replace(".pdf", ".txt").Replace(".rtf", ".txt");
            Console.WriteLine($"Writing file {fname} output...");
            using (var writefile = new StreamWriter(fname, true))
            {
                //*************
                // tokenization
                //*************
                string[] tokens = output.Split(separators, StringSplitOptions.RemoveEmptyEntries);
                foreach (var token in tokens)
                {
                    // Stem the current token and record it.
                    string stem = stemmer.StemWord(token);
                    writefile.WriteLine(stem);

                    // FIX: count with ContainsKey instead of using the
                    // Dictionary.Add ArgumentException as control flow.
                    if (dictionary.ContainsKey(stem))
                    {
                        dictionary[stem] += 1;
                    }
                    else
                    {
                        dictionary.Add(stem, 1);
                    }
                }
            }
        }

        // Write the aggregate dictionary only if at least one file was processed.
        if (fileopen == true)
        {
            foreach (var entry in dictionary)
            {
                outfile.WriteLine("{0}, {1}", entry.Key, entry.Value);
            }
        }
    }

    Console.WriteLine("{0} Errors found", errorcount);
}
/// <summary>
/// Reads crawled pages from the database (up to 1600 rows), extracts their
/// visible text via MSHTML, normalizes and spell-filters the tokens, removes
/// stop words, applies Porter stemming, and writes both the pre-stemming
/// terms and the final positional inverted index back to the database.
/// </summary>
static void Main(string[] args)
{
    // NOTE(review): connection string is hard-coded to a developer machine;
    // it belongs in configuration.
    SqlConnection sqlConnection = new SqlConnection("Data Source=AHMEDFATHY-PC;Initial Catalog=newDB;Integrated Security=True; MultipleActiveResultSets=true");
    sqlConnection.Open();

    // Pull every crawled page from the crawler table.
    string queryString = "SELECT * FROM crawler_Table";
    SqlCommand cmd = new SqlCommand(queryString, sqlConnection);

    // Reader used to walk the result set row by row.
    SqlDataReader rdr = cmd.ExecuteReader();

    int counterofopages = 0; // pages successfully processed (target: at least 1500)

    // In-memory index: term -> ([docId, frequency], positions of the term in that doc).
    List <KeyValuePair <string, KeyValuePair <int[], List <int> > > > indexmap = new List <KeyValuePair <string, KeyValuePair <int[], List <int> > > >();

    while (rdr.Read())
    {
        // Stop once enough pages have been indexed.
        if (counterofopages == 1600)
        {
            break;
        }

        // Set to 1 when this row throws (e.g. null inner text) so the row is
        // skipped and not counted.
        int boolll = 0;
        try
        {
            //===================================================//
            // Row columns: document id, page URL, raw HTML content.
            int doc_id = (int)rdr["doc_id"];
            string url = (string)rdr["URL"];
            string content = (string)rdr["Page_Content"];

            //===================================================//
            // Step 1: parse the HTML with the MSHTML COM document and take the
            // visible inner text.
            IHTMLDocument2 myDoc = new HTMLDocumentClass();
            myDoc.write(content);
            string elements = myDoc.body.innerText;

            //===================================================//
            // Step 2: split the text into raw tokens and drop empties.
            string[] tokens = elements.Split(',', ' ', '.', ':', '\t', '\n', '\r');
            int i = 0; // running token position, feeds the positional index
            tokens = tokens.Where(x => !string.IsNullOrEmpty(x)).ToArray();

            //===================================================//
            // Step 3 (before stop-word removal): map each term to the list of
            // positions where it occurs.
            Dictionary <string, List <int> > termsandpos = new Dictionary <string, List <int> >();
            foreach (var words in tokens)
            {
                List <int> listofpos = new List <int>();
                i++;

                // Step 3 req 1: strip punctuation, then digits, from the token.
                string word = Regex.Replace(words, @"[^\w\d\s]", "");
                word = Regex.Replace(word, @"\d", "");

                // Token was only punctuation/digits — nothing left to index.
                if (word == "")
                {
                    continue;
                }

                // NetSpell spell check; ShowDialog is disabled so the
                // suggestion window does not interrupt the run.
                // NOTE(review): constructing a Spelling object per token is
                // expensive — hoisting one instance out of the loop would be
                // much faster; confirm the library allows reuse.
                Spelling ss = new Spelling();
                ss.ShowDialog = false;

                // Skip tokens the spell checker does not accept as real
                // English words; otherwise fall through and index the word.
                if (ss.SpellCheck(word))
                {
                    continue;
                }

                word = word.ToLower(); // step 3 req 2: case folding

                // Append to the word's existing position list, or start one.
                if (termsandpos.ContainsKey(word))
                {
                    listofpos = termsandpos[word];
                    listofpos.Add(i);
                    termsandpos[word] = listofpos;
                }
                else
                {
                    listofpos.Add(i);
                    termsandpos.Add(word, listofpos);
                }
            }

            //===================================================//
            // Step 3 req 3: stop-word removal. Copy every surviving term (and
            // its positions) into a second dictionary.
            Dictionary <string, List <int> > temp = new Dictionary <string, List <int> >();
            List <string> stopwords = new List <string>() { "a", "about", "above", "above", "across", "after", "afterwards", "again", "against", "all", "almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "amoungst", "amount", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are", "around", "as", "at", "back", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "bill", "both", "bottom", "but", "by", "call", "can", "cannot", "cant", "co", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do", "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else", "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly", "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "ie", "if", "in", "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name", "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own", "part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems", "serious", "several", "she", "should", "show", "side", "since", "sincere", "six", "sixty", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "thereupon", "these", "they", "thick", "thin", "third", "this", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards", "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet", "you", "your", "yours", "yourself", "yourselves", "the" };
            for (int f = 0; f < termsandpos.Count; f++)
            {
                // Skip stop words and very short terms (length <= 2, e.g. "ii").
                if (stopwords.Contains(termsandpos.Keys.ElementAt(f)) || termsandpos.Keys.ElementAt(f).Length <= 2)
                {
                    continue;
                }
                // Otherwise keep the term and its positions.
                else
                {
                    List <int> copyofpositions = new List <int>();
                    copyofpositions = termsandpos[termsandpos.Keys.ElementAt(f)];
                    temp.Add(termsandpos.Keys.ElementAt(f), copyofpositions);
                }
            }

            //===================================================//
            // Step 3 req 4: Porter stemming. Terms that share a stem have
            // their position lists merged.
            var stemmer = new PorterStemmer();
            Dictionary <string, List <int> > finalterm = new Dictionary <string, List <int> >();
            foreach (KeyValuePair <string, List <int> > iter1 in temp)
            {
                //===================================================//
                // Record the pre-stemming term with its doc id (required by
                // the note in step 3 req 4).
                string insertString3 = "INSERT INTO TermsBStemming_Table (termBstemming,docID) VALUES (@termBstemming,@docID)";
                SqlCommand cmd3 = new SqlCommand(insertString3, sqlConnection);
                SqlParameter par1 = new SqlParameter("@termBstemming", iter1.Key);
                SqlParameter par2 = new SqlParameter("@docID", doc_id);
                cmd3.Parameters.Add(par1);
                cmd3.Parameters.Add(par2);
                cmd3.ExecuteNonQuery();

                //===================================================//
                List <int> listofpositions = new List <int>();
                string stem = stemmer.StemWord(iter1.Key);

                // If another term already produced this stem, append the new
                // positions to the stem's existing list.
                if (finalterm.ContainsKey(stem))
                {
                    List <int> tempforsimlir = new List <int>();
                    tempforsimlir = finalterm[stem];       // positions recorded so far for this stem
                    listofpositions = temp[iter1.Key];     // positions of the current term
                    for (int j = 0; j < listofpositions.Count; j++)
                    {
                        tempforsimlir.Add(listofpositions[j]);
                    }
                    finalterm[stem] = tempforsimlir;
                }
                // First term producing this stem: store its positions directly.
                else
                {
                    listofpositions = temp[iter1.Key];
                    finalterm.Add(stem, listofpositions);
                }
            }

            //===================================================//
            // Step 4: write one inverted-index row per stem for this document.
            foreach (KeyValuePair <string, List <int> > iter in finalterm)
            {
                int freq = iter.Value.Count; // frequency = number of recorded positions
                int[] arr = new int[2];      // [0] = doc id, [1] = frequency
                arr[0] = doc_id;
                arr[1] = freq;

                // Serialize the positions as "p1, p2, ..." for storage.
                var resultofpositions = string.Join(", ", iter.Value);

                //===================================================//
                // Parameterized insert of (term, docId, frequency, positions).
                string insertString2 = "INSERT INTO Inverted_Index (Term,DocID,Frequency,position) VALUES (@Term,@DocID,@Frequency,@position)";
                SqlCommand cmd2 = new SqlCommand(insertString2, sqlConnection);
                SqlParameter paramter1 = new SqlParameter("@Term", iter.Key);
                SqlParameter paramter2 = new SqlParameter("@DocID", doc_id);
                SqlParameter paramter3 = new SqlParameter("@Frequency", freq);
                SqlParameter paramter4 = new SqlParameter("@position", resultofpositions);
                cmd2.Parameters.Add(paramter1);
                cmd2.Parameters.Add(paramter2);
                cmd2.Parameters.Add(paramter3);
                cmd2.Parameters.Add(paramter4);
                cmd2.ExecuteNonQuery();

                //===================================================//
                // Keep an in-memory copy (term, [docId, freq], positions) for
                // later use (ranking etc.).
                indexmap.Add(new KeyValuePair <string, KeyValuePair <int[], List <int> > >(iter.Key, new KeyValuePair <int[], List <int> >(arr, iter.Value)));
            }
            //===================================================//
        }
        //===================================================//
        // A row whose content cannot be parsed is reported and skipped.
        catch (NullReferenceException ex)
        {
            boolll = 1;
            Console.WriteLine(ex.Message);
        }
        catch (ArgumentOutOfRangeException exx)
        {
            boolll = 1;
            Console.WriteLine(exx.Message);
        }

        // Skip the failed row without counting it toward the page total.
        if (boolll == 1)
        {
            continue;
        }

        //===================================================//
        // Count this page toward the 1500+ target.
        counterofopages++;
        //===================================================//
    }

    //===================================================//
    // Release the reader and the database connection.
    rdr.Close();
    sqlConnection.Close();
    //===================================================//
}
/// <summary>
/// Tokenizes a boolean query string, removes short tokens and non-operator
/// stop words, upper-cases boolean operator tokens so the parser can
/// recognize them, stems everything else, and delegates to the token-list
/// overload of ParseQuery.
/// </summary>
/// <param name="input">Raw boolean query.</param>
/// <returns>The parsed query tree from the delegated overload.</returns>
public QueryPart ParseQuery(string input)
{
    var tokens = new List <string>(input.ToLower().Split(' '));
    tokens.RemoveAll(token => token.Length <= 1 || StopWords.BooleanStopWordsList.Contains(token));

    var stemmer = new PorterStemmer();
    // Tokens are already lower-cased above, so the redundant per-token
    // ToLower() calls were removed.
    var stemmedTokens = new List <string>(tokens.Select(token => StopWords.BooleanWords.Contains(token) ? token.ToUpper() : stemmer.StemWord(token)));

    return this.ParseQuery(stemmedTokens);
}
/// <summary>
/// Extractive summarization via Latent Semantic Analysis: builds a
/// TF-IDF-weighted concept/sentence matrix, takes its SVD, and uses the
/// cross method on |V^T| to pick one source sentence per concept.
/// Also fills textFile.DocumentConcepts and textFile.DocumentLength.
/// </summary>
/// <param name="textFile">Input text plus summary-length settings.</param>
/// <returns>The selected sentences concatenated into one summary string.</returns>
public static string SummarizeByLSA(TextFile textFile)
{
    string input = textFile.RawText;
    string[] sentences = input.Split(new char[] { '.', '!', '?', ':', '…', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    // Normalize each sentence: trim, strip punctuation, lowercase.
    for (int i = 0; i < sentences.Length; ++i)
    {
        var sb = new StringBuilder();
        foreach (char c in sentences[i].Trim())
        {
            if (!char.IsPunctuation(c))
            {
                sb.Append(c);
            }
        }
        sentences[i] = sb.ToString().ToLower();
    }

    // Remove stop words--e.g., the, and, a, etc.
    // FIX: the original re-ran the identical filter once per stop word against
    // a stale snapshot of the sentence; one pass with a set lookup suffices.
    var stopwords = new HashSet <string>(File.ReadAllLines(@"Resources/stopwords.txt"));
    for (int i = 0; i < sentences.Length; ++i)
    {
        sentences[i] = string.Join(" ", sentences[i].Split(' ').Where(wrd => !stopwords.Contains(wrd)));
    }

    // Reduce words to their stem.
    // FIX: the original passed the entire sentence to StemWord, which stems a
    // single word; stem word-by-word instead.
    PorterStemmer stemmer = new PorterStemmer();
    for (int i = 0; i < sentences.Length; ++i)
    {
        sentences[i] = string.Join(" ", sentences[i].Split(' ').Select(w => stemmer.StemWord(w)));
    }

    // Word frequencies across the whole (preprocessed) document.
    Dictionary <string, int> wordFrequencies = new Dictionary <string, int>();
    foreach (string s in sentences)
    {
        foreach (string w in s.Split(' '))
        {
            if (wordFrequencies.ContainsKey(w))
            {
                wordFrequencies[w] += 1;
            }
            else
            {
                wordFrequencies[w] = 1;
            }
        }
    }

    // Top N words with highest frequencies serve as document concepts.
    int N = textFile.DesiredSummaryLength;
    string[] concepts = wordFrequencies.OrderByDescending(kvp => kvp.Value)
                        .Take(N)
                        .Select(kvp => kvp.Key)
                        .ToArray();
    textFile.DocumentConcepts = concepts;

    int documentLength = sentences.Length;

    // FIX: size the matrix by the actual number of concepts — when the
    // document has fewer than N distinct words, indexing concepts[i] with N
    // rows would throw.
    var X = DenseMatrix.Create(concepts.Length, documentLength, (i, j) => 0.0);
    for (int i = 0; i < X.RowCount; ++i)
    {
        int sentencesWithConcept = 0;
        string concept = concepts[i];
        for (int j = 0; j < X.ColumnCount; ++j)
        {
            string[] sentenceWords = sentences[j].Split(' ');
            int wordCount = sentenceWords.Count(word => word == concept);
            if (wordCount > 0)
            {
                sentencesWithConcept += 1;
            }
            // FIX: cast to double — the original integer division truncated
            // every term frequency below 1.0 to zero, zeroing the matrix.
            X[i, j] = (double)wordCount / sentenceWords.Length;
        }
        if (sentencesWithConcept == 0)
        {
            Console.WriteLine("No sentences with concept " + concepts[i]);
        }
        // Smoothed IDF weighting for this concept's row.
        double inverseDocumentFreq = Math.Log(documentLength / (sentencesWithConcept + 0.0001), 2.0);
        for (int k = 0; k < X.ColumnCount; ++k)
        {
            X[i, k] = X[i, k] * inverseDocumentFreq;
        }
    }

    // Compute SVD of the topic representation matrix, X.
    var svd = X.Svd();

    // Cross method: zero out below-average entries per concept row of |V^T|.
    int columnCount = svd.VT.ColumnCount;
    Matrix <double> Vh = svd.VT.SubMatrix(0, concepts.Length, 0, columnCount).PointwiseAbs();
    for (int i = 0; i < Vh.RowCount; ++i)
    {
        double averageSentenceScore = Vh.Row(i).Average();
        for (int j = 0; j < Vh.ColumnCount; ++j)
        {
            if (Vh[i, j] <= averageSentenceScore)
            {
                Vh[i, j] = 0;
            }
        }
    }

    // For each concept, pick the sentence with the highest remaining score.
    int[] summaryIndices = new int[Vh.RowCount];
    // FIX: the original Console.Write calls passed the value without a format
    // placeholder, so it was never printed.
    Console.Write("Vh.RowCnt = {0}", Vh.RowCount);
    Console.Write("concepts.Length = {0}", concepts.Length);
    for (int i = 0; i < Vh.RowCount; ++i)
    {
        double max = 0;
        for (int j = 0; j < Vh.ColumnCount; ++j)
        {
            if (Vh[i, j] > max)
            {
                max = Vh[i, j];
                summaryIndices[i] = j;
            }
        }
    }

    // Map the selected indices back onto the original (unprocessed) sentences.
    // NOTE(review): this split may yield a different sentence count than the
    // preprocessing split above — confirm the indices line up.
    string[] sourceSentences = Regex.Split(input, @"(?<=[\.!\?])\s+");
    textFile.DocumentLength = sourceSentences.Length;
    string summary = "";
    foreach (int i in summaryIndices)
    {
        summary += sourceSentences[i] + " ";
    }

    /* From https://bit.ly/3ogjy2l */
    return summary.Replace("\r\n", string.Empty)
           .Replace("\n", string.Empty)
           .Replace("\r", string.Empty)
           .Replace("\t", string.Empty)
           .Replace(((char)0x2028).ToString(), string.Empty)
           .Replace(((char)0x2029).ToString(), string.Empty);
}