/// <summary>
/// Appends the items of this WordsCollection, from the given
/// position through the last item, to another collection.
/// </summary>
/// <param name="col" type="WordsCollection">
/// Collection that receives the items
/// </param>
/// <param name="index" type="integer">
/// Position in this collection of the first item to copy
/// </param>
public void CopyTo(WordsCollection col, int index)
{
    int position = index;

    // List.Count is re-read on every pass, exactly as a for-loop condition would.
    while (position < List.Count)
    {
        col.Add(this[position]);
        position++;
    }
}
/// <summary>
/// Adds every item of this collection located at or after
/// <paramref name="index"/> to <paramref name="col"/>.
/// </summary>
/// <param name="col">Collection the items are added to</param>
/// <param name="index">Position of the first item to add</param>
internal void CopyTo(WordsCollection col, int index)
{
    var current = index;
    while (current < List.Count)
    {
        var item = this[current];
        col.Add(item);
        current++;
    }
}
//TODO: use nhibernate join
/// <summary>
/// Clears WordsCollection and refills it with one WordViewModel per distinct
/// word (distinct by Id) returned by WordsService for the given criterion.
/// When a criterion is supplied, the words referenced by matching
/// translations (their WordsStoredIn) are appended afterwards, again
/// skipping every Id that is already present.
/// </summary>
/// <param name="expression">
/// Criterion passed to WordsService.GetDataAsync; null skips the
/// translation lookup
/// </param>
private async void LoadWordsCollection(NHibernate.Criterion.ICriterion expression = null)
{
    var uniqueWords = new List<Word>();
    WordsCollection.Clear();

    // First pass: words returned directly for the criterion.
    var directMatches = await WordsService.GetDataAsync<Word>(expression);
    if (directMatches != null)
    {
        foreach (var candidate in directMatches)
        {
            if (uniqueWords.Exists(known => known.Id == candidate.Id))
            {
                continue;
            }

            uniqueWords.Add(candidate);
        }
    }

    for (var i = 0; i < uniqueWords.Count; i++)
    {
        WordsCollection.Add(new WordViewModel { WordToDisplay = uniqueWords[i] });
    }

    // Everything from this position on still has to be shown.
    var firstPending = uniqueWords.Count;

    // Second pass: only in case of searching it does matter.
    if (expression != null)
    {
        var translationMatches = await WordsService.GetDataAsync<Translation>(expression);
        if (translationMatches != null)
        {
            foreach (var translation in translationMatches)
            {
                foreach (var stored in translation.WordsStoredIn)
                {
                    if (uniqueWords.Exists(known => known.Id == stored.Id))
                    {
                        continue;
                    }

                    uniqueWords.Add(stored);
                }
            }
        }
    }

    for (var i = firstPending; i < uniqueWords.Count; i++)
    {
        WordsCollection.Add(new WordViewModel { WordToDisplay = uniqueWords[i] });
    }
}
/// <summary>
/// Splits a raw token into one or two Word entries and adds them
/// to the collection.
/// </summary>
/// <param name="words">
/// Collection the resulting word(s) are added to
/// </param>
/// <param name="prefix">
/// Prefix that precedes the token; attached to the first entry
/// </param>
/// <param name="word">
/// Token that may be a real word or may carry one leading special
/// character or one trailing punctuation character
/// </param>
/// <param name="suffix">
/// Suffix that follows the token; attached to the last entry
/// </param>
private static void processWord(WordsCollection words, string prefix, string word, string suffix)
{
    // A single character is never split; longer tokens are inspected
    // at both ends, the leading character taking precedence.
    if (word.Length != 1)
    {
        char first = word[0];
        if (!char.IsLetterOrDigit(first))
        {
            // leading special character such as '(' or '"':
            // report it separately, the remainder keeps the suffix
            words.Add(new Word(first.ToString(), prefix, ""));
            words.Add(new Word(word.Substring(1), "", suffix));
            return;
        }

        int lastIndex = word.Length - 1;
        char last = word[lastIndex];
        if (char.IsPunctuation(last))
        {
            // trailing punctuation: the punctuation becomes its own
            // entry and carries the suffix
            words.Add(new Word(word.Substring(0, lastIndex), prefix, ""));
            words.Add(new Word(last.ToString(), "", suffix));
            return;
        }
    }

    // nothing to split off, report the token as it is
    words.Add(new Word(word, prefix, suffix));
}
/// <summary>
/// Reads the input file line by line into a WordsCollection, then writes
/// the grouped words, their line counts and their positions to the output
/// file. Any failure is reported on the console; the program waits for a
/// key press before exiting.
/// </summary>
/// <param name="args">Command line arguments (not used)</param>
static void Main(string[] args)
{
    WordsCollection wordsCollection = new WordsCollection();
    try
    {
        // using guarantees the reader is released even when reading or
        // line parsing throws.
        using (StreamReader sr = new StreamReader(_pathIn))
        {
            while (!sr.EndOfStream)
            {
                wordsCollection.Add(new WordsLine(LineBuilder.StringSplit(sr.ReadLine(), _seperator)));
            }
        }

        try
        {
            // using flushes and releases the writer even when one of the
            // collection queries throws half-way through the report.
            using (StreamWriter sw = new StreamWriter(_pathOut))
            {
                foreach (var i in wordsCollection.GetSingleOrderWords())
                {
                    sw.WriteLine("[{0}] ", i.Key);
                    foreach (var j in i)
                    {
                        sw.WriteLine("{0} - {1} - {2}", j, wordsCollection.GetWordsLinesCount(j),
                            String.Join(",", wordsCollection.GetPositionsWord(j).ToArray()));
                    }
                }
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("The process failed: {0}", e);
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("The process failed: {0}", e);
    }
    Console.ReadKey();
}
/// <summary>
/// Static method that parses the passed-in string into
/// Words collection
/// </summary>
/// <param name="s">
/// String to split; it may contain html tags and html character
/// entities
/// </param>
/// <returns>
/// Words Collection
/// </returns>
static public WordsCollection parse(string s)
{
    int curPos = 0;
    int prevPos;
    string prefix = string.Empty;
    string suffix;
    string word;
    WordsCollection words = new WordsCollection();

    while (curPos < s.Length)
    {
        // eat the leading or trailing white spaces
        prevPos = curPos;
        while (curPos < s.Length && (char.IsControl(s[curPos]) || char.IsWhiteSpace(s[curPos])))
        {
            curPos++;
        }
        prefix += s.Substring(prevPos, curPos - prevPos);

        if (curPos == s.Length)
        {
            // it is possible that there is something in the prefix
            if (prefix != string.Empty)
            {
                // report an empty word with prefix.
                words.Add(new Word("", prefix, ""));
            }
            break;
        }

        // we have 3 different cases here,
        //  1) if the string starts with '<', we assume
        //     that it is a html tag which will be put
        //     into prefix.
        //  2) starts with '&', we need to check if it is
        //     "&nbsp;" or "&#xxx;". If it is the former,
        //     we treat it as prefix and if it is the latter,
        //     we treat it as a word.
        //  3) a string that may be a real word or a set
        //     of words separated by "&nbsp;" or may have
        //     a leading special character or trailing
        //     punctuation.
        //
        // Another possible case that is too complicated
        // or expensive to handle is that some special
        // characters are embedded inside the word with
        // no space separation
        if (s[curPos] == '<')
        {
            // it is a html tag, consume it as prefix.
            // The bounds check must come first, otherwise an unclosed
            // tag indexes past the end of the string.
            prevPos = curPos;
            while (curPos < s.Length && s[curPos] != '>')
            {
                curPos++;
            }

            if (curPos == s.Length)
            {
                // the html tag is not closed. Anyway, we are not
                // validating html, so take the rest of the string as
                // prefix and report an empty word with prefix.
                prefix += s.Substring(prevPos);
                words.Add(new Word("", prefix, ""));
                break;
            }

            // include the closing '>' and move past it
            prefix += s.Substring(prevPos, curPos - prevPos + 1);
            curPos++;
            if (curPos == s.Length)
            {
                // the html tag is closed but nothing more
                // behind, so report an empty word with prefix.
                words.Add(new Word("", prefix, ""));
                break;
            }
            continue;
        }
        else if (s[curPos] == '&')
        {
            // NOTE: the "curPos + N < s.Length" bounds mean an entity
            // that occupies the very last characters is not matched
            // here; it falls through to the generic '&' word handling
            // below, so trailing text is never dropped.

            // case for html whitespace
            if (curPos + 6 < s.Length && s.Substring(curPos, 6) == "&nbsp;")
            {
                prefix += "&nbsp;";
                curPos += 6;
                continue;
            }

            // case for special character like "&#123;" etc
            if (curPos + 6 < s.Length && Regex.IsMatch(s.Substring(curPos, 6), @"&#[0-9]{3};"))
            {
                words.Add(new Word(s.Substring(curPos, 6), prefix, ""));
                prefix = string.Empty;
                curPos += 6;
                continue;
            }

            // case for special character like "&#12;" etc
            if (curPos + 5 < s.Length && Regex.IsMatch(s.Substring(curPos, 5), @"&#[0-9]{2};"))
            {
                words.Add(new Word(s.Substring(curPos, 5), prefix, ""));
                prefix = string.Empty;
                curPos += 5;
                continue;
            }

            // can't think of anything else that is special,
            // have to treat it as a word led by '&'. Hope
            // it is just a single '&' meaning "and".
            prevPos = curPos;
            while (curPos < s.Length && !char.IsControl(s[curPos])
                   && !char.IsWhiteSpace(s[curPos]) && s[curPos] != '<')
            {
                curPos++;
            }
            word = s.Substring(prevPos, curPos - prevPos);

            // eat the following whitespace as suffix
            prevPos = curPos;
            while (curPos < s.Length && (char.IsControl(s[curPos]) || char.IsWhiteSpace(s[curPos])))
            {
                curPos++;
            }
            suffix = s.Substring(prevPos, curPos - prevPos);

            words.Add(new Word(word, prefix, suffix));
            prefix = string.Empty;
        }
        else
        {
            // eat the word
            prevPos = curPos;
            while (curPos < s.Length && !char.IsControl(s[curPos])
                   && !char.IsWhiteSpace(s[curPos]) && s[curPos] != '<' && s[curPos] != '&')
            {
                curPos++;
            }
            word = s.Substring(prevPos, curPos - prevPos);

            // if there are newlines or spaces following
            // the word, consume them as suffix
            prevPos = curPos;
            while (curPos < s.Length && (char.IsControl(s[curPos]) || char.IsWhiteSpace(s[curPos])))
            {
                curPos++;
            }
            suffix = s.Substring(prevPos, curPos - prevPos);

            processWord(words, prefix, word, suffix);
            prefix = string.Empty;
        }
    }

    return words;
}
/// <summary>
/// Adds the prefix, the word and the suffix of one token to the
/// collection as separate Word entries.
/// </summary>
/// <param name="words">Collection the entries are added to</param>
/// <param name="prefix">Text in front of the word; skipped when empty</param>
/// <param name="word">
/// The word itself; depending on Processor.Settings.comparisonType it is
/// added character by character or in runs
/// </param>
/// <param name="suffix">Text behind the word; skipped when empty</param>
private static void AddWordsCollection(WordsCollection words, string prefix, string word, string suffix)
{
    #region  |  prefix  |

    if (prefix != string.Empty)
    {
        var prefixIsWhitespaceRun = prefix.Trim() == string.Empty && prefix.Length > 1;
        if (prefixIsWhitespaceRun)
        {
            // check for double spaces: the first character stays a prefix,
            // every further character becomes a word holding char 160
            words.Add(new Word(string.Empty, prefix[0].ToString(), string.Empty));
            for (var extra = 1; extra < prefix.Length; extra++)
            {
                words.Add(new Word(((char)160).ToString(), string.Empty, string.Empty));
            }
        }
        else
        {
            words.Add(new Word("", prefix, ""));
        }
    }

    #endregion

    #region  |  word  |

    // characters collected so far that still have to be added as one word
    var pending = string.Empty;
    foreach (var current in word.ToCharArray())
    {
        // every character is treated as a word
        if (Processor.Settings.comparisonType == Settings.ComparisonType.Characters)
        {
            words.Add(new Word(current.ToString(), string.Empty, string.Empty));
            continue;
        }

        // characters needing at most 2 UTF-8 bytes are collected into a run
        if (Encoding.UTF8.GetByteCount(current.ToString()) <= 2)
        {
            pending += current.ToString();
            continue;
        }

        // a character needing more than 2 UTF-8 bytes ends the current
        // run and is added as a word of its own
        if (pending != string.Empty)
        {
            words.Add(new Word(pending, string.Empty, string.Empty));
        }
        pending = string.Empty;
        words.Add(new Word(current.ToString(), string.Empty, string.Empty));
    }

    if (pending != string.Empty)
    {
        words.Add(new Word(pending, string.Empty, string.Empty));
    }

    #endregion

    #region  |  suffix  |

    if (suffix == string.Empty)
    {
        return;
    }

    var suffixIsWhitespaceRun = suffix.Trim() == string.Empty && suffix.Length > 1;
    if (!suffixIsWhitespaceRun)
    {
        words.Add(new Word(string.Empty, string.Empty, suffix));
        return;
    }

    // check for double spaces: the first character stays a suffix,
    // every further character becomes a word holding char 160
    words.Add(new Word(string.Empty, string.Empty, suffix[0].ToString()));
    for (var extra = 1; extra < suffix.Length; extra++)
    {
        words.Add(new Word(((char)160).ToString(), string.Empty, string.Empty));
    }

    #endregion
}
/// <summary>
/// Converts a list of segment sections into a WordsCollection.
/// A section whose type is not Text is added as one word wrapped in the
/// MarkupTag element; a Text section is split at control/whitespace
/// characters and every token is handed to ProcessWord together with the
/// whitespace around it.
/// </summary>
/// <param name="xSegmentSections">Sections to convert, in order</param>
/// <returns>The collection holding the words of all sections</returns>
public static WordsCollection Parse(List<SegmentSection> xSegmentSections)
{
    var prefix = string.Empty;
    var words = new WordsCollection();

    foreach (var xSegmentSection in xSegmentSections)
    {
        if (xSegmentSection.Type != SegmentSection.ContentType.Text)
        {
            // non-text content is kept as a single word, without prefix or suffix
            words.Add(new Word("<" + MarkupTag + ">" + xSegmentSection.Content + "</" + MarkupTag + ">",
                string.Empty, string.Empty));
            prefix = string.Empty;
            continue;
        }

        var content = xSegmentSection.Content;
        var curPos = 0;
        while (curPos < content.Length)
        {
            // leading control/whitespace characters become the prefix
            var prevPos = curPos;
            while (curPos < content.Length && (char.IsControl(content[curPos]) || char.IsWhiteSpace(content[curPos])))
            {
                curPos++;
            }
            prefix += content.Substring(prevPos, curPos - prevPos);

            if (curPos == content.Length)
            {
                // only whitespace was left: report it as an empty word
                if (prefix != string.Empty)
                {
                    words.Add(new Word(string.Empty, prefix, string.Empty));
                }

                // The prefix has been reported; clear it so that a
                // following text section does not report the same
                // whitespace a second time.
                prefix = string.Empty;
                break;
            }

            // the token itself
            prevPos = curPos;
            while (curPos < content.Length && !char.IsControl(content[curPos]) && !char.IsWhiteSpace(content[curPos]))
            {
                curPos++;
            }
            var word = content.Substring(prevPos, curPos - prevPos);

            // control/whitespace characters after the token become the suffix
            prevPos = curPos;
            while (curPos < content.Length && (char.IsControl(content[curPos]) || char.IsWhiteSpace(content[curPos])))
            {
                curPos++;
            }
            var suffix = content.Substring(prevPos, curPos - prevPos);

            ProcessWord(words, prefix, word, suffix);
            prefix = string.Empty;
        }
    }

    return words;
}
/// <summary>
/// Reads the input file, splits it into lines and sentences, fills a
/// WordsCollection and prints the collection followed by the results of
/// several of its queries. Any failure is reported on the console; the
/// program waits for a key press before exiting.
/// </summary>
/// <param name="args">Command line arguments (not used)</param>
static void Main(string[] args)
{
    LinesBuilder linesBuilder = new LinesBuilder(_seperatorLine, _seperatorSentence);
    WordsCollection wordsCollection = new WordsCollection();
    List<String> linesList = new List<String>();
    try
    {
        // using guarantees the reader is released even when reading or
        // line splitting throws.
        using (StreamReader sr = new StreamReader(_pathIn))
        {
            while (!sr.EndOfStream)
            {
                foreach (var i in linesBuilder.GetLinesFromString(sr.ReadLine()))
                {
                    linesList.Add(i);
                }
            }
        }

        foreach (var i in linesList)
        {
            wordsCollection.Add(new WordsLine(linesBuilder.GetSentencesFromString(i).ToArray()));
        }

        foreach (var i in wordsCollection)
        {
            Console.WriteLine(i.ToString());
        }

        Console.WriteLine("\n - wordsCollection.GetLinesSortedByWordsCount()");
        foreach (var i in wordsCollection.GetLinesSortedByWordsCount())
        {
            Console.WriteLine(i.ToString());
        }

        Console.WriteLine("\n - wordsCollection.GetWordsInQuestionLines(1)");
        foreach (var i in wordsCollection.GetWordsInQuestionLines(1))
        {
            Console.WriteLine(i.ToString());
        }

        Console.WriteLine("\n - wordsCollection.GetLinesWithDeletedWords(5, seperstorConsonants)");
        var seperstorConsonants = new char[]
        {
            'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n',
            'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'y', 'z'
        };
        foreach (var i in wordsCollection.GetLinesWithDeletedWords(5, seperstorConsonants))
        {
            Console.WriteLine(i.ToString());
        }

        Console.WriteLine("\n - wordsCollection.GetLinesWithReplacedWords(4,'yelow')");
        foreach (var i in wordsCollection.GetLinesWithReplacedWords(4, "yelow"))
        {
            Console.WriteLine(i.ToString());
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("The process failed: {0}", e);
    }
    Console.ReadKey();
}
/// <summary>
/// Static method that parses the passed-in string into
/// Words collection
/// </summary>
/// <param name="s">
/// String to split; it may contain html tags and html character
/// entities
/// </param>
/// <returns>
/// Words Collection
/// </returns>
public static WordsCollection parse(string s)
{
    int curPos = 0;
    int prevPos;
    string prefix = string.Empty;
    string suffix;
    string word;
    WordsCollection words = new WordsCollection();

    while (curPos < s.Length)
    {
        // eat the leading or trailing white spaces
        prevPos = curPos;
        while (curPos < s.Length && (char.IsControl(s[curPos]) || char.IsWhiteSpace(s[curPos])))
        {
            curPos++;
        }
        prefix += s.Substring(prevPos, curPos - prevPos);

        if (curPos == s.Length)
        {
            // it is possible that there is something in the prefix
            if (prefix != string.Empty)
            {
                // report an empty word with prefix.
                words.Add(new Word("", prefix, ""));
            }
            break;
        }

        // we have 3 different cases here,
        //  1) if the string starts with '<', we assume
        //     that it is a html tag which will be put
        //     into prefix.
        //  2) starts with '&', we need to check if it is
        //     "&nbsp;" or "&#xxx;". If it is the former,
        //     we treat it as prefix and if it is the latter,
        //     we treat it as a word.
        //  3) a string that may be a real word or a set
        //     of words separated by "&nbsp;" or may have
        //     a leading special character or trailing
        //     punctuation.
        //
        // Another possible case that is too complicated
        // or expensive to handle is that some special
        // characters are embedded inside the word with
        // no space separation
        if (s[curPos] == '<')
        {
            // it is a html tag, consume it as prefix.
            // Check the bounds before reading the character, otherwise
            // an unclosed tag reads past the end of the string.
            prevPos = curPos;
            while (curPos < s.Length && s[curPos] != '>')
            {
                curPos++;
            }

            if (curPos == s.Length)
            {
                // the html tag is not closed. Anyway, we are not
                // validating html, so the remainder of the string
                // becomes the prefix of an empty word.
                prefix += s.Substring(prevPos);
                words.Add(new Word("", prefix, ""));
                break;
            }

            // curPos is pointing to '>': keep it in the prefix
            // and move to the next character.
            prefix += s.Substring(prevPos, curPos - prevPos + 1);
            curPos++;
            if (curPos == s.Length)
            {
                // the html tag is closed but nothing more
                // behind, so report an empty word with prefix.
                words.Add(new Word("", prefix, ""));
                break;
            }
            continue;
        }
        else if (s[curPos] == '&')
        {
            // NOTE: because of the "curPos + N < s.Length" bounds an
            // entity sitting at the very end of the string is not
            // matched by the special cases; the generic '&' word
            // handling below picks it up, so nothing is lost.

            // case for html whitespace
            if (curPos + 6 < s.Length && s.Substring(curPos, 6) == "&nbsp;")
            {
                prefix += "&nbsp;";
                curPos += 6;
                continue;
            }

            // case for special character like "&#123;" etc
            if (curPos + 6 < s.Length && Regex.IsMatch(s.Substring(curPos, 6), @"&#[0-9]{3};"))
            {
                words.Add(new Word(s.Substring(curPos, 6), prefix, ""));
                prefix = string.Empty;
                curPos += 6;
                continue;
            }

            // case for special character like "&#12;" etc
            if (curPos + 5 < s.Length && Regex.IsMatch(s.Substring(curPos, 5), @"&#[0-9]{2};"))
            {
                words.Add(new Word(s.Substring(curPos, 5), prefix, ""));
                prefix = string.Empty;
                curPos += 5;
                continue;
            }

            // can't think of anything else that is special,
            // have to treat it as a word led by '&'. Hope
            // it is just a single '&' meaning "and".
            prevPos = curPos;
            while (curPos < s.Length && !char.IsControl(s[curPos])
                   && !char.IsWhiteSpace(s[curPos]) && s[curPos] != '<')
            {
                curPos++;
            }
            word = s.Substring(prevPos, curPos - prevPos);

            // eat the following whitespace as suffix
            prevPos = curPos;
            while (curPos < s.Length && (char.IsControl(s[curPos]) || char.IsWhiteSpace(s[curPos])))
            {
                curPos++;
            }
            suffix = s.Substring(prevPos, curPos - prevPos);

            words.Add(new Word(word, prefix, suffix));
            prefix = string.Empty;
        }
        else
        {
            // eat the word
            prevPos = curPos;
            while (curPos < s.Length && !char.IsControl(s[curPos])
                   && !char.IsWhiteSpace(s[curPos]) && s[curPos] != '<' && s[curPos] != '&')
            {
                curPos++;
            }
            word = s.Substring(prevPos, curPos - prevPos);

            // if there are newlines or spaces following
            // the word, consume them as suffix
            prevPos = curPos;
            while (curPos < s.Length && (char.IsControl(s[curPos]) || char.IsWhiteSpace(s[curPos])))
            {
                curPos++;
            }
            suffix = s.Substring(prevPos, curPos - prevPos);

            processWord(words, prefix, word, suffix);
            prefix = string.Empty;
        }
    }

    return words;
}