Пример #1
0
 /// <summary>
 /// Appends the items of this WordsCollection, from the specified
 /// index position through the last item, to another WordsCollection
 /// </summary>
 /// <param name="col" type="WordsCollection">
 /// WordsCollection that receives the items
 /// </param>
 /// <param name="index" type="integer">
 /// Index of the first item to copy; nothing is copied when it is
 /// not smaller than the number of items
 /// </param>
 public void CopyTo(WordsCollection col, int index)
 {
     // Read the item count once. If col is this same collection and Add
     // appends to List, re-reading List.Count on every pass would keep
     // moving the end of the loop and it would never terminate.
     int count = List.Count;

     for (int i = index; i < count; i++)
     {
         col.Add(this[i]);
     }
 }
Пример #2
0
 /// <summary>
 /// Appends the items of this collection, from <paramref name="index"/>
 /// through the last item, to <paramref name="col"/>.
 /// </summary>
 /// <param name="col">Collection that receives the items.</param>
 /// <param name="index">Index of the first item to copy.</param>
 internal void CopyTo(WordsCollection col, int index)
 {
     // Snapshot the count: if col is this same collection and Add appends
     // to List, a live List.Count would grow with every Add and the loop
     // would never finish.
     var count = List.Count;

     for (var i = index; i < count; i++)
     {
         col.Add(this[i]);
     }
 }
        //TODO: use nhibernate join
        /// <summary>
        /// Clears WordsCollection and refills it with one WordViewModel per
        /// distinct word (distinct by Id) returned by WordsService for the
        /// given criterion. When a criterion is supplied, the words referenced
        /// by matching translations are appended afterwards.
        /// </summary>
        /// <param name="expression">
        /// Criterion passed to both WordsService queries; null skips the
        /// translation query.
        /// </param>
        private async void LoadWordsCollection(NHibernate.Criterion.ICriterion expression = null)
        {
            var uniqueWords = new List <Word>();

            WordsCollection.Clear();

            // First pass: words returned directly for the criterion.
            var matchedWords = await WordsService.GetDataAsync <Word>(expression);
            if (matchedWords != null)
            {
                foreach (var candidate in matchedWords)
                {
                    if (uniqueWords.Exists(known => known.Id == candidate.Id))
                    {
                        continue;
                    }
                    uniqueWords.Add(candidate);
                }
            }

            foreach (var unique in uniqueWords)
            {
                WordsCollection.Add(new WordViewModel { WordToDisplay = unique });
            }

            // The translation lookup only matters when searching.
            if (expression == null)
            {
                return;
            }

            // Everything before this position is already displayed.
            var firstPassCount = uniqueWords.Count;

            var matchedTranslations = await WordsService.GetDataAsync <Translation>(expression);
            if (matchedTranslations != null)
            {
                foreach (var translation in matchedTranslations)
                {
                    foreach (var owner in translation.WordsStoredIn)
                    {
                        if (uniqueWords.Exists(known => known.Id == owner.Id))
                        {
                            continue;
                        }
                        uniqueWords.Add(owner);
                    }
                }
            }

            // Display only the words discovered through translations.
            for (var position = firstPassCount; position < uniqueWords.Count; position++)
            {
                WordsCollection.Add(new WordViewModel { WordToDisplay = uniqueWords[position] });
            }
        }
Пример #4
0
            /// <summary>
            /// Further processing of a string: splits off one leading
            /// non-alphanumeric character or one trailing punctuation
            /// character and adds the resulting word(s) to the collection
            /// </summary>
            /// <param name="words">
            /// Collection that new word(s) will be added in
            /// </param>
            /// <param name="prefix">
            /// prefix that comes with the string; attached to the first
            /// word that is added
            /// </param>
            /// <param name="word">
            /// A string that may be a real word or have a leading special
            /// character or trailing punctuation
            /// </param>
            /// <param name="suffix">
            /// suffix that comes with the string; attached to the last
            /// word that is added
            /// </param>
            private static void processWord(WordsCollection words,
                                            string prefix, string word, string suffix)
            {
                // the passed in word may have leading special
                // characters such as '(', '"' etc or trailing
                // punctuation. We need to sort this out.
                int length = word.Length;

                if (length <= 1)
                {
                    // nothing to split off. This also covers the empty
                    // string, for which word[0] below would throw.
                    words.Add(new Word(word, prefix, suffix));
                }
                else if (!char.IsLetterOrDigit(word[0]))
                {
                    // it is some kind of special character in the first place
                    // report it separately; the remainder is not examined again
                    words.Add(new Word(word[0].ToString(), prefix, ""));
                    words.Add(new Word(word.Substring(1), "", suffix));
                }
                else if (char.IsPunctuation(word[length - 1]))
                {
                    // there is an end punctuation
                    words.Add(new Word(word.Substring(0, length - 1), prefix, ""));
                    words.Add(new Word(word[length - 1].ToString(), "", suffix));
                }
                else
                {
                    // it is a real word(hope so)
                    words.Add(new Word(word, prefix, suffix));
                }
            }
Пример #5
0
        /// <summary>
        /// Reads the input file line by line into a WordsCollection, then
        /// writes a report to the output file: each group key in brackets
        /// followed by one line per group member. Any failure is printed
        /// to the console; the program then waits for a key press.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            WordsCollection wordsCollection = new WordsCollection();

            try
            {
                // using closes the reader even when a line fails to load
                using (StreamReader sr = new StreamReader(_pathIn))
                {
                    while (!sr.EndOfStream)
                    {
                        wordsCollection.Add(new WordsLine(LineBuilder.StringSplit(sr.ReadLine(), _seperator)));
                    }
                }

                try
                {
                    // using flushes and closes the writer even when building
                    // the report throws part-way through
                    using (StreamWriter sw = new StreamWriter(_pathOut))
                    {
                        foreach (var i in wordsCollection.GetSingleOrderWords())
                        {
                            sw.WriteLine("[{0}] ", i.Key);
                            foreach (var j in i)
                            {
                                sw.WriteLine("{0} - {1} - {2}",
                                    j,
                                    wordsCollection.GetWordsLinesCount(j),
                                    String.Join(",", wordsCollection.GetPositionsWord(j).ToArray()));
                            }
                        }
                    }
                }
                catch (Exception e)
                {
                    Console.WriteLine("The process failed: {0}", e);
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("The process failed: {0}", e);
            }

            Console.ReadKey();
        }
Пример #6
0
            /// <summary>
            /// Static method that parses the passed-in string into
            /// Words collection. Whitespace, control characters, html
            /// tags and "&amp;nbsp;" are collected as prefix/suffix of
            /// the surrounding words.
            /// </summary>
            /// <param name="s">
            /// String to parse; may contain html tags and character
            /// entities
            /// </param>
            /// <returns>
            /// Words Collection
            /// </returns>
            /// <remarks>
            /// An html tag that is never closed is reported as an empty
            /// word whose prefix holds the rest of the string.
            /// </remarks>
            public static WordsCollection parse(string s)
            {
                int             curPos = 0;
                int             prevPos;
                string          prefix = string.Empty;
                string          suffix = string.Empty;
                string          word   = string.Empty;
                WordsCollection words  = new WordsCollection();

                while (curPos < s.Length)
                {
                    // eat the leading or trailing white spaces
                    prevPos = curPos;
                    while (curPos < s.Length &&
                           (char.IsControl(s[curPos]) ||
                            char.IsWhiteSpace(s[curPos])))
                    {
                        curPos++;
                    }
                    prefix += s.Substring(prevPos, curPos - prevPos);

                    if (curPos == s.Length)
                    {
                        // it is possible that there is
                        // something in the prefix
                        if (prefix != string.Empty)
                        {
                            // report an empty word with prefix.
                            words.Add(new Word("", prefix, ""));
                        }
                        break;
                    }

                    // we have 3 different cases here,
                    // 1) if the string starts with '<', we assume
                    //    that it is a html tag which will be put
                    //    into prefix.
                    // 2) starts with '&', we need to check if it is
                    //    "&nbsp;" or "&#xxx;". If it is the former,
                    //    we treat it as prefix and if it is latter,
                    //    we treat it as a word.
                    // 3) a string that may be a real word or a set
                    //    of words separated by "&nbsp;" or may have
                    //    leading special character or trailing
                    //    punctuation.
                    //
                    // Another possible case that is too complicated
                    // or expensive to handle is that some special
                    // characters are embedded inside the word with
                    // no space separation
                    if (s[curPos] == '<')
                    {
                        // it is a html tag, consume it
                        // as prefix. The bounds check has to come
                        // before s[curPos] is read, otherwise an
                        // unclosed tag indexes past the end.
                        prevPos = curPos;
                        while (curPos < s.Length && s[curPos] != '>')
                        {
                            curPos++;
                        }

                        if (curPos == s.Length)
                        {
                            // if we come to this point, it means
                            // the html tag is not closed. Anyway,
                            // we are not validating html, so just
                            // report an empty word whose prefix is
                            // everything up to the end of the string.
                            prefix += s.Substring(prevPos);
                            words.Add(new Word("", prefix, ""));
                            break;
                        }

                        // curPos is pointing to '>': take the tag
                        // including '>' and move to the next char.
                        prefix += s.Substring(prevPos, curPos - prevPos + 1);
                        curPos++;
                        if (curPos == s.Length)
                        {
                            // the html tag is closed but nothing more
                            // behind, so report an empty word with prefix.
                            words.Add(new Word("", prefix, ""));
                            break;
                        }
                        continue;
                    }
                    else if (s[curPos] == '&')
                    {
                        prevPos = curPos;

                        // NOTE(review): the three length checks below use
                        // '<', so an entity that ends the string is not
                        // matched here and falls through to the generic
                        // '&' word handling - confirm this is intended.

                        // case for html whitespace
                        if (curPos + 6 < s.Length &&
                            s.Substring(prevPos, 6) == "&nbsp;")
                        {
                            prefix += "&nbsp;";
                            curPos += 6;
                            continue;
                        }

                        // case for special character like "&#123;" etc
                        if (curPos + 6 < s.Length &&
                            Regex.IsMatch(s.Substring(prevPos, 6), @"&#[0-9]{3};"))
                        {
                            words.Add(new Word(s.Substring(prevPos, 6), prefix, ""));
                            prefix  = string.Empty;
                            curPos += 6;
                            continue;
                        }

                        // case for special character like "&#12;" etc
                        if (curPos + 5 < s.Length &&
                            Regex.IsMatch(s.Substring(prevPos, 5), @"&#[0-9]{2};"))
                        {
                            words.Add(new Word(s.Substring(prevPos, 5), prefix, ""));
                            prefix  = string.Empty;
                            curPos += 5;
                            continue;
                        }

                        // can't think of anything else that is special,
                        // have to treat it as a '&' led word. Hope
                        // it is just single '&' for and in meaning.
                        prevPos = curPos;
                        while (curPos < s.Length &&
                               !char.IsControl(s[curPos]) &&
                               !char.IsWhiteSpace(s[curPos]) &&
                               s[curPos] != '<')
                        {
                            curPos++;
                        }
                        word = s.Substring(prevPos, curPos - prevPos);

                        // eat the following whitespace as suffix
                        // (suffix is always empty at this point)
                        prevPos = curPos;
                        while (curPos < s.Length &&
                               (char.IsControl(s[curPos]) ||
                                char.IsWhiteSpace(s[curPos])))
                        {
                            curPos++;
                        }
                        suffix = s.Substring(prevPos, curPos - prevPos);

                        words.Add(new Word(word, prefix, suffix));
                        prefix = string.Empty;
                        suffix = string.Empty;
                    }
                    else
                    {
                        // eat the word
                        prevPos = curPos;
                        while (curPos < s.Length &&
                               !char.IsControl(s[curPos]) &&
                               !char.IsWhiteSpace(s[curPos]) &&
                               s[curPos] != '<' &&
                               s[curPos] != '&')
                        {
                            curPos++;
                        }
                        word = s.Substring(prevPos, curPos - prevPos);

                        // if there are newlines or spaces following
                        // the word, consume them as suffix
                        prevPos = curPos;
                        while (curPos < s.Length &&
                               (char.IsControl(s[curPos]) ||
                                char.IsWhiteSpace(s[curPos])))
                        {
                            curPos++;
                        }
                        suffix = s.Substring(prevPos, curPos - prevPos);
                        processWord(words, prefix, word, suffix);
                        prefix = string.Empty;
                        suffix = string.Empty;
                    }
                }
                return(words);
            }
Пример #7
0
            /// <summary>
            /// Adds prefix, word and suffix to the collection as separate
            /// Word entries. A prefix or suffix made only of whitespace and
            /// longer than one character keeps its first character and
            /// contributes one Word holding character 160 for each further
            /// character. The word is added character by character when the
            /// comparison type is Characters; otherwise characters whose
            /// UTF-8 encoding is longer than two bytes are added on their
            /// own and the runs between them are added as whole words.
            /// </summary>
            /// <param name="words">Collection that receives the entries.</param>
            /// <param name="prefix">Text preceding the word.</param>
            /// <param name="word">The word text itself.</param>
            /// <param name="suffix">Text following the word.</param>
            private static void AddWordsCollection(WordsCollection words, string prefix, string word, string suffix)
            {
                // ---- prefix ----
                if (prefix != string.Empty)
                {
                    var prefixIsSpaceRun = prefix.Trim() == string.Empty && prefix.Length > 1;
                    if (!prefixIsSpaceRun)
                    {
                        words.Add(new Word("", prefix, ""));
                    }
                    else
                    {
                        // double spaces: the first character stays a prefix,
                        // each further one becomes a Word of character 160
                        words.Add(new Word(string.Empty, prefix[0].ToString(), string.Empty));
                        for (var extra = 1; extra < prefix.Length; extra++)
                        {
                            words.Add(new Word(((char)160).ToString(), string.Empty, string.Empty));
                        }
                    }
                }

                // ---- word ----
                var pending = string.Empty;
                foreach (var current in word)
                {
                    var currentText = current.ToString();

                    // every character is treated as a word
                    if (Processor.Settings.comparisonType == Settings.ComparisonType.Characters)
                    {
                        words.Add(new Word(currentText, string.Empty, string.Empty));
                        continue;
                    }

                    // at most two UTF-8 bytes: keep collecting the run
                    if (Encoding.UTF8.GetByteCount(currentText) <= 2)
                    {
                        pending += currentText;
                        continue;
                    }

                    // wider character: flush the collected run, then add
                    // the character as a word of its own
                    if (pending != string.Empty)
                    {
                        words.Add(new Word(pending, string.Empty, string.Empty));
                    }
                    pending = string.Empty;

                    words.Add(new Word(currentText, string.Empty, string.Empty));
                }
                if (pending != string.Empty)
                {
                    words.Add(new Word(pending, string.Empty, string.Empty));
                }

                // ---- suffix ----
                if (suffix != string.Empty)
                {
                    var suffixIsSpaceRun = suffix.Trim() == string.Empty && suffix.Length > 1;
                    if (!suffixIsSpaceRun)
                    {
                        words.Add(new Word(string.Empty, string.Empty, suffix));
                    }
                    else
                    {
                        // double spaces: the first character stays a suffix,
                        // each further one becomes a Word of character 160
                        words.Add(new Word(string.Empty, string.Empty, suffix[0].ToString()));
                        for (var extra = 1; extra < suffix.Length; extra++)
                        {
                            words.Add(new Word(((char)160).ToString(), string.Empty, string.Empty));
                        }
                    }
                }
            }
Пример #8
0
            /// <summary>
            /// Converts a list of segment sections into a WordsCollection.
            /// A non-text section becomes a single Word whose text is the
            /// section content wrapped in the MarkupTag element. A text
            /// section is split at whitespace/control characters; the
            /// whitespace before a word is passed as its prefix and the
            /// whitespace after it as its suffix.
            /// </summary>
            /// <param name="xSegmentSections">Sections to convert, in order.</param>
            /// <returns>The collection holding all produced words.</returns>
            public static WordsCollection Parse(List <SegmentSection> xSegmentSections)
            {
                var prefix = string.Empty;
                var words  = new WordsCollection();

                foreach (var xSegmentSection in xSegmentSections)
                {
                    if (xSegmentSection.Type != SegmentSection.ContentType.Text)
                    {
                        // markup content is reported without prefix or suffix
                        prefix = string.Empty;
                        words.Add(new Word("<" + MarkupTag + ">" + xSegmentSection.Content + "</" + MarkupTag + ">", string.Empty, string.Empty));
                        continue;
                    }

                    var content = xSegmentSection.Content;
                    var curPos  = 0;
                    while (curPos < content.Length)
                    {
                        // leading whitespace / control characters -> prefix
                        var prevPos = curPos;
                        while (curPos < content.Length &&
                               (char.IsControl(content[curPos]) ||
                                char.IsWhiteSpace(content[curPos])))
                        {
                            curPos++;
                        }
                        prefix += content.Substring(prevPos, curPos - prevPos);

                        if (curPos == content.Length)
                        {
                            if (prefix != string.Empty)
                            {
                                words.Add(new Word(string.Empty, prefix, string.Empty));

                                // the prefix has been reported; clear it so it is
                                // not attached again to the first word of the
                                // next text section
                                prefix = string.Empty;
                            }
                            break;
                        }

                        // the word itself: everything up to the next
                        // whitespace / control character
                        prevPos = curPos;
                        while (curPos < content.Length &&
                               !char.IsControl(content[curPos]) &&
                               !char.IsWhiteSpace(content[curPos]))
                        {
                            curPos++;
                        }
                        var word = content.Substring(prevPos, curPos - prevPos);

                        // trailing whitespace / control characters -> suffix
                        prevPos = curPos;
                        while (curPos < content.Length &&
                               (char.IsControl(content[curPos]) ||
                                char.IsWhiteSpace(content[curPos])))
                        {
                            curPos++;
                        }
                        var suffix = content.Substring(prevPos, curPos - prevPos);

                        ProcessWord(words, prefix, word, suffix);
                        prefix = string.Empty;
                    }
                }
                return(words);
            }
Пример #9
0
        /// <summary>
        /// Reads the input file, splits it into lines and sentences with
        /// LinesBuilder, loads them into a WordsCollection and prints the
        /// collection followed by the results of several of its queries.
        /// Any failure is printed to the console; the program then waits
        /// for a key press.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            LinesBuilder linesBuilder = new LinesBuilder(_seperatorLine, _seperatorSentence);

            WordsCollection wordsCollection = new WordsCollection();

            List<String> linesList = new List<String>();
            try
            {
                // using closes the reader even when splitting a line throws
                using (StreamReader sr = new StreamReader(_pathIn))
                {
                    while (!sr.EndOfStream)
                    {
                        foreach (var i in linesBuilder.GetLinesFromString(sr.ReadLine()))
                        {
                            linesList.Add(i);
                        }
                    }
                }

                foreach (var i in linesList)
                {
                    wordsCollection.Add(new WordsLine(linesBuilder.GetSentencesFromString(i).ToArray()));
                }

                foreach (var i in wordsCollection)
                {
                    Console.WriteLine(i.ToString());
                }

                Console.WriteLine("\n - wordsCollection.GetLinesSortedByWordsCount()");
                foreach (var i in wordsCollection.GetLinesSortedByWordsCount())
                {
                    Console.WriteLine(i.ToString());
                }

                Console.WriteLine("\n - wordsCollection.GetWordsInQuestionLines(1)");
                foreach (var i in wordsCollection.GetWordsInQuestionLines(1))
                {
                    Console.WriteLine(i.ToString());
                }

                Console.WriteLine("\n - wordsCollection.GetLinesWithDeletedWords(5, seperstorConsonants)");
                var seperstorConsonants = new char[]
                {'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'y', 'z'};
                foreach (var i in wordsCollection.GetLinesWithDeletedWords(5, seperstorConsonants))
                {
                    Console.WriteLine(i.ToString());
                }

                Console.WriteLine("\n - wordsCollection.GetLinesWithReplacedWords(4,'yelow')");
                foreach (var i in wordsCollection.GetLinesWithReplacedWords(4,"yelow"))
                {
                    Console.WriteLine(i.ToString());
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("The process failed: {0}", e);
            }

            Console.ReadKey();
        }
Пример #10
0
            /// <summary>
            /// Further processing of a string: splits off one leading
            /// non-alphanumeric character or one trailing punctuation
            /// character and adds the resulting word(s) to the collection
            /// </summary>
            /// <param name="words">
            /// Collection that new word(s) will be added in
            /// </param>
            /// <param name="prefix">
            /// prefix that comes with the string; attached to the first
            /// word that is added
            /// </param>
            /// <param name="word">
            /// A string that may be a real word or have a leading special
            /// character or trailing punctuation
            /// </param>
            /// <param name="suffix">
            /// suffix that comes with the string; attached to the last
            /// word that is added
            /// </param>
            private static void processWord(WordsCollection words,
                string prefix, string word, string suffix)
            {
                // the passed in word may have leading special
                // characters such as '(', '"' etc or trailing
                // punctuation. We need to sort this out.
                int length = word.Length;

                if (length <= 1)
                {
                    // nothing to split off. This also covers the empty
                    // string, for which word[0] below would throw.
                    words.Add(new Word(word, prefix, suffix));
                }
                else if (!char.IsLetterOrDigit(word[0]))
                {
                    // it is some kind of special character in the first place
                    // report it separately; the remainder is not examined again
                    words.Add(new Word(word[0].ToString(), prefix, ""));
                    words.Add(new Word(word.Substring(1), "", suffix));
                }
                else if (char.IsPunctuation(word[length - 1]))
                {
                    // there is an end punctuation
                    words.Add(new Word(word.Substring(0, length - 1), prefix, ""));
                    words.Add(new Word(word[length - 1].ToString(), "", suffix));
                }
                else
                {
                    // it is a real word(hope so)
                    words.Add(new Word(word, prefix, suffix));
                }
            }
Пример #11
0
            /// <summary>
            /// Static method that parses the passed-in string into
            /// Words collection. Whitespace, control characters, html
            /// tags and "&amp;nbsp;" are collected as prefix/suffix of
            /// the surrounding words.
            /// </summary>
            /// <param name="s">
            /// String to parse; may contain html tags and character
            /// entities
            /// </param>
            /// <returns>
            /// Words Collection
            /// </returns>
            /// <remarks>
            /// An html tag that is never closed is reported as an empty
            /// word whose prefix holds the rest of the string.
            /// </remarks>
            public static WordsCollection parse(string s)
            {
                int curPos = 0;
                int prevPos;
                string prefix = string.Empty;
                string suffix = string.Empty;
                string word = string.Empty;
                WordsCollection words = new WordsCollection();

                while (curPos < s.Length)
                {
                    // eat the leading or trailing white spaces
                    prevPos = curPos;
                    while (curPos < s.Length &&
                       (char.IsControl(s[curPos]) ||
                        char.IsWhiteSpace(s[curPos])))
                    {
                        curPos++;
                    }
                    prefix += s.Substring(prevPos, curPos - prevPos);

                    if (curPos == s.Length)
                    {
                        // it is possible that there is
                        // something in the prefix
                        if (prefix != string.Empty)
                        {
                            // report an empty word with prefix.
                            words.Add(new Word("", prefix, ""));
                        }
                        break;
                    }

                    // we have 3 different cases here,
                    // 1) if the string starts with '<', we assume
                    //    that it is a html tag which will be put
                    //    into prefix.
                    // 2) starts with '&', we need to check if it is
                    //    "&nbsp;" or "&#xxx;". If it is the former,
                    //    we treat it as prefix and if it is latter,
                    //    we treat it as a word.
                    // 3) a string that may be a real word or a set
                    //    of words separated by "&nbsp;" or may have
                    //    leading special character or trailing
                    //    punctuation.
                    //
                    // Another possible case that is too complicated
                    // or expensive to handle is that some special
                    // characters are embedded inside the word with
                    // no space separation
                    if (s[curPos] == '<')
                    {
                        // it is a html tag, consume it
                        // as prefix. The bounds check has to come
                        // before s[curPos] is read, otherwise an
                        // unclosed tag indexes past the end.
                        prevPos = curPos;
                        while (curPos < s.Length && s[curPos] != '>')
                        {
                            curPos++;
                        }

                        if (curPos == s.Length)
                        {
                            // if we come to this point, it means
                            // the html tag is not closed. Anyway,
                            // we are not validating html, so just
                            // report an empty word whose prefix is
                            // everything up to the end of the string.
                            prefix += s.Substring(prevPos);
                            words.Add(new Word("", prefix, ""));
                            break;
                        }

                        // curPos is pointing to '>': take the tag
                        // including '>' and move to the next char.
                        prefix += s.Substring(prevPos, curPos - prevPos + 1);
                        curPos++;
                        if (curPos == s.Length)
                        {
                            // the html tag is closed but nothing more
                            // behind, so report an empty word with prefix.
                            words.Add(new Word("", prefix, ""));
                            break;
                        }
                        continue;
                    }
                    else if (s[curPos] == '&')
                    {
                        prevPos = curPos;

                        // NOTE(review): the three length checks below use
                        // '<', so an entity that ends the string is not
                        // matched here and falls through to the generic
                        // '&' word handling - confirm this is intended.

                        // case for html whitespace
                        if (curPos + 6 < s.Length &&
                            s.Substring(prevPos, 6) == "&nbsp;")
                        {
                            prefix += "&nbsp;";
                            curPos += 6;
                            continue;
                        }

                        // case for special character like "&#123;" etc
                        if (curPos + 6 < s.Length &&
                            Regex.IsMatch(s.Substring(prevPos, 6), @"&#[0-9]{3};"))
                        {
                            words.Add(new Word(s.Substring(prevPos, 6), prefix, ""));
                            prefix = string.Empty;
                            curPos += 6;
                            continue;
                        }

                        // case for special character like "&#12;" etc
                        if (curPos + 5 < s.Length &&
                            Regex.IsMatch(s.Substring(prevPos, 5), @"&#[0-9]{2};"))
                        {
                            words.Add(new Word(s.Substring(prevPos, 5), prefix, ""));
                            prefix = string.Empty;
                            curPos += 5;
                            continue;
                        }

                        // can't think of anything else that is special,
                        // have to treat it as a '&' led word. Hope
                        // it is just single '&' for and in meaning.
                        prevPos = curPos;
                        while (curPos < s.Length &&
                            !char.IsControl(s[curPos]) &&
                            !char.IsWhiteSpace(s[curPos]) &&
                            s[curPos] != '<')
                        {
                            curPos++;
                        }
                        word = s.Substring(prevPos, curPos - prevPos);

                        // eat the following whitespace as suffix
                        // (suffix is always empty at this point)
                        prevPos = curPos;
                        while (curPos < s.Length &&
                            (char.IsControl(s[curPos]) ||
                            char.IsWhiteSpace(s[curPos])))
                        {
                            curPos++;
                        }
                        suffix = s.Substring(prevPos, curPos - prevPos);

                        words.Add(new Word(word, prefix, suffix));
                        prefix = string.Empty;
                        suffix = string.Empty;
                    }
                    else
                    {
                        // eat the word
                        prevPos = curPos;
                        while (curPos < s.Length &&
                            !char.IsControl(s[curPos]) &&
                            !char.IsWhiteSpace(s[curPos]) &&
                            s[curPos] != '<' &&
                            s[curPos] != '&')
                        {
                            curPos++;
                        }
                        word = s.Substring(prevPos, curPos - prevPos);

                        // if there are newlines or spaces following
                        // the word, consume them as suffix
                        prevPos = curPos;
                        while (curPos < s.Length &&
                            (char.IsControl(s[curPos]) ||
                            char.IsWhiteSpace(s[curPos])))
                        {
                            curPos++;
                        }
                        suffix = s.Substring(prevPos, curPos - prevPos);
                        processWord(words, prefix, word, suffix);
                        prefix = string.Empty;
                        suffix = string.Empty;
                    }
                }
                return words;
            }
Пример #12
0
 /// <summary>
 /// Appends the items of this WordsCollection, from the specified
 /// index position through the last item, to another WordsCollection
 /// </summary>
 /// <param name="col" type="WordsCollection">
 /// WordsCollection that receives the items
 /// </param>
 /// <param name="index" type="integer">
 /// Index of the first item to copy; nothing is copied when it is
 /// not smaller than the number of items
 /// </param>
 public void CopyTo(WordsCollection col, int index)
 {
     // Read the item count once. If col is this same collection and Add
     // appends to List, re-reading List.Count on every pass would keep
     // moving the end of the loop and it would never terminate.
     int count = List.Count;

     for (int i = index; i < count; i++)
     {
         col.Add(this[i]);
     }
 }