Example #1
0
        /// <summary>
        /// Refreshes the label auto-completion list after a key has been released in the editor.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Key event data (not used).</param>
        void Editor_KeyUp(object sender, KeyEventArgs e)
        {
            TextRange wordRange = WordBreaker.GetWordRange(Editor.CaretPosition);

            // Fewer than two typed characters: nothing to suggest, so hide the list.
            if (wordRange.Text.Length < 2)
            {
                HideList();
                return;
            }

            // Every known label whose name contains the typed word (case-insensitive).
            var matches = (from labelName in VirtualMailBox.VirtualMailBox.Current.Labels.Keys
                           where labelName.IndexOf(wordRange.Text, StringComparison.InvariantCultureIgnoreCase) >= 0
                           select new LabelsContainer(labelName)).ToList();

            // The list is left untouched when the number of matches equals the number
            // of items currently bound to the list box.
            var shown = AutoCompletionListBox.ItemsSource as List<LabelsContainer>;
            if (shown != null && shown.Count == matches.Count)
            {
                return;
            }

            // No match at all: hide the list (if it was already shown).
            if (matches.Count == 0)
            {
                HideList();
                return;
            }

            // Show the matches at the rectangle of the start of the current word.
            ShowList(matches, wordRange.Start.GetCharacterRect(LogicalDirection.Forward));
        }
        /// <summary>
        /// Handles the find command: walks the document word by word and selects the first
        /// word whose text equals the text of the control's find combo box.
        /// </summary>
        /// <param name="sender">The <c>RichEditorControl</c> that raised the command.</param>
        /// <param name="e">Command event data (not used).</param>
        private static void OnFind(object sender, ExecutedRoutedEventArgs e)
        {
            RichEditorControl editorControl = (RichEditorControl)sender;
            string            searchText    = (string)editorControl.FindComboBox.Text;

            // Start from the top of the document when nothing is selected; otherwise
            // continue just after the current selection.
            TextPointer position;
            if (editorControl.RichTextBox.Selection.IsEmpty)
            {
                position = editorControl.RichTextBox.Document.ContentStart;
            }
            else
            {
                position = editorControl.RichTextBox.Selection.End.GetNextInsertionPosition(LogicalDirection.Forward);
            }

            while (position != null && position.CompareTo(editorControl.RichTextBox.Document.ContentEnd) < 0)
            {
                TextRange candidate = WordBreaker.GetWordRange(position);

                // A null range ends the search.
                if (candidate == null)
                {
                    return;
                }

                if (candidate.Text == searchText)
                {
                    editorControl.RichTextBox.Selection.Select(candidate.Start, candidate.End);
                    return;
                }

                // Continue with the insertion position following this word.
                position = candidate.End.GetNextInsertionPosition(LogicalDirection.Forward);
            }
        }
Example #3
0
        /// <summary>
        /// Turns the word at the caret into a recipient when it is a valid e-mail address;
        /// otherwise, when validation is enabled, recolors the word.
        /// </summary>
        void ProcessCurrentWord()
        {
            TextRange wordRange = WordBreaker.GetWordRange(Editor.CaretPosition);
            string    candidate = wordRange.Text.Trim();

            if (SourceAddress.IsValidEmail(candidate))
            {
                SuppressListForCurrentWord = false;

                AddRecipient(new SourceAddress(candidate));

                // Notify listeners of new entry
                RebuildRecipientsList();
            }
            else if (ValidationEnabled)
            {
                // Not a valid address: apply the "TabAndLightButtonText" resource as foreground.
                wordRange.ApplyPropertyValue(TextBlock.ForegroundProperty, FindResource("TabAndLightButtonText"));

                SuppressListForCurrentWord = false;
            }
        }
Example #4
0
        /// <summary>
        /// Builds a <c>Document</c> from raw page text: the text is split into sentences, each
        /// sentence is broken into words, stop words are removed, the remaining words are
        /// stemmed, and every sentence becomes one <c>Statement</c>.
        /// </summary>
        /// <param name="pageContents">Text to convert.</param>
        /// <returns>A document holding one statement per sentence, in sentence order.</returns>
        private static Document ConstructDocument(string pageContents)
        {
            var             stopWords       = new StopWordRemover();
            var             wordStemmer     = new SStemmer();
            var             wordBreaker     = new WordBreaker();
            SentenceBreaker sentenceBreaker = SentenceBreaker.Instance;
            var             statements      = new List <Statement>();

            foreach (string sentence in sentenceBreaker.BreakIntoSentences(pageContents))
            {
                // break -> drop stop words -> stem
                string[] rawTokens = wordBreaker.BreakParagraph(sentence);
                string[] filtered  = stopWords.RemoveStopWords(rawTokens);
                string[] stems     = wordStemmer.StemWords(filtered);

                Word[] words = new Word[stems.Length];
                for (int i = 0; i < stems.Length; i++)
                {
                    words[i] = new Word(stems[i]);
                }

                statements.Add(new Statement(words));
            }

            return new Document(statements.ToArray());
        }
Example #5
0
        /// <summary>
        /// Inserts a visual label at the caret, optionally adds it to the current message,
        /// removes the typed word and hides the suggestion list.
        /// </summary>
        /// <param name="source">Label to display.</param>
        /// <param name="addToMessage">
        /// When true and a message is set, a <c>Label</c> built from <c>source.Labelname</c>
        /// is added to the message.
        /// </param>
        public void AddVisualLabel(LabelsContainer source, bool addToMessage)
        {
            // Wrap the label in a content control and host it inline at the caret.
            ContentControl labelHost = new ContentControl();
            labelHost.Content = source;

            contentEnd = new InlineUIContainer(labelHost, Editor.CaretPosition).ContentEnd;

            if (addToMessage)
            {
                if (Message != null)
                {
                    Message.AddLabel(new Label(source.Labelname));
                }
            }

            // Remove any typed text
            TextRange typedWord = WordBreaker.GetWordRange(Editor.CaretPosition);
            typedWord.Text = String.Empty;

            // Place the caret at the end of the inserted container.
            Editor.CaretPosition = contentEnd;
            Editor.ShowWatermark = false;

            // Hide dropdown list if it is visible
            HideList();
        }
Example #6
0
 /// <summary>
 /// Mouse-down handler for the editor. On a right-button press it resolves the word
 /// under the mouse; the resolved range is not used yet (see TODOs).
 /// </summary>
 private void richTextBox_MouseDown(object sender, MouseEventArgs args)
 {
     //TODO: respond to text select
     if (args.RightButton != MouseButtonState.Pressed)
     {
         return;
     }

     var         clickPoint      = Mouse.GetPosition(rtb_Editor);
     TextPointer clickedPosition = rtb_Editor.GetPositionFromPoint(clickPoint, true);
     TextRange   clickedWord     = WordBreaker.GetWordRange(clickedPosition);
     //TODO: what to do with this text maybe add an option to highlight it in context menu?
 }
Example #7
0
        /// <summary>
        /// Key-up handler for the editor. F4 requests suggestions for the current word
        /// (loading the word list first when it is empty). Any other key updates the
        /// status bar with the caret column and the whitespace-delimited word at the caret.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Key event data; only <c>KeyCode</c> is inspected.</param>
        private void RichTextBoxEditor_KeyUp(object sender, KeyEventArgs e)
        {
            if (e.KeyCode == Keys.F4)
            {
                string word = toolStripStatusCurrentWord.Text;
                if (!bangla.Any())
                {
                    // Word list not loaded yet: read it from the repository folder.
                    WordBreaker breaker = new WordBreaker(option.Location.DBFolder);
                    bangla = breaker.ReadWordFromRepository();
                }
                Suggest(word);
                return;
            }

            var posInLine = richTextBoxEditor.SelectionStart - richTextBoxEditor.GetFirstCharIndexOfCurrentLine();

            toolStripStatusCursorPosition.Text = "" + posInLine;

            // Read the control's text once. The Text getter is evaluated on every access,
            // so indexing it inside the scan loops below would repeat that work per character.
            string content = richTextBoxEditor.Text;
            int    start   = richTextBoxEditor.SelectionStart;

            // Scan left from the caret: ends on the nearest whitespace character,
            // or on index 0 when there is none.
            int previousSpace = 0;
            for (int i = start - 1; i >= 0; i--)
            {
                previousSpace = i;
                if (char.IsWhiteSpace(content[i]))
                {
                    break;
                }
            }

            // Scan right from the caret: ends one past the nearest whitespace character,
            // or at the end of the text when there is none.
            int nextSpace = start;
            for (int i = start; i < content.Length; i++)
            {
                nextSpace = i + 1;
                if (char.IsWhiteSpace(content[i]))
                {
                    break;
                }
            }

            string guessWord;
            if (previousSpace >= nextSpace)
            {
                guessWord = "";
                WriteLine("Space here.");
            }
            else
            {
                guessWord = content.Substring(previousSpace, nextSpace - previousSpace);
            }

            // The extracted span may include the delimiting whitespace; strip it.
            guessWord = guessWord.Trim();
            toolStripStatusCurrentWord.Text = guessWord;
            textBoxTheWord.Text             = guessWord;
        }
Example #8
0
        /// <summary>
        /// Converts the word at the caret into a visual label and adds it to the message.
        /// Does nothing when the trimmed word is empty.
        /// </summary>
        void ProcessCurrentWord()
        {
            string typed = WordBreaker.GetWordRange(Editor.CaretPosition).Text.Trim();

            if (String.IsNullOrEmpty(typed))
            {
                return;
            }

            AddVisualLabel(new LabelsContainer(typed), true);
        }
Example #9
0
        /// <summary>
        /// Converts the word at the caret into a visual label, but only when the trimmed
        /// word is a key of the current mailbox's label collection.
        /// </summary>
        void CheckExistingLabel()
        {
            TextRange wordRange = WordBreaker.GetWordRange(Editor.CaretPosition);
            string    typed     = wordRange.Text.Trim();

            bool isKnownLabel = VirtualMailBox.VirtualMailBox.Current.Labels.ContainsKey(typed);
            if (!isKnownLabel)
            {
                return;
            }

            AddVisualLabel(new LabelsContainer(typed), true);
        }
Example #10
0
        /// <summary>
        /// When keyboard focus leaves the control: discards the word at the caret, hides the
        /// suggestion list and, if <c>HideOnEmpty</c> is set and the message has no custom
        /// labels, collapses the control.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Focus change data (not used).</param>
        void UserControl_LostKeyboardFocus(object sender, KeyboardFocusChangedEventArgs e)
        {
            // Clear any text which might have not been processed yet
            WordBreaker.GetWordRange(Editor.CaretPosition).Text = String.Empty;

            HideList();

            if (!HideOnEmpty)
            {
                return;
            }

            var customLabelCount = Message.LabelsList.Count(l => l.LabelType == LabelType.Custom);
            if (customLabelCount == 0)
            {
                Visibility = Visibility.Collapsed;
            }
        }
Example #11
0
        /// <summary>
        /// Produces a new statement from the string form of <paramref name="statement"/>:
        /// the text is broken into words, stop words are removed and the rest is stemmed.
        /// </summary>
        /// <param name="statement">Statement to process; its <c>ToString()</c> output is used.</param>
        /// <returns>A new statement built from the stemmed words.</returns>
        private static Statement StemStatement(Statement statement)
        {
            var wordStemmer = new SStemmer();
            var wordBreaker = new WordBreaker();
            var stopWords   = new StopWordRemover();

            // break -> drop stop words -> stem
            string[] rawTokens = wordBreaker.BreakParagraph(statement.ToString());
            string[] filtered  = stopWords.RemoveStopWords(rawTokens);
            string[] stems     = wordStemmer.StemWords(filtered);

            Word[] words = new Word[stems.Length];
            for (int i = 0; i < stems.Length; i++)
            {
                words[i] = new Word(stems[i]);
            }

            return new Statement(words);
        }
Example #12
0
        /// <summary>
        /// "Split into words": configures a <c>WordBreaker</c> for the given input file and
        /// runs it. Returns without running when the input file does not exist.
        /// </summary>
        /// <param name="vstup">Path of the input file.</param>
        /// <param name="vystup">Value assigned to the breaker's <c>Output</c> property.</param>
        internal static void RozdelitNaSlova(string vstup, string vystup)
        {
            WordBreaker breaker = new WordBreaker();

            if (File.Exists(vstup))
            {
                breaker.Input       = vstup;
                breaker.Output      = vystup;
                breaker.XmlIdFormat = "w-{0}";

                // Element names assigned to IgnoredElements.
                breaker.IgnoredElements = new List <string> { "teiHeader", "resp", "repository", "idno", "oVar", "catDesc" };
                breaker.Punctation      = ",.:;…„“‚‘?!—/[]´+─≈+−'#›‹()";

                breaker.Run();
            }
        }
Example #13
0
 /// <summary>
 /// Menu handler that runs the word-breaking pass over "Bangla.txt" in the configured
 /// database folder and writes the result back. A wait cursor is shown while it runs.
 /// </summary>
 /// <remarks>
 /// Original note: 0980-09FD / 2432-2558. Presumably the Bengali Unicode range in
 /// hexadecimal and decimal — TODO confirm (0x09FD is 2557 in decimal).
 /// </remarks>
 /// <param name="sender">Event source (not used).</param>
 /// <param name="e">Event data (not used).</param>
 private void ReadWordsToolStripMenuItem_Click(object sender, EventArgs e)
 {
     try
     {
         Cursor.Current = Cursors.WaitCursor;
         WordBreaker breaker  = new WordBreaker(option.Location.DBFolder);
         string      wordFile = Path.Combine(option.Location.DBFolder, "Bangla.txt");
         breaker.InitializeFolder();
         breaker.BreakFile(wordFile);
         breaker.WriteBack();
         WriteLine("Completed word breaking process.");
     }
     catch (Exception ex)
     {
         Debug.WriteLine(ex);

         // Debug.WriteLine is compiled out of release builds, so also report the failure
         // through the same channel as the success message instead of failing silently.
         WriteLine("Word breaking process failed: " + ex.Message);
     }
     finally
     {
         // Always restore the cursor, even when the pass failed.
         Cursor.Current = Cursors.Default;
     }
 }
Example #14
0
        /// <summary>
        /// Looks up contacts for the auto-completion list and shows or hides the list
        /// depending on the outcome.
        /// </summary>
        void SearchContacts()
        {
            TextRange wordRange = WordBreaker.GetWordRange(Editor.CaretPosition);

            // Searching only starts once the current word has at least three characters.
            if (wordRange.Text.Length < 3)
            {
                HideList();
                return;
            }

            var shown = AutoCompletionListBox.ItemsSource as List <Profile>;

            // NOTE(review): both filters below match against `Text`, not the text of the
            // word range obtained above — confirm this is intended.

            // Profiles whose address contains the search text and that can be messaged privately.
            var profileMatches = mailbox.Profiles
                                 .Where(p => p.SourceAddress.ToString().IndexOf(Text, StringComparison.InvariantCultureIgnoreCase) > -1)
                                 .Where(p => p.SourceChannelId == 0 || p.SourceChannel.Charasteristics.SupportsPrivateMessage);

            // Profiles of persons whose name contains the search text, with the same channel filter.
            var personMatches = mailbox.Persons
                                .Where(r => r.Name.IndexOf(Text, StringComparison.InvariantCultureIgnoreCase) > -1)
                                .SelectMany(r => r.Profiles
                                            .Where(p => p.SourceChannelId == 0 || p.SourceChannel.Charasteristics.SupportsPrivateMessage));

            // Ordered by message count (descending), de-duplicated, capped at ten entries.
            var query = profileMatches
                        .Union(personMatches)
                        .OrderByDescending(p => p.Messages.Count)
                        .Distinct()
                        .Take(10);

            // The query is only enumerated here, while the profiles reader lock is held.
            List <Profile> matches;
            using (mailbox.Profiles.ReaderLock)
            {
                matches = query.ToList();
            }

            // The list is left untouched when the number of matches equals the number
            // of items currently bound to the list box.
            bool sameSize = shown != null && shown.Count == matches.Count;
            if (sameSize)
            {
                return;
            }

            if (matches.Count == 0)
            {
                HideList();
                return;
            }

            ShowList(matches);
        }
Example #15
0
        /// <summary>
        /// Replaces the word at the caret with an inline visual for <paramref name="source"/>
        /// and rebuilds the recipients list.
        /// </summary>
        /// <param name="source">Object shown as the content of the inserted control.</param>
        public void AddRecipient(object source)
        {
            // Capture the typed word before anything is inserted at the caret.
            TextRange typedWord = WordBreaker.GetWordRange(Editor.CaretPosition);

            // Create display object and host it inline at the caret.
            var recipientHost = new ContentControl {
                Content = source
            };

            contentEnd = new InlineUIContainer(recipientHost, Editor.CaretPosition).ContentEnd;

            // Remove actual text
            typedWord.Text = String.Empty;

            // Place the caret at the end of the inserted container.
            Editor.CaretPosition = contentEnd;

            // Rebuild list of recipients on insert
            RebuildRecipientsList();
        }
Example #16
0
        /// <summary>
        /// Event handler for RichTextBox.TextChanged event. When the paste flag is set, scans
        /// the whole document word by word and wraps each "www.microsoft.com" word that is not
        /// already inside a hyperlink in a new Hyperlink element.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Text change data (not used).</param>
        private void TextChangedEventHandler(object sender, TextChangedEventArgs e)
        {
            if (!this.pasteFlag || this.Document == null)
            {
                return;
            }

            // Temporarily disable TextChanged event handler, since following code might insert Hyperlinks,
            // which will raise another TextChanged event.
            this.TextChanged -= this.TextChangedEventHandler;

            try
            {
                TextPointer navigator = this.Document.ContentStart;

                while (navigator != null && navigator.CompareTo(this.Document.ContentEnd) < 0)
                {
                    TextRange wordRange = WordBreaker.GetWordRange(navigator);
                    if (wordRange == null || wordRange.IsEmpty)
                    {
                        // No more words in the document.
                        break;
                    }

                    string wordText = wordRange.Text;
                    if (wordText == "www.microsoft.com" &&
                        !HyperlinkHelper.IsInHyperlinkScope(wordRange.Start) &&
                        !HyperlinkHelper.IsInHyperlinkScope(wordRange.End))
                    {
                        // Wrap the word in a hyperlink and continue after the new element.
                        Hyperlink hyperlink = new Hyperlink(wordRange.Start, wordRange.End);
                        navigator = hyperlink.ElementEnd.GetNextInsertionPosition(LogicalDirection.Forward);
                    }
                    else
                    {
                        navigator = wordRange.End.GetNextInsertionPosition(LogicalDirection.Forward);
                    }
                }
            }
            finally
            {
                // Restore the subscription and clear the flag even if the scan throws;
                // otherwise the handler would stay detached for the lifetime of the control.
                this.TextChanged += this.TextChangedEventHandler;
                this.pasteFlag    = false;
            }
        }
Example #17
0
        /// <summary>
        /// Event handler for KeyDown event to auto-detect hyperlinks on space, enter and backspace keys.
        /// Space/Enter: wraps the word at the caret in a Hyperlink when it equals "www.microsoft.com".
        /// Backspace: when the deletion would cross a hyperlink boundary, removes the Hyperlink
        /// element while keeping its content and local formatting.
        /// </summary>
        /// <param name="sender">The <c>MyRichTextBox</c> that raised the event.</param>
        /// <param name="e">Key event data; only <c>Key</c> is inspected.</param>
        private static void OnKeyDown(object sender, KeyEventArgs e)
        {
            MyRichTextBox myRichTextBox = (MyRichTextBox)sender;

            if (e.Key != Key.Back && e.Key != Key.Space && e.Key != Key.Return)
            {
                return;
            }

            // A non-empty selection is cleared first.
            if (!myRichTextBox.Selection.IsEmpty)
            {
                myRichTextBox.Selection.Text = String.Empty;
            }

            TextPointer caretPosition = myRichTextBox.Selection.Start;

            if (e.Key == Key.Space || e.Key == Key.Return)
            {
                TextRange wordRange = WordBreaker.GetWordRange(caretPosition);
                string    wordText  = wordRange.Text;

                if (wordText == "www.microsoft.com")
                {
                    // Insert hyperlink element at word boundaries.
                    new Hyperlink(wordRange.Start, wordRange.End);

                    // No need to update RichTextBox caret position,
                    // since we only inserted a Hyperlink ElementEnd following current caretPosition.
                    // Subsequent handling of space input by base RichTextBox will update selection.
                }
            }
            else // Key.Back
            {
                TextPointer backspacePosition = caretPosition.GetNextInsertionPosition(LogicalDirection.Backward);
                Hyperlink   hyperlink;
                if (backspacePosition != null && HyperlinkHelper.IsHyperlinkBoundaryCrossed(caretPosition, backspacePosition, out hyperlink))
                {
                    // Remember caretPosition with forward gravity. This is necessary since we are going to delete
                    // the hyperlink element preceding caretPosition and after deletion current caretPosition
                    // (with backward gravity) will follow content preceding the hyperlink.
                    // We want to remember content following the hyperlink to set new caret position at.

                    TextPointer newCaretPosition = caretPosition.GetPositionAtOffset(0, LogicalDirection.Forward);

                    // Deleting the hyperlink is done using logic below.

                    // 1. Copy its children Inline to a temporary array.
                    InlineCollection hyperlinkChildren = hyperlink.Inlines;
                    Inline[]         inlines           = new Inline[hyperlinkChildren.Count];
                    hyperlinkChildren.CopyTo(inlines, 0);

                    // 2. Remove each child from parent hyperlink element and insert it after the hyperlink.
                    //    Iterating in reverse keeps the children in their original order.
                    for (int i = inlines.Length - 1; i >= 0; i--)
                    {
                        hyperlinkChildren.Remove(inlines[i]);
                        hyperlink.SiblingInlines.InsertAfter(hyperlink, inlines[i]);
                    }

                    // 3. Apply hyperlink's local formatting properties to inlines (which are now outside hyperlink scope).
                    //    Skipped for a hyperlink without children: there is nothing to format, and
                    //    indexing inlines[0] would throw.
                    if (inlines.Length > 0)
                    {
                        LocalValueEnumerator localProperties = hyperlink.GetLocalValueEnumerator();
                        TextRange            inlineRange     = new TextRange(inlines[0].ContentStart, inlines[inlines.Length - 1].ContentEnd);

                        while (localProperties.MoveNext())
                        {
                            LocalValueEntry    property = localProperties.Current;
                            DependencyProperty dp       = property.Property;
                            object             value    = property.Value;

                            if (!dp.ReadOnly &&
                                dp != Inline.TextDecorationsProperty && // Ignore hyperlink defaults.
                                dp != TextElement.ForegroundProperty &&
                                dp != BaseUriHelper.BaseUriProperty &&
                                !HyperlinkHelper.IsHyperlinkProperty(dp))
                            {
                                inlineRange.ApplyPropertyValue(dp, value);
                            }
                        }
                    }

                    // 4. Delete the (empty) hyperlink element.
                    hyperlink.SiblingInlines.Remove(hyperlink);

                    // 5. Update selection, since we deleted Hyperlink element and caretPosition was at that Hyperlink's end boundary.
                    myRichTextBox.Selection.Select(newCaretPosition, newCaretPosition);
                }
            }
        }
Example #18
0
        /// <summary>
        /// Preview key handler for the editor: drives the auto-completion list with the
        /// arrow, Enter, Escape and Tab keys, processes the current word on separator keys,
        /// and resets the word color on any other key.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Key event data; <c>Handled</c> is set for keys consumed here.</param>
        void Editor_PreviewKeyDown(object sender, KeyEventArgs e)
        {
            wordFlipper.Delay();

            switch (e.Key)
            {
            case Key.Up:
                // Move selection up
                if (AutoCompletionListBox.SelectedIndex > 0)
                {
                    AutoCompletionListBox.SelectedIndex--;
                }

                e.Handled = true;

                break;

            case Key.Down:
                // Move selection down, stopping at the last item. The bound is Count - 1:
                // comparing against Count would try to select an index past the end.
                if (AutoCompletionListBox.SelectedIndex < AutoCompletionListBox.Items.Count - 1)
                {
                    AutoCompletionListBox.SelectedIndex++;
                }

                e.Handled = true;

                break;

            case Key.Enter:
                // Accept selection. Handled is deliberately left untouched here.
                if (IsPopupOpen)
                {
                    InsertSelectedContact();
                }

                break;

            case Key.Escape:
                // Restore state before we opened the list
                HideList();

                e.Handled = true;
                break;

            case Key.Tab:
                if (IsPopupOpen)
                {
                    InsertSelectedContact();
                }
                else
                {
                    TextRange range = WordBreaker.GetWordRange(Editor.CaretPosition);

                    string text = range.Text.Trim();

                    // Break out when the user tabs and nothing has been entered; the key
                    // is left unhandled in that case.
                    if (String.IsNullOrEmpty(text))
                    {
                        return;
                    }

                    ProcessCurrentWord();
                }

                e.Handled = true;

                break;

            case Key.Space:
            case Key.OemComma:
            case Key.OemSemicolon:
                // Add word being typed in by user
                ProcessCurrentWord();

                break;

            default:
                // Clear color of range
                TextRange currentRange = WordBreaker.GetWordRange(Editor.CaretPosition);
                currentRange.ApplyPropertyValue(TextBlock.ForegroundProperty, Brushes.Black);

                break;
            }
        }
        // Moves an endpoint backward by a number of text units and returns the new index.
        // The endpoint is just an index into the text, so it can represent either
        // endpoint (start or end) of the range.
        //
        //   index - current position of the endpoint.
        //   unit  - text unit to move by.
        //   count - number of units to move; the loops and Math.Max calls below treat it
        //           as a negative number (see "Note count < 0" in the Line case).
        //   moved - receives the number of units moved, as a value <= 0.
        //
        // Throws InvalidEnumArgumentException for an unrecognized unit.
        private int MoveEndpointBackward(int index, TextUnit unit, int count, out int moved)
        {
            switch (unit)
            {
                case TextUnit.Character:
                    {
                        // NOTE(review): limit is assigned but not used in this branch.
                        int limit = _provider.GetTextLength();
                        ValidateEndpoints();

                        int oneBasedIndex = index + 1;

                        // Truncate the move to at most (index + 1) characters, then clamp
                        // the resulting index at the start of the text.
                        // NOTE(review): when truncated, moved is -(index + 1) although the index
                        // itself only changes by `index` positions - confirm this is intended.
                        moved = Math.Max(count, -oneBasedIndex);
                        index = index + moved;

                        index = index < 0 ? 0 : index;
                    }
                    break;

                case TextUnit.Word:
                    {
                        string text = _provider.GetText();
                        ValidateEndpoints();

#if WCP_NLS_ENABLED
                    // use the same word breaker as Avalon Text.
                    WordBreaker breaker = new WordBreaker();
                    TextContainer container = new TextContainer(text);
                    TextNavigator navigator = new TextNavigator(index, container);

                    // move backward one word break for each count
                    for (moved = 0; moved > count && index > 0; moved--)
                    {
                        if (!breaker.MoveToPreviousWordBreak(navigator))
                            break;
                    }

                    index = navigator.Position;
#else
                        // One iteration per word: step back at least one character, then keep
                        // stepping until AtWordBoundary reports a boundary.
                        // NOTE(review): the inner loop has no lower bound of its own; presumably
                        // AtWordBoundary returns true at index 0 - confirm.
                        for (moved = 0; moved > count && index > 0; moved--)
                        {
                            for (index--; !AtWordBoundary(text, index); index--) ;
                        }
#endif
                    }
                    break;

                case TextUnit.Line:
                    {
                        // Note count < 0.

                        // Get 1-based line.
                        int line = _provider.LineFromChar(index) + 1;

                        int lineMax = _provider.GetLineCount();

                        // Truncate the count to the number of available lines.
                        int actualCount = Math.Max(count, -line);

                        moved = actualCount;

                        if (actualCount == -line)
                        {
                            // We are moving by the maximum number of possible lines,
                            // so we know the resulting index will be 0.
                            index = 0;

                            // If a line other than the first consists of only "\r\n",
                            // you can move backwards past this line and the position changes,
                            // hence this is counted.  The first line is special, though:
                            // if it is empty, and you move say from the second line back up
                            // to the first, you cannot move further; however if the first line
                            // is nonempty, you can move from the end of the first line to its
                            // beginning!  This latter move is counted, but if the first line
                            // is empty, it is not counted.

                            // Recalculate the value of "moved".
                            // The first line is empty if it consists only of
                            // a line separator sequence.
                            bool firstLineEmpty =
                                ((lineMax > 1 && _provider.LineIndex(1) == _lineSeparator.Length)
                                    || lineMax == 0);

                            if (moved < 0 && firstLineEmpty)
                            {
                                ++moved;
                            }
                        }
                        else // actualCount > -line
                        {
                            // Move the endpoint to the beginning of the following line,
                            // then back by the line separator length to get to the end
                            // of the previous line, since the Edit control has
                            // no method to get the character index of the end
                            // of a line directly.
                            index = _provider.LineIndex(line + actualCount) - _lineSeparator.Length;
                        }
                    }
                    break;

                case TextUnit.Paragraph:
                    {
                        // just like moving words but we look for paragraph boundaries instead of
                        // word boundaries.
                        string text = _provider.GetText();
                        ValidateEndpoints();

                        // NOTE(review): as in the Word case, the inner loop relies on
                        // AtParagraphBoundary to stop it - presumably true at index 0; confirm.
                        for (moved = 0; moved > count && index > 0; moved--)
                        {
                            for (index--; !AtParagraphBoundary(text, index); index--) ;
                        }
                    }
                    break;

                case TextUnit.Format:
                case TextUnit.Page:
                case TextUnit.Document:
                    {
                        // since edit controls are plain text moving one uniform format unit will
                        // take us all the way to the beginning of the document, just like
                        // "pages" and document.

                        // we'll move 1 format unit if we aren't already at the beginning of the
                        // document.  Otherwise, we won't move at all.
                        moved = index > 0 ? -1 : 0;
                        index = 0;
                    }
                    break;

                default:
                    throw new System.ComponentModel.InvalidEnumArgumentException("unit", (int)unit, typeof(TextUnit));
            }

            return index;
        }
        // Expands this range so that it covers whole units of the requested kind, by moving
        // Start backward and/or End forward. Sets focus to the provider's window first.
        // Throws InvalidEnumArgumentException for an unrecognized unit.
        void ITextRangeProvider.ExpandToEnclosingUnit(TextUnit unit)
        {
            Misc.SetFocus(_provider._hwnd);

            switch (unit)
            {
                case TextUnit.Character:
                    // if it is a degenerate range then expand it to be one character.
                    // otherwise, leave it as it is.
                    if (Start == End)
                    {
                        // moved is required by the out parameter; its value is not used here.
                        int moved;
                        End = MoveEndpointForward(End, TextUnit.Character, 1, out moved);
                    }
                    break;

                case TextUnit.Word:
                    {
                        // this works the same as the Paragraph case except we look for word
                        // boundaries instead of paragraph boundaries.

                        // get the text so we can figure out where the boundaries are
                        string text = _provider.GetText();
                        ValidateEndpoints();

#if WCP_NLS_ENABLED
                        // use the same word breaker that Avalon Text uses.
                        WordBreaker breaker = new WordBreaker();
                        TextContainer container = new TextContainer(text);
                        // if the starting point of the range is not already at a word break
                        // then move it backwards to the nearest word break.
                        TextNavigator startNavigator = new TextNavigator(Start, container);
                        if (!breaker.IsAtWordBreak(startNavigator))
                        {
                            breaker.MoveToPreviousWordBreak(startNavigator);
                            Start = startNavigator.Position;
                        }

                        // if the range is degenerate or the ending point of the range is not already at a word break
                        // then move it forwards to the nearest word break.
                        TextNavigator endNavigator = new TextNavigator(End, container);
                        if (Start==End || !breaker.IsAtWordBreak(endNavigator))
                        {
                            breaker.MoveToNextWordBreak(endNavigator);
                            End = endNavigator.Position;
                        }
#else
                        // move start left until we reach a word boundary.
                        // NOTE(review): the loop has no bound of its own; presumably
                        // AtWordBoundary returns true at index 0 - confirm.
                        for (; !AtWordBoundary(text, Start); Start--) ;

                        // move end right until we reach word boundary (different from Start).
                        // End is first forced to at least Start + 1, capped at the text length.
                        End = Math.Min(Math.Max(End, Start + 1), text.Length);
                        for (; !AtWordBoundary(text, End); End++) ;
#endif
                    }
                    break;

                case TextUnit.Line:
                    {
                        if (_provider.GetLineCount() != 1)
                        {
                            int startLine = _provider.LineFromChar(Start);
                            int endLine = _provider.LineFromChar(End);

                            // Cover from the first character of the start line up to the first
                            // character of the line after the end line.
                            // NOTE(review): what LineIndex returns when endLine is the last line
                            // is not visible here - confirm.
                            MoveTo(_provider.LineIndex(startLine), _provider.LineIndex(endLine + 1));
                        }
                        else
                        {
                            // Single line: the range becomes the whole text.
                            MoveTo(0, _provider.GetTextLength());
                        }
                    }
                    break;

                case TextUnit.Paragraph:
                    {
                        // this works the same as the Word case except we look for paragraph
                        // boundaries instead of word boundaries.

                        // get the text so we can figure out where the boundaries are
                        string text = _provider.GetText();
                        ValidateEndpoints();

                        // move start left until we reach a paragraph boundary.
                        for (; !AtParagraphBoundary(text, Start); Start--);

                        // move end right until we reach a paragraph boundary (different from Start).
                        // End is first forced to at least Start + 1, capped at the text length.
                        End = Math.Min(Math.Max(End, Start + 1), text.Length);
                        for (; !AtParagraphBoundary(text, End); End++);
                    }
                    break;

                case TextUnit.Format:
                case TextUnit.Page:
                case TextUnit.Document:
                    // These three units all expand the range to the whole text.
                    MoveTo(0, _provider.GetTextLength());
                    break;

                default:
                    throw new System.ComponentModel.InvalidEnumArgumentException("unit", (int)unit, typeof(TextUnit));
            }
        }
        // moves an endpoint forward a certain number of units.
        // the endpoint is just an index into the text so it could represent either
        // the endpoint.
        /// <summary>
        /// Advances a text index by up to <paramref name="count"/> units of the requested kind.
        /// </summary>
        /// <param name="index">Position in the text to start moving from.</param>
        /// <param name="unit">Granularity of the move (character, word, line, ...).</param>
        /// <param name="count">Maximum number of units to move; presumably non-negative (callers are not visible here).</param>
        /// <param name="moved">Receives the number of units actually moved.</param>
        /// <returns>The index after the move.</returns>
        /// <exception cref="System.ComponentModel.InvalidEnumArgumentException">
        /// <paramref name="unit"/> is not one of the handled <c>TextUnit</c> values.
        /// </exception>
        private int MoveEndpointForward(int index, TextUnit unit, int count, out int moved)
        {
            switch (unit)
            {
                case TextUnit.Character:
                    {
                        int textLength = _provider.GetTextLength();
                        ValidateEndpoints();

                        // a character move can never go beyond the end of the text.
                        moved = Math.Min(count, textLength - index);
                        index += moved;

                        if (index > textLength)
                        {
                            index = textLength;
                        }

                        return index;
                    }

                case TextUnit.Word:
                    {
                        string content = _provider.GetText();
                        ValidateEndpoints();

#if WCP_NLS_ENABLED
                        // use the same word breaker as Avalon Text.
                        WordBreaker breaker = new WordBreaker();
                        TextContainer container = new TextContainer(content);
                        TextNavigator navigator = new TextNavigator(index, container);

                        // advance one word break per unit; stop early if the breaker cannot move.
                        moved = 0;
                        while (moved < count && index < content.Length && breaker.MoveToNextWordBreak(navigator))
                        {
                            moved++;
                        }

                        index = navigator.Position;
#else
                        // each unit: step at least one character, then keep going until a word boundary.
                        moved = 0;
                        while (moved < count && index < content.Length)
                        {
                            index++;
                            while (!AtWordBoundary(content, index))
                            {
                                index++;
                            }

                            moved++;
                        }
#endif

                        return index;
                    }

                case TextUnit.Line:
                    {
                        // line that currently contains the index.
                        int currentLine = _provider.LineFromChar(index);

                        // cap the move at the number of lines that follow the current one.
                        int lineCount = _provider.GetLineCount();
                        moved = Math.Min(count, lineCount - currentLine - 1);

                        if (moved > 0)
                        {
                            // land on the first character of the destination line.
                            index = _provider.LineIndex(currentLine + moved);
                        }
                        else if (moved == 0 && lineCount == 1)
                        {
                            // single-line text: the end of the text serves as the destination
                            // and the move is reported as one line.
                            index = _provider.GetTextLength();
                            moved = 1;
                        }

                        return index;
                    }

                case TextUnit.Paragraph:
                    {
                        // same walk as for words, but stopping at paragraph boundaries.
                        string content = _provider.GetText();
                        ValidateEndpoints();

                        moved = 0;
                        while (moved < count && index < content.Length)
                        {
                            index++;
                            while (!AtParagraphBoundary(content, index))
                            {
                                index++;
                            }

                            moved++;
                        }

                        return index;
                    }

                case TextUnit.Format:
                case TextUnit.Page:
                case TextUnit.Document:
                    {
                        // edit controls hold plain text, so a single format, page or document
                        // unit spans everything up to the end of the text.
                        int textLength = _provider.GetTextLength();
                        ValidateEndpoints();

                        // one unit is reported unless the index was already at (or past) the end.
                        if (index < textLength)
                        {
                            moved = 1;
                        }
                        else
                        {
                            moved = 0;
                        }

                        return textLength;
                    }

                default:
                    throw new System.ComponentModel.InvalidEnumArgumentException("unit", (int)unit, typeof(TextUnit));
            }
        }
Example #22
0
        /// <summary>
        /// Processes every document in parallel (break into words, stem, remove stop words) and adds
        /// the resulting words to <paramref name="existingMatrix"/>. Whether or not processing succeeds,
        /// the matrix and the MonitoredScope statistics are then written to two newly created files.
        /// </summary>
        /// <param name="existingMatrix">Matrix that is updated in place.</param>
        /// <param name="documents">Text contents of the documents to learn from.</param>
        /// <returns>The same <paramref name="existingMatrix"/> instance that was passed in.</returns>
        public CorrelationMatrix UpdateCorrelationMatrix(CorrelationMatrix existingMatrix, IEnumerable <string> documents)
        {
            StopWordRemover stopwordRemover = new StopWordRemover();
            SentenceBreaker sb = SentenceBreaker.Instance;

            // Interlocked.Increment returns the value *after* incrementing, so the counter
            // starts at 0 to make the first document "No. 1".
            int i = 0;

            try
            {
                // NOTE(review): stopwordRemover, sb and existingMatrix are shared by all parallel
                // iterations - confirm that the members called below are thread-safe.
                Parallel.ForEach(documents, (documentContents, loopState) =>
                {
                    int documentNumber = Interlocked.Increment(ref i);
                    using (new MonitoredScope("Learning from a document No. " + documentNumber.ToString()))
                    {
                        // the stemmer is created per document, so it is not shared between iterations.
                        SStemmer stemmer = new SStemmer();

                        string[] words = sb.BreakIntoWords(documentContents);
                        words = stemmer.StemWords(words);
                        words = stopwordRemover.RemoveStopWords(words);

                        existingMatrix.Add(words);
                    }

                    Logger.Log("Finished document number: " + documentNumber.ToString());

                    // request an early exit from the parallel loop once the matrix has grown past 100000 words.
                    if (existingMatrix.Words.Count > 100000)
                    {
                        loopState.Break();
                    }
                });
            }
            finally
            {
                // persist whatever was learned, even when the loop above threw.
                Logger.Log("Unique words: " + existingMatrix.WordsMetadata.Count + ", Pairs: " + existingMatrix.Matrix.Count);
                string filename = "autorss_" + Guid.NewGuid().ToString();
                using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
                {
                    new CorrelationMatrixBinarySerializer().Serialize(fs, existingMatrix);
                }

                Logger.Log("Correlation Matrix saved to file: " + filename);

                filename = "autorss_Scopes_" + Guid.NewGuid().ToString();
                using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
                {
                    MonitoredScope.SerializeStatistics(fs);
                }

                Logger.Log("MonitoredScopes saved to file: " + filename);
            }

            return existingMatrix;
        }
Example #23
0
        /// <summary>
        /// Reads the "text" elements of a Wikipedia XML dump, builds a correlation matrix from the
        /// articles in the range [WikipediaStartArticle, WikipediaEndArticle) and writes the matrix and
        /// the MonitoredScope statistics to two newly created files.
        /// </summary>
        /// <param name="programArgs">Supplies the first (inclusive) and last (exclusive) article numbers to process.</param>
        private static void CalculateCorrelationFromWikipediaDB(ProgramArguments programArgs)
        {
            WordBreaker       wordBreaker       = new WordBreaker();
            StopWordRemover   stopwordRemover   = new StopWordRemover();
            SStemmer          stemmer           = new SStemmer();
            CorrelationMatrix correlationMatrix = new CorrelationMatrix();

            // NOTE(review): machine-specific hard-coded path - consider supplying it via programArgs.
            string wikipediaPath = @"C:\Users\haabu\Downloads\enwiki-latest-pages-articles.xml\enwiki-latest-pages-articles.xml";

            // The stream gets its own using block: a reader created with default settings does not
            // close its input stream. The file is only read, so read access is sufficient.
            using (FileStream input = new FileStream(wikipediaPath, FileMode.Open, FileAccess.Read))
            using (XmlReader sr = XmlReader.Create(input))
            {
                // skip the articles that come before the requested start.
                for (int i = 0; i < programArgs.WikipediaStartArticle; i++)
                {
                    if (!sr.ReadToFollowing("text"))
                    {
                        break;
                    }
                }

                for (int i = programArgs.WikipediaStartArticle; i < programArgs.WikipediaEndArticle; i++)
                {
                    if (!sr.ReadToFollowing("text"))
                    {
                        // no further articles in the input, so there is nothing left to process.
                        break;
                    }

                    // step into the element and read its text content.
                    sr.ReadStartElement();
                    string pageContents = sr.ReadContentAsString();

                    string[] words = wordBreaker.BreakParagraph(pageContents);
                    words = stopwordRemover.RemoveStopWords(words);
                    words = stemmer.StemWords(words);

                    correlationMatrix.Add(words);

                    Logger.Log("Finished document number: " + (i + 1).ToString());
                }
            }

            string filename = "autorss_" + Guid.NewGuid().ToString();

            using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
            {
                // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data; it is kept
                // here only so the on-disk format stays the same. Consider migrating to another serializer.
                BinaryFormatter formatter = new BinaryFormatter();
                formatter.Serialize(fs, correlationMatrix);
            }

            Logger.Log("Saved to file: " + filename);

            filename = "autorss_Scopes_" + Guid.NewGuid().ToString();
            using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
            {
                MonitoredScope.SerializeStatistics(fs);
            }

            Logger.Log("Saved to file: " + filename);
        }