Beispiel #1
0
        /// <summary>
        /// Parses a string of text containing tagged words, e.g. "LASI/NNP can/MD sniff-out/VBP
        /// the/DT problem/NN", and returns a collection containing, for each word, a lazily
        /// evaluated factory which creates the Part of Speech subtyped <see cref="Word"/>
        /// instance representing that word.
        /// </summary>
        /// <param name="text">A string containing tagged words.</param>
        /// <returns>
        /// A list of <see cref="Lazy{T}"/> instances which, when their value is first requested,
        /// create the <see cref="Word"/> representing the corresponding tagged word in the source.
        /// Elements for which the extractor yields no text/tag pair are omitted.
        /// </returns>
        /// <remarks>
        /// The word factory is not invoked until <see cref="Lazy{T}.Value"/> is read, so an
        /// <see cref="UnknownWordTagException"/> can only surface at that point. The fallback to
        /// <see cref="UnknownWord"/> is therefore performed inside the deferred delegate rather
        /// than around the construction of the <see cref="Lazy{T}"/>, where it could never run.
        /// </remarks>
        protected virtual List<Lazy<Word>> CreateWordExpressions(string text)
        {
            var wordExpressions = new List<Lazy<Word>>();
            var elements = GetTaggedWordStrings(text);
            var posExtractor = new TaggedWordExtractor();
            var wordFactory = new WordFactory(wordTagset);

            foreach (var element in elements)
            {
                var textTagPair = posExtractor.Extract(element);
                if (!textTagPair.HasValue)
                {
                    // Nothing could be extracted from this element; skip it.
                    continue;
                }

                var pair = textTagPair.Value;
                wordExpressions.Add(new Lazy<Word>(() =>
                {
                    // The try/catch must live inside the delegate: this is the only place
                    // where wordFactory.Create actually executes.
                    try
                    {
                        return wordFactory.Create(pair);
                    }
                    catch (UnknownWordTagException e)
                    {
                        Logger.Log(
                            $@"{e.Message}
                            Text: {pair.Text}
                            Instantiating new {nameof(Lazy<UnknownWord>)} holding the literal content, {element},  to compensate."
                            );
                        return new UnknownWord(pair.Text);
                    }
                }));
            }
            return wordExpressions;
        }
Beispiel #2
0
        /// <summary>
        /// Converts text made up of tagged words, such as "LASI/NNP can/MD sniff-out/VBP
        /// the/DT problem/NN", into the Part of Speech subtyped <see cref="Word"/> instances
        /// that represent those words.
        /// </summary>
        /// <param name="text">
        /// The tagged-word text from which <see cref="Word"/> instances are built.
        /// </param>
        /// <returns>
        /// A list holding one <see cref="Word"/> per tagged token that yielded a text/tag pair.
        /// Tokens for which the factory throws an <see cref="EmptyWordTagException"/> or an
        /// <see cref="UnknownWordTagException"/> are represented by an <see cref="UnknownWord"/>;
        /// tokens for which an <see cref="EmptyOrWhiteSpaceStringTaggedAsWordException"/> is
        /// thrown are logged and left out of the result.
        /// </returns>
        protected virtual List<Word> CreateWords(string text)
        {
            List<Word> words = new List<Word>();
            TaggedWordExtractor extractor = new TaggedWordExtractor();
            WordFactory wordFactory = new WordFactory(wordTagset);

            foreach (var token in GetTaggedWordStrings(text))
            {
                var extracted = extractor.Extract(token);
                if (!extracted.HasValue)
                {
                    // No text/tag pair could be extracted from this token.
                    continue;
                }

                var tagged = extracted.Value;
                try
                {
                    words.Add(wordFactory.Create(tagged));
                }
                catch (PartOfSpeechTagException failure) when (failure is UnknownWordTagException || failure is EmptyWordTagException)
                {
                    // Fall back to an UnknownWord carrying the raw text.
                    Logger.Log(
                        $@"{failure.Message}
                            Text: {tagged.Text}
                            Instantiating new {nameof(UnknownWord)} to compensate.
                            Attempting to parse data: {token}"
                    );
                    words.Add(new UnknownWord(tagged.Text));
                }
                catch (EmptyOrWhiteSpaceStringTaggedAsWordException blank)
                {
                    // Nothing to represent: log and drop the token.
                    Logger.Log($"\n{blank.Message} + \nDiscarding");
                }
            }

            return words;
        }