/// <summary>
/// Parses a string of text containing tagged words, e.g. "LASI/NNP can/MD sniff-out/VBP
/// the/DT problem/NN", and returns a collection containing, for each word, a lazily
/// evaluated factory which will create the Part of Speech subtyped <see cref="Word"/>
/// instance representing that word.
/// </summary>
/// <param name="text">A string containing tagged words.</param>
/// <returns>
/// The list of <see cref="Lazy{T}"/> instances which, when their value is first requested,
/// create the <see cref="Word"/> instances which represent each word in the source.
/// Elements for which the extractor yields no text/tag pair are skipped.
/// </returns>
/// <remarks>
/// The word factory is not invoked until a returned <see cref="Lazy{T}"/> is evaluated, so
/// an unknown tag can only be detected at that point. The fallback to
/// <see cref="UnknownWord"/> is therefore performed inside the deferred factory itself
/// rather than around the construction of the <see cref="Lazy{T}"/>.
/// </remarks>
protected virtual List<Lazy<Word>> CreateWordExpressions(string text)
{
    var wordExpressions = new List<Lazy<Word>>();
    var elements = GetTaggedWordStrings(text);
    var posExtractor = new TaggedWordExtractor();
    var wordFactory = new WordFactory(wordTagset);

    foreach (var element in elements)
    {
        var textTagPair = posExtractor.Extract(element);
        if (!textTagPair.HasValue)
        {
            continue;
        }

        var pair = textTagPair.Value;

        // The try/catch must live inside the delegate: wrapping only the Lazy<Word>
        // constructor would never observe an exception thrown by wordFactory.Create,
        // because the delegate does not run until the value is requested.
        wordExpressions.Add(new Lazy<Word>(() =>
        {
            try
            {
                return wordFactory.Create(pair);
            }
            catch (UnknownWordTagException e)
            {
                Logger.Log(
                    $@"{e.Message} Text: {pair.Text} Instantiating new {nameof(Lazy<UnknownWord>)} holding the literal content, {element}, to compensate."
                );
                return new UnknownWord(pair.Text);
            }
        }));
    }

    return wordExpressions;
}
/// <summary>
/// Eagerly converts a string of tagged words, e.g. "LASI/NNP can/MD sniff-out/VBP
/// the/DT problem/NN", into the Part of Speech subtyped <see cref="Word"/> instances
/// which represent them.
/// </summary>
/// <param name="text">
/// A string containing tagged words from which to instantiate <see cref="Word"/> instances.
/// </param>
/// <returns>
/// The list of <see cref="Word"/> instances, one per tagged word element that could be
/// extracted. Elements whose tag is empty or unknown are represented by an
/// <see cref="UnknownWord"/>; elements reported as empty or white space tagged as a word
/// are logged and omitted.
/// </returns>
protected virtual List<Word> CreateWords(string text)
{
    var results = new List<Word>();
    var extractor = new TaggedWordExtractor();
    var wordFactory = new WordFactory(wordTagset);

    foreach (var token in GetTaggedWordStrings(text))
    {
        var extracted = extractor.Extract(token);
        if (!extracted.HasValue)
        {
            // Nothing usable could be extracted from this element; move on.
            continue;
        }

        var tagged = extracted.Value;
        try
        {
            var word = wordFactory.Create(tagged);
            results.Add(word);
        }
        catch (PartOfSpeechTagException e) when (e is EmptyWordTagException || e is UnknownWordTagException)
        {
            // A missing or unrecognised tag still carries text worth keeping.
            Logger.Log(
                $@"{e.Message} Text: {tagged.Text} Instantiating new {nameof(UnknownWord)} to compensate. Attempting to parse data: {token}"
            );
            results.Add(new UnknownWord(tagged.Text));
        }
        catch (EmptyOrWhiteSpaceStringTaggedAsWordException x)
        {
            // No meaningful text to preserve, so the element is dropped.
            Logger.Log($"\n{x.Message} + \nDiscarding");
        }
    }

    return results;
}