/// <summary>
/// Extracts and returns all terms with the specified POS from a tagged string.
/// </summary>
/// <remarks>
/// The input is split on whitespace into tokens of the form <c>word_TAG</c>.
/// The tag is the text after the LAST underscore, so a word that itself contains
/// underscores (for example <c>my_var_NN</c>) is kept intact as <c>my_var</c>.
/// A tag starting with 'N' is classified as Noun, a tag starting with 'V' as Verb,
/// and every other tag as Other. Tokens without an underscore, or with nothing
/// after the last underscore, are ignored.
/// </remarks>
/// <param name="text">Source string of whitespace-separated tagged tokens</param>
/// <param name="pos">Part of speech to extract</param>
/// <returns>Space-separated string consisting of only the terms with the specified POS</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null</exception>
public static string ExtractPOS(string text, POSTaggerSpeechType pos)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    // Only the requested part of speech is ever returned, so a single list suffices.
    List<string> matches = new List<string>();

    foreach (string term in text.Split())
    {
        // Use the last underscore as the word/tag boundary; splitting on the first one
        // would truncate words that contain underscores and misread their tag.
        int separator = term.LastIndexOf('_');
        if (separator < 0 || separator == term.Length - 1)
        {
            continue;
        }

        POSTaggerSpeechType type;
        switch (term[separator + 1])
        {
            case 'N':
                type = POSTaggerSpeechType.Noun;
                break;
            case 'V':
                type = POSTaggerSpeechType.Verb;
                break;
            default:
                type = POSTaggerSpeechType.Other;
                break;
        }

        if (type == pos)
        {
            matches.Add(term.Substring(0, separator));
        }
    }

    return String.Join(" ", matches);
}
/// <summary>
/// Extracts and returns all terms with the specified POS from a tagged string.
/// </summary>
/// <remarks>
/// Each whitespace-separated token is read as <c>word_TAG</c>, where the tag is the
/// text following the LAST underscore. Words containing underscores therefore survive
/// unchanged (<c>my_var_NN</c> yields the noun <c>my_var</c>). The first character of
/// the tag decides the category: 'N' is Noun, 'V' is Verb, anything else is Other.
/// Tokens that have no underscore or an empty tag are skipped.
/// </remarks>
/// <param name="text">Source string of whitespace-separated tagged tokens</param>
/// <param name="pos">Part of speech to extract</param>
/// <returns>Space-separated string consisting of only the terms with the specified POS</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null</exception>
public static string ExtractPOS(string text, POSTaggerSpeechType pos)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    List<string> selected = new List<string>();

    foreach (string token in text.Split())
    {
        // Index of the first tag character; 0 means "no underscore at all",
        // token.Length means "underscore is the final character" (empty tag).
        int tagStart = token.LastIndexOf('_') + 1;
        if (tagStart == 0 || tagStart == token.Length)
        {
            continue;
        }

        char initial = token[tagStart];
        POSTaggerSpeechType tokenType =
            initial == 'N' ? POSTaggerSpeechType.Noun
            : initial == 'V' ? POSTaggerSpeechType.Verb
            : POSTaggerSpeechType.Other;

        if (tokenType == pos)
        {
            // Everything before the separating underscore is the word itself.
            selected.Add(token.Substring(0, tagStart - 1));
        }
    }

    return String.Join(" ", selected);
}
/// <summary>
/// Builds a new TLArtifactsCollection in which every artifact has been reduced
/// to the terms of the requested part of speech.
/// </summary>
/// <param name="artifacts">Collection of artifacts to process</param>
/// <param name="pos">Part of speech to extract</param>
/// <param name="modelFile">Training model file location, forwarded to ExtractArtifact</param>
/// <returns>New TLArtifactsCollection holding one extracted artifact per input entry, stored under the same key</returns>
public static TLArtifactsCollection Extract(TLArtifactsCollection artifacts, POSTaggerSpeechType pos, string modelFile)
{
    TLArtifactsCollection result = new TLArtifactsCollection();

    foreach (KeyValuePair<string, TLArtifact> entry in artifacts)
    {
        TLArtifact filtered = ExtractArtifact(entry.Value, pos, modelFile);
        result.Add(entry.Key, filtered);
    }

    return result;
}
/// <summary>
/// Applies POS extraction to each artifact of a TLArtifactsCollection and
/// collects the results in a fresh collection.
/// </summary>
/// <param name="artifacts">Collection of artifacts to process</param>
/// <param name="pos">Part of speech to extract</param>
/// <param name="modelFile">Training model file location, forwarded to ExtractArtifact</param>
/// <returns>TLArtifactsCollection consisting of only the terms with the specified POS, keyed like the input</returns>
public static TLArtifactsCollection Extract(TLArtifactsCollection artifacts, POSTaggerSpeechType pos, string modelFile)
{
    TLArtifactsCollection output = new TLArtifactsCollection();

    foreach (KeyValuePair<string, TLArtifact> pair in artifacts)
    {
        string key = pair.Key;
        TLArtifact original = pair.Value;

        // The input collection is only read; results go into the new collection.
        output.Add(key, ExtractArtifact(original, pos, modelFile));
    }

    return output;
}
/// <summary>
/// Produces a new TLArtifact that keeps the Id of the given artifact and whose
/// text contains only the terms with the specified POS.
/// </summary>
/// <param name="artifact">Single artifact to process</param>
/// <param name="pos">Part of speech to extract</param>
/// <param name="modelFile">Training model file location, forwarded to Tag</param>
/// <returns>New artifact with the same Id and the extracted text</returns>
public static TLArtifact ExtractArtifact(TLArtifact artifact, POSTaggerSpeechType pos, string modelFile)
{
    // Step 1: run the artifact text through Tag (defined elsewhere in this class).
    string tagged = Tag(artifact.Text, modelFile);

    // Step 2: keep only the terms of the requested part of speech.
    string filtered = ExtractPOS(tagged, pos);

    return new TLArtifact(artifact.Id, filtered);
}
/// <summary>
/// Tags the text of a single TLArtifact and returns a new artifact, with the
/// same Id, containing only the terms of the specified POS.
/// </summary>
/// <param name="artifact">Single artifact to process</param>
/// <param name="pos">Part of speech to extract</param>
/// <param name="modelFile">Training model file location, forwarded to Tag</param>
/// <returns>Single artifact consisting of only the terms with the specified POS</returns>
public static TLArtifact ExtractArtifact(TLArtifact artifact, POSTaggerSpeechType pos, string modelFile)
{
    string extractedText = ExtractPOS(Tag(artifact.Text, modelFile), pos);
    TLArtifact extractedArtifact = new TLArtifact(artifact.Id, extractedText);
    return extractedArtifact;
}