// For either a file to annotate or for the hardcoded text example, this demo
// shows two ways to process the output, for teaching purposes. For the file,
// it shows how to run NER on a String read from a whole file. For the
// hard-coded Strings, it shows how to run NER on a single sentence and how to
// produce an inline XML output format.
static void Main(string[] args)
{
    if (args.Length > 0)
    {
        var fileContent = File.ReadAllText(args[0]);
        foreach (List sentence in Classifier.classify(fileContent).toArray())
        {
            foreach (CoreLabel word in sentence.toArray())
            {
                Console.Write("{0}/{1} ", word.word(), word.get(new CoreAnnotations.AnswerAnnotation().getClass()));
            }
            Console.WriteLine();
        }
    }
    else
    {
        const string S1 = "Good afternoon Rajat Raina, how are you today?";
        const string S2 = "I go to school at Stanford University, which is located in California.";

        Console.WriteLine("{0}\n", Classifier.classifyToString(S1));
        Console.WriteLine("{0}\n", Classifier.classifyWithInlineXML(S2));
        Console.WriteLine("{0}\n", Classifier.classifyToString(S2, "xml", true));

        var classification = Classifier.classify(S2).toArray();
        for (var i = 0; i < classification.Length; i++)
        {
            Console.WriteLine("{0}\n:{1}\n", i, classification[i]);
        }
    }
}
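// The static Classifier field used by the demo above is not shown. A minimal
// sketch, assuming the stock English 3-class model sits in a "classifiers"
// folder next to the executable (the path is an assumption):
private static readonly CRFClassifier Classifier =
    CRFClassifier.getClassifierNoExceptions(@"classifiers\english.all.3class.distsim.crf.ser.gz");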
private static string GetNLPResults(string story)
{
    string results;

    // Path to the folder with classifier models
    string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
    string classifiersDirectory = baseDirectory + @"..\DirectSupply.Anonymize.Service\Models\NLP";

    // Load the 3-class classifier model
    CRFClassifier classifier = CRFClassifier.getClassifierNoExceptions(
        classifiersDirectory + @"\english.all.3class.distsim.crf.ser.gz");

    results = classifier.classifyWithInlineXML(story);

    return results;
}
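// Illustrative call: with the 3-class model, classifyWithInlineXML wraps each
// recognized entity in a tag named after its class, so the call below would
// return something like
// "I go to school at <ORGANIZATION>Stanford University</ORGANIZATION>, which is located in <LOCATION>California</LOCATION>."
string tagged = GetNLPResults("I go to school at Stanford University, which is located in California.");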
public static List<(string, string)> ClassifyWordsWithTypes(this CRFClassifier classifier, string sentences)
{
    List<(string, string)> results = new List<(string, string)>();

    string xmlResults = classifier.classifyWithInlineXML(sentences);
    foreach (Match match in Regex.Matches(xmlResults, @"<(?<tag>[^<>]*)>(?<word>[^<>]*)<\/[^<>]*>"))
    {
        string tag = match.Groups["tag"].Value;
        string word = match.Groups["word"].Value;

        if (!string.IsNullOrWhiteSpace(tag))
        {
            results.Add((word, tag));
        }
    }

    return results;
}
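// Illustrative usage of the ClassifyWordsWithTypes extension (the classifier
// variable and model path are assumptions):
CRFClassifier classifier = CRFClassifier.getClassifierNoExceptions(@"classifiers\english.all.3class.distsim.crf.ser.gz");
foreach ((string word, string tag) in classifier.ClassifyWordsWithTypes(
    "I go to school at Stanford University, which is located in California."))
{
    // e.g. "Stanford University -> ORGANIZATION", "California -> LOCATION"
    Console.WriteLine($"{word} -> {tag}");
}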
public static Dictionary<string, int> GetNamedEntititesForText(string text, bool lowercase = true)
{
    Dictionary<string, int> entities = new Dictionary<string, int>();

    // Escape XML special characters in the input so the tagged output can be
    // wrapped in a root element and parsed as XML
    string xml = classifier.classifyWithInlineXML(
        text.Replace("&", "&amp;").Replace("<", "&lt;"));
    XDocument doc = XDocument.Parse(string.Concat("<root>", xml, "</root>"));

    var locations = doc.Descendants(Constants.LocationNamedEntity);
    foreach (var location in locations)
    {
        string loc = lowercase ? location.Value.ToLowerInvariant() : location.Value;
        if (!entities.ContainsKey(loc))
        {
            entities.Add(loc, 1);
        }
    }

    var persons = doc.Descendants(Constants.PersonNamedEntity);
    foreach (var person in persons)
    {
        string pers = lowercase ? person.Value.ToLowerInvariant() : person.Value;
        if (!entities.ContainsKey(pers))
        {
            entities.Add(pers, 2);
        }
    }

    var organizations = doc.Descendants(Constants.OrganizationNamedEntity);
    foreach (var organization in organizations)
    {
        string org = lowercase ? organization.Value.ToLowerInvariant() : organization.Value;
        if (!entities.ContainsKey(org))
        {
            entities.Add(org, 3);
        }
    }

    return entities;
}
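// The Constants class referenced above is not shown. With the stock English
// 3-class model the inline XML element names are the entity class names, so
// the constants are presumably along these lines (an assumption):
public static class Constants
{
    public const string LocationNamedEntity = "LOCATION";
    public const string PersonNamedEntity = "PERSON";
    public const string OrganizationNamedEntity = "ORGANIZATION";
}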
public void useModel(string inputPath, string partial_address, ref NERAddress addr)
{
    CRFClassifier model = CRFClassifier.getClassifierNoExceptions(inputPath);

    //string tagged_address = model.classifyToString(partial_address);
    string tagged_address = model.classifyWithInlineXML(partial_address);

    // XML element names cannot start with a digit, so rename the <0> tag
    tagged_address = tagged_address.Replace("<0>", "<ZERO>");
    tagged_address = tagged_address.Replace("</0>", "</ZERO>");

    // parse xml
    XmlDocument doc = new XmlDocument();
    try
    {
        doc.LoadXml("<root>" + tagged_address + "</root>");
    }
    catch (XmlException e)
    {
        Console.WriteLine("Exception occurred while parsing xml: " + e.Message);
        return;
    }

    Console.WriteLine("Model output: " + tagged_address);

    string numbers = "";
    foreach (XmlNode node in doc.DocumentElement.ChildNodes)
    {
        if (Regex.IsMatch(node.Name, "^[IOB]-LOCALITY"))
        {
            addr.locality += node.InnerText + " ";
        }
        if (Regex.IsMatch(node.Name, "^[IOB]-SECONDARY_LOCALITY"))
        {
            addr.secondary_locality += node.InnerText + " ";
        }
        if (Regex.IsMatch(node.Name, "^[IOB]-THOROFARE"))
        {
            addr.thorofare += node.InnerText + " ";
        }
        if (Regex.IsMatch(node.Name, "^[IOB]-BUILDING_GROUP_NAME"))
        {
            addr.building_group_name += node.InnerText + " ";
        }
        if (Regex.IsMatch(node.Name, "^[IOB]-BUILDING_NAME"))
        {
            addr.building_name += node.InnerText + " ";
        }
        if (Regex.IsMatch(node.Name, "^[IOB]-SUB_BUILDING_NAME"))
        {
            addr.sub_building_name += node.InnerText + " ";
        }
        if (Regex.IsMatch(node.Name, "^[IOB]-BUILDING_NUMBER"))
        {
            addr.building_number += node.InnerText + " ";
        }
        if (Regex.IsMatch(node.Name, "^[IOB]-DEPARTMENT"))
        {
            addr.department += node.InnerText + " ";
        }
        if (Regex.IsMatch(node.Name, "^[IOB]-ORGANISATION_NAME"))
        {
            addr.organisation_name += node.InnerText + " ";
        }
        if (node.Name == "NUMBER")
        {
            numbers += node.InnerText + " ";
        }
    }

    addr.numbers = numbers.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries);
    addr.locality = addr.locality.Trim();
    addr.secondary_locality = addr.secondary_locality.Trim();
    addr.thorofare = addr.thorofare.Trim();
    addr.building_group_name = addr.building_group_name.Trim();
    addr.building_name = addr.building_name.Trim();
    addr.sub_building_name = addr.sub_building_name.Trim();
    addr.building_number = addr.building_number.Trim();
    addr.department = addr.department.Trim();
    addr.organisation_name = addr.organisation_name.Trim();
}
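// The NERAddress type is not shown here. A minimal sketch consistent with the
// fields populated above (member names and types are assumptions):
public class NERAddress
{
    public string locality = "";
    public string secondary_locality = "";
    public string thorofare = "";
    public string building_group_name = "";
    public string building_name = "";
    public string sub_building_name = "";
    public string building_number = "";
    public string department = "";
    public string organisation_name = "";
    public string[] numbers;
}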
private async Task ScrapeArtistInfo(FullArtist artist)
{
    string bioUrl = $"https://open.spotify.com/artist/{artist.Id}/about";
    string pageContent = "";

    using (HttpClient client = new HttpClient())
    {
        client.DefaultRequestHeaders.Add("User-Agent", "C# console program");
        pageContent = await client.GetStringAsync(bioUrl);
    }

    var artistResults = _artistScraping.Extract(pageContent);
    var cities = artistResults["listeners-city"];
    var listeners = artistResults["listeners"];
    var bio = artistResults["about"];

    // if there's no bio, we can't find locations :(
    if (bio == null) { return; }

    string fullBio = bio.ToString() + artistResults["more"].ToString();
    int shortBioEnd = System.Math.Min(fullBio.Length, 255);
    string shortBio = fullBio.Substring(0, shortBioEnd);

    Dictionary<string, int> listenerCities = new Dictionary<string, int>();
    for (int i = 0; i < listeners.Count(); i++)
    {
        string listenersString = listeners[i].ToString().Replace("LISTENERS", "").Replace(",", "").Trim();
        int numListeners = int.Parse(listenersString);
        listenerCities.Add(cities[i].ToString(), numListeners);
    }

    string aboutArtistJson = JsonConvert.SerializeObject(fullBio, Newtonsoft.Json.Formatting.Indented);
    string classifiedXml = _classifier.classifyWithInlineXML(aboutArtistJson);

    // HACK: fix "city, state" locations that are split into two:
    // "<LOCATION>Logan</LOCATION>, <LOCATION>Utah</LOCATION>" => "<LOCATION>Logan, Utah</LOCATION>"
    classifiedXml = classifiedXml.Replace("</LOCATION>, <LOCATION>", ", ");

    MatchCollection locationMatches = _locationRx.Matches(classifiedXml);
    Dictionary<string, Graphic> artistLocations = new Dictionary<string, Graphic>();

    //UNCOMMENT
    //// Build artist locations
    //for (int i = 0; i < locationMatches.Count; i++)
    //{
    //    var m = locationMatches[i];
    //    string loc = m.Groups[1].Value;
    //    MapPoint geocodedLocation = await GeocodeArtistPlacename(loc);
    //    if (geocodedLocation == null) { continue; }
    //
    //    // If the place name was geocoded, create a new feature to store it
    //    // (first one is considered the hometown) :\
    //    if (i == 0)
    //    {
    //        Feature newHometownFeature = _hometownTable.CreateFeature();
    //        newHometownFeature.Geometry = geocodedLocation;
    //        newHometownFeature.Attributes["placename"] = loc;
    //        newHometownFeature.Attributes["artistname"] = artist.Name;
    //        newHometownFeature.Attributes["artistid"] = artist.Id;
    //        newHometownFeature.Attributes["imageurl"] = artist.Images.Last().Url;
    //        newHometownFeature.Attributes["bioshort"] = shortBio;
    //        await _hometownTable.AddFeatureAsync(newHometownFeature);
    //    }
    //    else
    //    {
    //        if (!artistLocations.ContainsKey(loc))
    //        {
    //            Feature otherFeature = _otherPointsTable.CreateFeature();
    //            otherFeature.Geometry = geocodedLocation;
    //            otherFeature.Attributes["placename"] = loc;
    //            otherFeature.Attributes["artistname"] = artist.Name;
    //            otherFeature.Attributes["artistid"] = artist.Id;
    //            await _otherPointsTable.AddFeatureAsync(otherFeature);
    //        }
    //    }
    //}
    //// Apply edits to the hometown table (will apply other edits after adding listener cities)
    //await _hometownTable.ApplyEditsAsync();
    //**UNCOMMENT

    // Create points for the listener cities
    int r = 0;
    foreach (var lc in listenerCities)
    {
        r++;
        MapPoint geocodedLocation = await GeocodeArtistPlacename(lc.Key);
        if (geocodedLocation == null) { continue; }

        Feature otherFeature = _listenerTable.CreateFeature();
        otherFeature.Geometry = geocodedLocation;
        otherFeature.Attributes["placename"] = lc.Key;
        otherFeature.Attributes["artistname"] = artist.Name;
        otherFeature.Attributes["artistid"] = artist.Id;
        otherFeature.Attributes["listenercount"] = lc.Value;
        otherFeature.Attributes["listenerrank"] = r;
        await _listenerTable.AddFeatureAsync(otherFeature);
    }

    // Apply edits to the other locations table
    await _listenerTable.ApplyEditsAsync();

    ArtistProgressList.Items.Add(artist.Name);
}
protected string ParseToCrf(CRFClassifier classifier, string input)
{
    return classifier.classifyWithInlineXML(input);
}