Ejemplo n.º 1
0
        // For either a file to annotate or for the hardcoded text example,
        // this demo file shows two ways to process the output, for teaching
        // purposes.  For the file, it shows both how to run NER on a String
        // and how to run it on a whole file.  For the hard-coded String,
        // it shows how to run it on a single sentence, and how to do this
        // and produce an inline XML output format.

        /// <summary>
        /// Demo entry point. When a command-line argument is given, the file it names is read
        /// and classified, and each token is printed together with its answer annotation, one
        /// sentence per line. Without arguments, two hard-coded sentences are classified and
        /// printed in several output formats.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c>, if present, is the path of the file to classify.</param>
        static void Main(string[] args)
        {
            // No file supplied: run the hard-coded examples and stop.
            if (args.Length == 0)
            {
                const string S1 = "Good afternoon Rajat Raina, how are you today?";
                const string S2 = "I go to school at Stanford University, which is located in California.";

                Console.WriteLine("{0}\n", Classifier.classifyToString(S1));
                Console.WriteLine("{0}\n", Classifier.classifyWithInlineXML(S2));
                Console.WriteLine("{0}\n", Classifier.classifyToString(S2, "xml", true));

                // Dump every element of the classification result together with its index.
                var position = 0;
                foreach (var item in Classifier.classify(S2).toArray())
                {
                    Console.WriteLine("{0}\n:{1}\n", position, item);
                    position++;
                }

                return;
            }

            // A file was supplied: classify its whole content.
            var text          = File.ReadAllText(args[0]);
            var fileSentences = Classifier.classify(text).toArray();

            for (var s = 0; s < fileSentences.Length; s++)
            {
                var tokens = ((List)fileSentences[s]).toArray();

                for (var t = 0; t < tokens.Length; t++)
                {
                    var token = (CoreLabel)tokens[t];
                    Console.Write("{0}/{1} ", token.word(), token.get(new CoreAnnotations.AnswerAnnotation().getClass()));
                }

                // End the line after the last token of the sentence.
                Console.WriteLine();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads the 3-class CRF classifier model from the NLP models folder and returns the
        /// classifier's inline-XML output for <paramref name="story"/>.
        /// </summary>
        /// <param name="story">Text to classify.</param>
        /// <returns>The string returned by <c>classifyWithInlineXML</c>.</returns>
        /// <remarks>The model file is loaded again on every call.</remarks>
        private static string GetNLPResults(string story)
        {
            // Path to the folder with classifier models, relative to the application base directory.
            // Path.Combine only inserts a separator when one is missing, so the result is valid
            // whether or not BaseDirectory ends with a directory separator.
            string classifiersDirectory = System.IO.Path.Combine(
                AppDomain.CurrentDomain.BaseDirectory,
                @"..\DirectSupply.Anonymize.Service\Models\NLP");

            string modelPath = System.IO.Path.Combine(
                classifiersDirectory,
                "english.all.3class.distsim.crf.ser.gz");

            // Loading 3 class classifier model
            CRFClassifier classifier = CRFClassifier.getClassifierNoExceptions(modelPath);

            return classifier.classifyWithInlineXML(story);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Classifies <paramref name="sentences"/> with inline XML and extracts every
        /// <c>&lt;tag&gt;text&lt;/...&gt;</c> span from the output.
        /// </summary>
        /// <param name="classifier">Classifier used to tag the text.</param>
        /// <param name="sentences">Text to classify.</param>
        /// <returns>
        /// One (word, tag) tuple per matched span, in match order. Spans whose tag is empty or
        /// whitespace are skipped.
        /// </returns>
        public static List <(string, string)> ClassifyWordsWithTypes(this CRFClassifier classifier, string sentences)
        {
            string taggedText = classifier.classifyWithInlineXML(sentences);
            MatchCollection spans = Regex.Matches(taggedText, @"<(?<tag>[^<>]*)>(?<word>[^<>]*)<\/[^<>]*>");

            var pairs = new List <(string, string)>();

            for (int i = 0; i < spans.Count; i++)
            {
                Match span  = spans[i];
                string label = span.Groups["tag"].Value;

                // A span without a usable tag name carries no type information.
                if (string.IsNullOrWhiteSpace(label))
                {
                    continue;
                }

                pairs.Add((span.Groups["word"].Value, label));
            }

            return pairs;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Classifies <paramref name="text"/> and collects the distinct location, person and
        /// organization entities found in the classifier's inline-XML output.
        /// </summary>
        /// <param name="text">Text to classify. <c>&amp;</c> and <c>&lt;</c> are escaped before classification.</param>
        /// <param name="lowercase">When true, entity strings are lower-cased (invariant culture) before being stored.</param>
        /// <returns>
        /// A dictionary from entity text to a type code: 1 = location, 2 = person, 3 = organization.
        /// Types are processed in that order and an existing key is never overwritten, so a string
        /// tagged with several types keeps the code of the first type it was seen under.
        /// </returns>
        public static Dictionary <string, int> GetNamedEntititesForText(string text, bool lowercase = true)
        {
            Dictionary <string, int> entities = new Dictionary <string, int>();
            string xml = classifier.classifyWithInlineXML(
                text.Replace("&", "&amp;").Replace("<", "&lt;"));

            // The classifier output is a fragment; wrap it in a root element so it parses as a document.
            XDocument doc = XDocument.Parse(string.Concat("<root>", xml, "</root>"));

            AddEntitiesWithCode(entities, doc.Descendants(Constants.LocationNamedEntity), 1, lowercase);
            AddEntitiesWithCode(entities, doc.Descendants(Constants.PersonNamedEntity), 2, lowercase);
            AddEntitiesWithCode(entities, doc.Descendants(Constants.OrganizationNamedEntity), 3, lowercase);

            return entities;
        }

        /// <summary>
        /// Adds the text of each element to <paramref name="entities"/> under <paramref name="code"/>,
        /// skipping texts that are already present.
        /// </summary>
        private static void AddEntitiesWithCode(Dictionary <string, int> entities, IEnumerable <XElement> elements, int code, bool lowercase)
        {
            foreach (XElement element in elements)
            {
                string key = lowercase ? element.Value.ToLowerInvariant() : element.Value;
                if (!entities.ContainsKey(key))
                {
                    entities.Add(key, code);
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Loads the CRF model at <paramref name="inputPath"/>, tags
        /// <paramref name="partial_address"/> with inline XML and appends the text of each tagged
        /// element to the matching field of <paramref name="addr"/>.
        /// </summary>
        /// <param name="inputPath">Path of the classifier model to load.</param>
        /// <param name="partial_address">Address text to tag.</param>
        /// <param name="addr">
        /// Address record to fill in. Text is appended to its fields, which are trimmed at the end;
        /// <c>numbers</c> is replaced by the texts of all <c>NUMBER</c> elements.
        /// </param>
        /// <remarks>
        /// If the tagged output is not well-formed XML, a message is written to the console and
        /// the method returns without touching <paramref name="addr"/>.
        /// </remarks>
        public void useModel(string inputPath, string partial_address, ref NERAddress addr)
        {
            CRFClassifier model = CRFClassifier.getClassifierNoExceptions(inputPath);

            // "0" is not a valid XML element name, so rename that tag before parsing.
            string tagged_address = model.classifyWithInlineXML(partial_address)
                                         .Replace("<0>", "<ZERO>")
                                         .Replace("</0>", "</ZERO>");

            // The output is a fragment; wrap it in a root element so it can be loaded.
            XmlDocument doc = new XmlDocument();

            try
            {
                doc.LoadXml("<root>" + tagged_address + "</root>");
            }
            catch (XmlException e)
            {
                Console.WriteLine("Exception occurred while parsing xml: " + e.Message);
                return;
            }

            Console.WriteLine("Model output: " + tagged_address);

            string numbers = "";

            foreach (XmlNode node in doc.DocumentElement.ChildNodes)
            {
                string tagName = node.Name;
                string piece   = node.InnerText + " ";

                // The label prefixes are mutually exclusive, so at most one branch applies per node.
                if (Regex.IsMatch(tagName, "^[IOB]-LOCALITY"))
                {
                    addr.locality += piece;
                }
                else if (Regex.IsMatch(tagName, "^[IOB]-SECONDARY_LOCALITY"))
                {
                    addr.secondary_locality += piece;
                }
                else if (Regex.IsMatch(tagName, "^[IOB]-THOROFARE"))
                {
                    addr.thorofare += piece;
                }
                else if (Regex.IsMatch(tagName, "^[IOB]-BUILDING_GROUP_NAME"))
                {
                    addr.building_group_name += piece;
                }
                else if (Regex.IsMatch(tagName, "^[IOB]-BUILDING_NAME"))
                {
                    addr.building_name += piece;
                }
                else if (Regex.IsMatch(tagName, "^[IOB]-SUB_BUILDING_NAME"))
                {
                    addr.sub_building_name += piece;
                }
                else if (Regex.IsMatch(tagName, "^[IOB]-BUILDING_NUMBER"))
                {
                    addr.building_number += piece;
                }
                else if (Regex.IsMatch(tagName, "^[IOB]-DEPARTMENT"))
                {
                    addr.department += piece;
                }
                else if (Regex.IsMatch(tagName, "^[IOB]-ORGANISATION_NAME"))
                {
                    addr.organisation_name += piece;
                }
                else if (tagName == "NUMBER")
                {
                    numbers += piece;
                }
            }

            string[] separators = new string[] { " " };
            addr.numbers = numbers.Split(separators, StringSplitOptions.RemoveEmptyEntries);

            // Drop the trailing separator left by the appends above.
            addr.locality            = addr.locality.Trim();
            addr.secondary_locality  = addr.secondary_locality.Trim();
            addr.thorofare           = addr.thorofare.Trim();
            addr.building_group_name = addr.building_group_name.Trim();
            addr.building_name       = addr.building_name.Trim();
            addr.sub_building_name   = addr.sub_building_name.Trim();
            addr.building_number     = addr.building_number.Trim();
            addr.department          = addr.department.Trim();
            addr.organisation_name   = addr.organisation_name.Trim();
        }
Ejemplo n.º 6
0
        // Single HttpClient shared by every scrape. Creating and disposing a client per request
        // can exhaust sockets under load, so the instance is reused.
        private static readonly HttpClient _artistPageClient = CreateArtistPageClient();

        // Builds the shared client with the User-Agent header sent with artist page requests.
        private static HttpClient CreateArtistPageClient()
        {
            HttpClient client = new HttpClient();
            client.DefaultRequestHeaders.Add("User-Agent", "C# console program");
            return client;
        }

        /// <summary>
        /// Downloads the artist's "about" page, extracts the bio and the listener-city table,
        /// geocodes each listener city and stores it as a feature in the listener table, then
        /// adds the artist's name to the progress list.
        /// </summary>
        /// <param name="artist">Artist whose page is scraped; its <c>Id</c> and <c>Name</c> are used.</param>
        /// <remarks>
        /// Returns early, without writing anything, when the page has no "about" entry.
        /// The bio is still classified for locations, but the code that turns those matches into
        /// hometown/other-location features is currently commented out, so the results are unused.
        /// </remarks>
        private async Task ScrapeArtistInfo(FullArtist artist)
        {
            string bioUrl      = $"https://open.spotify.com/artist/{artist.Id}/about";
            string pageContent = await _artistPageClient.GetStringAsync(bioUrl);

            var artistResults = _artistScraping.Extract(pageContent);
            var cities        = artistResults["listeners-city"];
            var listeners     = artistResults["listeners"];
            var bio           = artistResults["about"];

            // if there's no bio, we can't find locations :(
            if (bio == null)
            {
                return;
            }

            // "more" is optional just like "about"; treat a missing entry as empty text.
            string fullBio     = bio.ToString() + artistResults["more"]?.ToString();
            int    shortBioEnd = System.Math.Min(fullBio.Length, 255);
            string shortBio    = fullBio.Substring(0, shortBioEnd);

            Dictionary <string, int> listenerCities = new Dictionary <string, int>();

            // Pair listener counts with cities by position; only walk the rows both lists have,
            // and tolerate either list being absent from the scrape results.
            int listenerRows = (listeners == null || cities == null)
                ? 0
                : System.Math.Min(listeners.Count(), cities.Count());

            for (int i = 0; i < listenerRows; i++)
            {
                string listenersString = listeners[i].ToString().Replace("LISTENERS", "").Replace(",", "").Trim();
                int    numListeners    = int.Parse(listenersString);
                listenerCities.Add(cities[i].ToString(), numListeners);
            }

            string aboutArtistJson = JsonConvert.SerializeObject(fullBio, Newtonsoft.Json.Formatting.Indented);
            string classifiedXml   = _classifier.classifyWithInlineXML(aboutArtistJson);

            // HACK: fix "city, state" locations that are split into two:
            //       "<LOCATION>Logan</LOCATION>, <LOCATION>Utah</LOCATION>" => "<LOCATION>Logan, Utah</LOCATION>"
            classifiedXml = classifiedXml.Replace("</LOCATION>, <LOCATION>", ", ");
            MatchCollection locationMatches = _locationRx.Matches(classifiedXml);

            Dictionary <string, Graphic> artistLocations = new Dictionary <string, Graphic>();
            //UNCOMMENT
            //// Build artist locations
            //for (int i = 0; i < locationMatches.Count; i++)
            //{
            //    var m = locationMatches[i];
            //    string loc = m.Groups[1].Value;
            //    MapPoint geocodedLocation = await GeocodeArtistPlacename(loc);
            //    if (geocodedLocation == null) { continue; }

            //    // If the place name was geocoded, create a new feature to store it
            //    // (first one is considered the hometown) :\
            //    if (i == 0)
            //    {
            //        Feature newHometownFeature = _hometownTable.CreateFeature();
            //        newHometownFeature.Geometry = geocodedLocation;
            //        newHometownFeature.Attributes["placename"] = loc;
            //        newHometownFeature.Attributes["artistname"] = artist.Name;
            //        newHometownFeature.Attributes["artistid"] = artist.Id;
            //        newHometownFeature.Attributes["imageurl"] = artist.Images.Last().Url;
            //        newHometownFeature.Attributes["bioshort"] = shortBio;

            //        await _hometownTable.AddFeatureAsync(newHometownFeature);
            //    }
            //    else
            //    {
            //        if (!artistLocations.ContainsKey(loc))
            //        {
            //            Feature otherFeature = _otherPointsTable.CreateFeature();
            //            otherFeature.Geometry = geocodedLocation;
            //            otherFeature.Attributes["placename"] = loc;
            //            otherFeature.Attributes["artistname"] = artist.Name;
            //            otherFeature.Attributes["artistid"] = artist.Id;

            //            await _otherPointsTable.AddFeatureAsync(otherFeature);
            //        }
            //    }
            //}

            //// Apply edits to the hometown table (will apply other edits after adding listener cities)
            //await _hometownTable.ApplyEditsAsync();
            //**UNCOMMENT

            // Create points for the listener cities
            int r = 0;

            foreach (var lc in listenerCities)
            {
                // The rank advances even when geocoding fails, so it reflects the city's position in the list.
                r++;
                MapPoint geocodedLocation = await GeocodeArtistPlacename(lc.Key);

                if (geocodedLocation == null)
                {
                    continue;
                }

                Feature otherFeature = _listenerTable.CreateFeature();
                otherFeature.Geometry = geocodedLocation;
                otherFeature.Attributes["placename"]     = lc.Key;
                otherFeature.Attributes["artistname"]    = artist.Name;
                otherFeature.Attributes["artistid"]      = artist.Id;
                otherFeature.Attributes["listenercount"] = lc.Value;
                otherFeature.Attributes["listenerrank"]  = r;

                await _listenerTable.AddFeatureAsync(otherFeature);
            }

            // Apply edits to the other locations table
            await _listenerTable.ApplyEditsAsync();

            ArtistProgressList.Items.Add(artist.Name);
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Runs <paramref name="classifier"/> over <paramref name="input"/> and returns its
 /// inline-XML output.
 /// </summary>
 /// <param name="classifier">Classifier used to tag the text.</param>
 /// <param name="input">Text to classify.</param>
 /// <returns>The string returned by <c>classifyWithInlineXML</c>.</returns>
 protected string ParseToCrf(CRFClassifier classifier, string input)
 {
     string inlineXml = classifier.classifyWithInlineXML(input);

     return inlineXml;
 }