static void Main(string[] args) { var jarRoot = @"stanford-ner-2016-10-31"; var classifiersDirecrory = jarRoot + @"\classifiers"; var classifier = CRFClassifier.getClassifierNoExceptions( classifiersDirecrory + @"\english.all.3class.distsim.crf.ser.gz"); var rawFileNames = Directory.GetFiles(@"Texts"); var markedFileNames = Directory.GetFiles(@"MarkedTexts"); for (int i = 0; i < rawFileNames.Length; ++i) { using (var rawReader = new StreamReader(rawFileNames[i])) using (var markedReader = new StreamReader(markedFileNames[i])) { string rawText = rawReader.ReadToEnd(); string rightMarkedText = markedReader.ReadToEnd(); var markedText = classifier.classifyWithInlineXML(rawText); Console.WriteLine($"File Name: {Path.GetFileName(rawFileNames[i])}\n"); Console.WriteLine($"{markedText}\n\n"); Console.WriteLine($"{rightMarkedText}\n"); } } }
public string getNER(string S)
{
    CRFClassifier Classifier = CRFClassifier.getClassifierNoExceptions(@"C:\english.all.3class.distsim.crf.ser.gz");
    //S = "David goes to school at Stanford University, which is located in California.";

    // Strip leading/trailing punctuation and remove commas before classifying
    string S3 = S.Trim(new Char[] { ',', '.' });
    string S2 = S3.Replace(@",", "");
    //Console.WriteLine(S2);

    String classify = Classifier.classifyToString(S2);
    string[] words = classify.Split(' ');
    string result = "";

    // Collect every token that was not tagged /O, i.e. every named entity
    foreach (string s in words)
    {
        if (!s.EndsWith("/O"))
        {
            //System.Console.WriteLine(s);
            result = result + s + "\n";
        }
    }
    return (result);
}
public void ExtractNeFromFile()
{
    var filePath = Files.NER.Classifier("english.all.3class.distsim.crf.ser.gz");
    var classifier = CRFClassifier.getClassifierNoExceptions(filePath);

    var fileContent = System.IO.File.ReadAllText(Files.DataFile("SampleText.txt"));
    var sentences = classifier.classify(fileContent).toArray();
    Assert.NotNull(sentences);

    var key = new CoreAnnotations.AnswerAnnotation().getClass();
    foreach (java.util.List rawSentence in sentences)
    {
        var sentence = rawSentence.toArray();
        Assert.NotNull(sentence);
        foreach (CoreLabel word in sentence)
        {
            var annotation = word.get(key);
            Assert.NotNull(annotation);
            TestContext.Out.WriteLine($"{word.word()}/{annotation}");
        }
        TestContext.Out.WriteLine();
    }
}
public string Location()
{
    // Path to where the NER model is stored
    var source = @"C:\Users\chris\Downloads\stanford-ner-2018-02-27\stanford-ner-2018-02-27\classifiers\english.all.3class.distsim.crf.ser.gz";

    // Tag used by the classifier for place names such as Nicosia or Limassol
    const string location = "LOCATION";

    var classifier = CRFClassifier.getClassifierNoExceptions(source);

    String[] words_array = answer.Split(' ');
    String[] taggedArray = classifier.classifyToString(answer).Split(' ');

    // Scan the tagged tokens and return the first word tagged as a location
    for (int a = 0; a < words_array.Length; a++)
    {
        string output = taggedArray[a];
        if (output.Contains(location))
        {
            Console.WriteLine(taggedArray[a]);
            return (words_array[a]);
        }
    }
    return (null);
}
public static CRFClassifier GetClassifierByLang(string lang)
{
    if (!classifiers.ContainsKey(lang))
    {
        classifiers.Add(lang, CRFClassifier.getClassifierNoExceptions(
            classifiersDirectory + StanfordEnv.GetNerLanguageFiles(lang)));
    }
    return (classifiers[lang]);
}
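A hedged usage sketch for the per-language cache above. `classifiers`, `classifiersDirectory`, and `StanfordEnv.GetNerLanguageFiles` belong to the surrounding class and are assumed rather than shown; the language key and the sample sentence are illustrative.

// Hypothetical caller: the first request for a language loads and caches the model,
// later requests reuse the same CRFClassifier instance.
CRFClassifier english = GetClassifierByLang("english");
CRFClassifier cached = GetClassifierByLang("english");
Console.WriteLine(ReferenceEquals(english, cached)); // True: served from the cache
Console.WriteLine(english.classifyToString("Angela Merkel spoke in Berlin."));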
private async Task Load3rdParty()
{
    Loading.Visibility = Visibility.Visible;

    _ocr = new TesseractEngine("./tessdata", "eng", EngineMode.Default);
    _classifier = await Task.Run(() => CRFClassifier.getClassifierNoExceptions(@"english.all.3class.distsim.crf.ser.gz"));

    RunButton.Content = "Run";
    RunButton.IsEnabled = true;
    Loading.Visibility = Visibility.Collapsed;
}
/// <summary>
/// Initializes a new instance of the <see cref="Preprocessor" /> class.
/// </summary>
public Preprocessor()
{
    listLatestTokenizedArticle = new List<Token>();
    listWhoCandidates = new List<Candidate>();
    listWhenCandidates = new List<Candidate>();
    listWhereCandidates = new List<Candidate>();
    listWhatCandidates = new List<List<Token>>();
    listWhyCandidates = new List<List<Token>>();
    nerClassifier = CRFClassifier.getClassifierNoExceptions(nerModelPath);
    posTagger = new MaxentTagger(posModelPath);
}
public NER()
{
    try
    {
        string root = @"D:\Temp\NER\classifiers";
        Classifier = CRFClassifier.getClassifierNoExceptions(root + @"\english.all.3class.distsim.crf.ser.gz");
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
    }
}
private void Rectangle_MouseLeftButtonDown_2(object sender, MouseButtonEventArgs e)
{
    var jarRoot = @"D:\stanford-ner-2018-10-16";
    var classifiersDirectory = jarRoot + @"\classifiers";

    // Loading 3 class classifier model
    var classifier = CRFClassifier.getClassifierNoExceptions(
        classifiersDirectory + @"\english.all.3class.distsim.crf.ser.gz");

    conn.Open();
    cmd = new OleDbCommand("SELECT * From articles", conn);
    reader = cmd.ExecuteReader();

    while (reader.Read())
    {
        var s1 = reader["TNote"].ToString();
        var s2 = classifier.classifyWithInlineXML(s1);

        // Collect the tokens between <PERSON> and </PERSON> tags
        List<string> words = s2.Split(' ', ',', '<', '>').ToList();
        List<string> person = new List<string>();
        int count = 0;
        int count1 = 0;
        bool isTagPerson = false;
        foreach (var word in words)
        {
            if (word == "/PERSON")
            {
                isTagPerson = false;
                count1 += 1;
            }
            if (isTagPerson)
            {
                person[count1] = person[count1] + word + " ";
            }
            if (word == "PERSON")
            {
                isTagPerson = true;
                person.Add("");
            }
            count += 1;
        }

        // Store each extracted person name
        for (int i = 0; i < count1; i++)
        {
            OleDbCommand cmd2 = new OleDbCommand("Insert Into AllNames(PName) Values(@PN)", conn);
            cmd2.Parameters.AddWithValue("PN", person[i]);
            cmd2.ExecuteNonQuery();
        }
    }
    conn.Close();
}
static void Main(string[] args)
{
    var propPath = @"..\..\train.prop";
    var modelPath = @"..\..\ner-model.ser.gz";

    TrainAndWrite(propPath, modelPath);

    var crf = CRFClassifier.getClassifierNoExceptions(modelPath);
    String[] tests = new String[] { "apple watch", "samsung mobile phones", " lcd 52 inch tv" };
    foreach (String item in tests)
    {
        DoTagging(crf, item);
    }
}
private static string GetNLPResults(string story)
{
    string results;

    // Path to the folder with classifiers models
    string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
    string classifiersDirectory = baseDirectory + @"..\DirectSupply.Anonymize.Service\Models\NLP";

    // Loading 3 class classifier model
    CRFClassifier classifier = CRFClassifier.getClassifierNoExceptions(
        classifiersDirectory + @"\english.all.3class.distsim.crf.ser.gz");

    results = classifier.classifyWithInlineXML(story);
    return (results);
}
private async Task Init()
{
    // Path to the folder with classifiers models
    var jarRoot = @"C:\stanford-ner-2018-10-16";
    var classifiersDirectory = jarRoot + @"\classifiers";

    // Loading 3 class classifier model
    _classifier = CRFClassifier.getClassifierNoExceptions(
        classifiersDirectory + @"\english.all.3class.distsim.crf.ser.gz");

    // Define a regular expression for finding the location element
    _locationRx = new Regex(@"<LOCATION\b[^>]*>(.*?)</LOCATION>",
        RegexOptions.Compiled | RegexOptions.IgnoreCase);

    // Define configurations for parsing artist and listener info
    var configArtistInfoJson = @"
    {
        'artist': '//h1[contains(@class, \'view-header\')]',
        'about': '//div[contains(@class, \'bio-primary\')]',
        'more': '//div[contains(@class, \'bio-secondary\')]',
        'listeners-city': '//span[contains(@class, \'horizontal-list__item__title\')]',
        'listeners': '//span[contains(@class, \'horizontal-list__item__subtitle\')]'
    }";
    ConfigSection configArtist = StructuredDataConfig.ParseJsonString(configArtistInfoJson);
    _artistScraping = new StructuredDataExtractor(configArtist);

    // Get the hosted feature layers for editing
    ArcGISPortal portal = await ArcGISPortal.CreateAsync();
    PortalItem hometownLayerItem = await PortalItem.CreateAsync(portal, _hometownLayerId);
    PortalItem otherPointsLayerItem = await PortalItem.CreateAsync(portal, _otherPointsLayerId);
    PortalItem listenerLayerItem = await PortalItem.CreateAsync(portal, _listenerLayerId);
    _hometownTable = new ServiceFeatureTable(hometownLayerItem, 0);
    _otherPointsTable = new ServiceFeatureTable(otherPointsLayerItem, 0);
    _listenerTable = new ServiceFeatureTable(listenerLayerItem, 0);
    await _hometownTable.LoadAsync();
    await _otherPointsTable.LoadAsync();
    await _listenerTable.LoadAsync();
}
public string classifyToString(string sentence, string outputFormat = "slashTags")
{
    // Loading classes classifier model
    var classifier = CRFClassifier.getClassifierNoExceptions(classifiersDirecrory + model);

    return (classifier.classifyToString(sentence, outputFormat, true));

    //.WriteLine("{0}\n", classifier.classifyToString(sentence, outputFormat, true));
    //var classified = classifier.classifyToCharacterOffsets(s1).toArray();
    //for (int i = 0; i < classified.Length; i++)
    //{
    //    Triple triple = (Triple)classified[i];
    //    int second = Convert.ToInt32(triple.second().ToString());
    //    int third = Convert.ToInt32(triple.third().ToString());
    //    Console.WriteLine(triple.first().ToString() + '\t' + s1.Substring(second, third - second));
    //}
}
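The commented-out block above hints at the character-offset API: `classifyToCharacterOffsets` returns a `java.util.List` of `Triple` objects holding the label and the begin/end character offsets. A self-contained sketch of that variant, with a placeholder model path and the `Triple` type taken from `edu.stanford.nlp.util`:

using System;
using edu.stanford.nlp.ie.crf;
using edu.stanford.nlp.util;

class OffsetsDemo
{
    static void Main()
    {
        // Placeholder model path: point this at a local english.all.3class model file.
        var classifier = CRFClassifier.getClassifierNoExceptions(
            @"classifiers\english.all.3class.distsim.crf.ser.gz");

        var text = "Stanford University is located in California.";

        // classifyToCharacterOffsets returns a java.util.List of Triple(label, begin, end).
        var spans = classifier.classifyToCharacterOffsets(text).toArray();
        foreach (Triple triple in spans)
        {
            int begin = int.Parse(triple.second().ToString());
            int end = int.Parse(triple.third().ToString());
            Console.WriteLine($"{triple.first()}\t{text.Substring(begin, end - begin)}");
        }
    }
}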
static void Main() { // Path to the folder with classifies models var jarRoot = @"..\..\..\..\data\paket-files\nlp.stanford.edu\stanford-ner-2018-02-27"; var classifiersDirecrory = jarRoot + @"\classifiers"; // Loading 3 class classifier model var classifier = CRFClassifier.getClassifierNoExceptions( classifiersDirecrory + @"\english.all.3class.distsim.crf.ser.gz"); var s1 = "Good afternoon Rajat Raina, how are you today?"; Console.WriteLine("{0}\n", classifier.classifyToString(s1)); var s2 = "I go to school at Stanford University, which is located in California."; Console.WriteLine("{0}\n", classifier.classifyWithInlineXML(s2)); Console.WriteLine("{0}\n", classifier.classifyToString(s2, "xml", true)); }
static void Main(string[] args)
{
    var classifiersDirectory = Environment.CurrentDirectory + @"\stanford-ner-2016-10-31\classifiers";

    // Loading 7 class classifier model
    var classifier = CRFClassifier.getClassifierNoExceptions(
        Path.Combine(classifiersDirectory, "english.muc.7class.distsim.crf.ser.gz"));

    // Load the document that needs to be recognized with the Named Entities contained in it.
    var document = File.ReadAllText(Path.Combine(Environment.CurrentDirectory, "demo.txt"));

    // Use classifyToString with the document as the 1st argument, "xml" as the 2nd argument
    // for the output format, and preserveSpacing set to true as the 3rd argument.
    string xmlContent = classifier.classifyToString(document, "xml", true);

    // Wrap the resultant xmlContent with parent and WiCollection tags.
    // This is just for XML deserialization, which makes the LINQ operations below easier.
    var xml = $"<WiCollection><parent>{xmlContent}</parent></WiCollection>";

    // POCO to hold the deserialized XML
    WiCollection wiCollection = null;

    // Deserialize XML
    XmlSerializer serializer = new XmlSerializer(typeof(WiCollection));
    using (TextReader reader = new StringReader(xml))
    {
        wiCollection = (WiCollection)serializer.Deserialize(reader);
    }

    // Iterate and print standard entity types
    var entities = Enum.GetValues(typeof(Entity)).Cast<Entity>();
    foreach (var entity in entities)
    {
        Console.WriteLine($"-----------{entity}-----------");
        var tags = wiCollection.Wi.Where(x => x.Entity.ToLowerInvariant() == entity.ToString().ToLowerInvariant());
        Console.WriteLine(string.Join(Environment.NewLine, tags.Select(w => w.Text)));
    }
    Console.Read();
}
public void ExtractNeFromPredefinedPhrase()
{
    var filePath = Files.NER.Classifier("english.all.3class.distsim.crf.ser.gz");
    var classifier = CRFClassifier.getClassifierNoExceptions(filePath);

    var s1 = "Good afternoon Rajat Raina, how are you today?";
    var s2 = "I go to school at Stanford University, which is located in California.";

    TestContext.Out.WriteLine(classifier.classifyToString(s1));
    TestContext.Out.WriteLine(classifier.classifyWithInlineXML(s2));
    TestContext.Out.WriteLine(classifier.classifyToString(s2, "xml", true));

    var labels = classifier.classify(s2).toArray();
    Assert.NotNull(labels);
    for (var i = 0; i < labels.Length; i++)
    {
        TestContext.Out.WriteLine($"{i}\n:{labels[i]}");
    }
}
public List<EntityRecognition> ExtractionChannel(string message)
{
    // find people
    List<EntityRecognition> entity = new List<EntityRecognition>();

    // Path to the folder with classifiers models
    //var jarRoot = @"C:\\Users\\Sai\\Downloads\\stanford-ner-2016-10-31\\stanford-ner-2016-10-31";
    //var classifiersDirectory = jarRoot + @"\classifiers";
    string filepath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Content\\classifier\\english.all.3class.distsim.crf.ser.gz");

    var classifier = CRFClassifier.getClassifierNoExceptions(filepath);

    // Title-case the message before classification
    TextInfo textInfo = new CultureInfo("en-US", false).TextInfo;
    message = textInfo.ToTitleCase(message);

    string result = classifier.classifyWithInlineXML(message);
    entity = getEntityList(result);
    return (entity);
}
public string[] GetPlaces(string text)
{
    var classifiersDirectory = Environment.CurrentDirectory + @"\stanford-ner-2016-10-31\classifiers";
    var classifier = CRFClassifier.getClassifierNoExceptions(
        Path.Combine(classifiersDirectory, "english.muc.7class.distsim.crf.ser.gz"));

    var xmlContent = classifier.classifyWithInlineXML(text);
    var xmlDoc = new XmlDocument();
    xmlDoc.LoadXml($"<xml>{xmlContent}</xml>");

    var placesElements = xmlDoc.GetElementsByTagName("LOCATION");
    var places = new List<string>();
    for (var i = 0; i < placesElements.Count; i++)
    {
        if (placesElements[i].InnerXml.Length > 0)
        {
            places.Add(placesElements[i].InnerXml);
        }
    }
    return (places.Select(place => place.ToString()).ToArray());
}
public static CRFClassifier GetClassifier()
{
    if (_classifier != null)
    {
        return (_classifier);
    }

    var modelDir = new DirectoryInfo(Path.Combine(GetExecDirLocation(), "Models"));
    if (!modelDir.Exists)
    {
        throw new Exception(string.Format("The required NER model directory {0} is missing", modelDir.FullName));
    }

    var modelFile = new FileInfo(Path.Combine(modelDir.FullName, "english.muc.7class.distsim.crf.ser.gz"));
    if (!modelFile.Exists)
    {
        throw new Exception(string.Format("The required NER model file {0} is missing", modelFile.Name));
    }

    _classifier = CRFClassifier.getClassifierNoExceptions(modelFile.FullName);
    return (_classifier);
}
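The lazy initialization above is not thread-safe; if `GetClassifier` can be hit from multiple threads, the same caching behavior can be had with `Lazy<T>`, which guarantees the factory runs only once. A minimal sketch, with the model path as a placeholder:

using System;
using edu.stanford.nlp.ie.crf;

static class NerModel
{
    // Placeholder path: point this at the 7-class MUC model used above.
    private static readonly Lazy<CRFClassifier> _classifier = new Lazy<CRFClassifier>(
        () => CRFClassifier.getClassifierNoExceptions(@"Models\english.muc.7class.distsim.crf.ser.gz"));

    // Lazy<T> is thread-safe by default, so the model is loaded exactly once.
    public static CRFClassifier GetClassifier() => _classifier.Value;
}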
static void Main() { // Path to the folder with classifies models var jarRoot = @"E:\SystemsTests\stanford\stanford-ner-2015-04-20"; var classifiersDirecrory = jarRoot + @"\classifiers"; // Loading 3 class classifier model var classifier = CRFClassifier.getClassifierNoExceptions( classifiersDirecrory + @"\english.all.3class.distsim.crf.ser.gz"); var text = System.IO.File.ReadAllText( @"C:\code\github\LingProductsTests\TestResults\strict\eng\2talk\testmodeleng_etalon.txt"); var parser = new HtmlCorpusParser(); var model = parser.Parse(text, 350); var input = model.ClearedText; var timer = Stopwatch.StartNew(); System.IO.File.WriteAllText("stanford.txt", classifier.classifyWithInlineXML(input)); timer.Stop(); Console.WriteLine("Elapsed {0} ms", timer.ElapsedMilliseconds); Console.ReadLine(); }
public string GetData(string value)
{
    var jarRoot2 = @"C:\Workspace\Bofa\NLP\stanford-ner-2015-04-20";
    var classifiersDirectory = jarRoot2 + @"\classifiers";

    // Loading 3 class classifier model
    var classifier = CRFClassifier.getClassifierNoExceptions(
        classifiersDirectory + @"\english.all.3class.distsim.crf.ser.gz");

    XmlDocument doc = new XmlDocument();
    XmlDeclaration xmlDeclaration = doc.CreateXmlDeclaration("1.0", "UTF-8", null);
    //XmlElement root = doc.DocumentElement;
    //doc.InsertBefore(xmlDeclaration, root);
    doc.LoadXml(string.Format("<root>{0}</root>", classifier.classifyToString(value, "xml", true)));

    XmlNodeList orgs = doc.SelectNodes("root/wi[@entity='ORGANIZATION']");
    XmlNodeList ppl = doc.SelectNodes("root/wi[@entity='PERSON']");
    XmlNodeList locs = doc.SelectNodes("root/wi[@entity='LOCATION']");

    StringBuilder sd = new StringBuilder();
    foreach (XmlNode xmlN in orgs)
    {
        sd.Append(xmlN.InnerText + ", ");
    }
    foreach (XmlNode xmlN in ppl)
    {
        sd.Append(xmlN.InnerText + ", ");
    }
    foreach (XmlNode xmlN in locs)
    {
        sd.Append(xmlN.InnerText + ", ");
    }
    return (sd.ToString());
}
static void setupPipeline()
{
    DateTimeOffset started = DateTimeOffset.UtcNow;

    var classifiersDirectory = @"C:/temp/stanford-english-corenlp/edu/stanford/nlp/models/ner";
    // Loading 3 class classifier model
    classifier = CRFClassifier.getClassifierNoExceptions(classifiersDirectory + @"\english.all.3class.distsim.crf.ser.gz");

    // Path to the folder with models extracted from `stanford-corenlp-3.8.0-models.jar`
    var jarRoot = @"c:/temp/stanford-english-corenlp/";

    // Annotation pipeline configuration
    var props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, parse, sentiment");
    props.setProperty("ner.useSUTime", "0");

    var curDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(jarRoot);
    pipeline = new StanfordCoreNLP(props);
    Directory.SetCurrentDirectory(curDir);

    Console.WriteLine("finished up : " + (DateTimeOffset.UtcNow - started).TotalSeconds + " seconds");
}
private CRFClassifier GetNerClassifier()
{
    return (CRFClassifier.getClassifierNoExceptions(nlpConfig.JarRoot + nlpConfig.ClassifiersDirectory + nlpConfig.Ner3ClassesModel));
}
public void ProcessInputData()
{
    Console.WriteLine("Processing input file...");
    dbPediaAccess dbPediaChecker = new dbPediaAccess();

    var jarRoot = @"C:\Users\micha\source\repos\ConsoleApp2\ConsoleApp2\data\paket-files\nlp.stanford.edu\stanford-ner-2016-10-31";
    var classifiersDirectory = jarRoot + @"\classifiers";
    var classifier = CRFClassifier.getClassifierNoExceptions(classifiersDirectory + @"\english.all.3class.distsim.crf.ser.gz");

    // Load the Turtle input and keep one triple per distinct "isString" object
    IGraph g = new Graph();
    new TurtleParser().Load(g, inputPath);
    var triples = g.Triples
        .Where(q => q.Predicate.ToString().ToLower().Contains("isString".ToLower()))
        .GroupBy(q => q.Object)
        .Select(q => q.First());
    //https://csharp.hotexamples.com/examples/VDS.RDF.Parsing/TurtleParser/-/php-turtleparser-class-examples.html
    //linq microsoft, regex

    var dataSet = new List<data.Input>();
    foreach (var t in triples)
    {
        var match = new Regex("char=([0-9]+),([0-9]+)").Match(t.Subject.ToString());
        if (!match.Success)
        {
            continue;
        }
        int startIndex = 0;
        int stopIndex = int.Parse(match.Groups[2].Value);
        var newData = new data.Input(t.Object.ToString().Substring(startIndex, stopIndex));
        dataSet.Add(newData);
    }

    //http://sergey-tihon.github.io/Stanford.NLP.NET/StanfordNER.html
    Console.WriteLine();
    string classifierOutput = "";
    for (int cnt = 0; cnt < dataSet.Count(); cnt++)
    {
        classifierOutput = classifier.classifyWithInlineXML(dataSet[cnt].sentence);
        // Console.WriteLine("{0}\n", classifierOutput);
        Console.WriteLine(dataSet[cnt].sentence + "\n");

        EntityExtractor(classifierOutput, "LOCATION");
        EntityExtractor(classifierOutput, "ORGANIZATION");
        EntityExtractor(classifierOutput, "PERSON");

        if (locationEntityL.Count > 0)
        {
            locationEntityL = locationEntityL.Distinct().ToList();
            Console.WriteLine("Locations:");
            for (int i = 0; i < locationEntityL.Count; i++)
            {
                Console.WriteLine(locationEntityL[i]);
                dbPediaChecker.checkdBPedia("Place", locationEntityL[i]);
            }
            Console.WriteLine();
            locationEntityL.Clear();
        }
        if (organizationEntityL.Count > 0)
        {
            organizationEntityL = organizationEntityL.Distinct().ToList();
            Console.WriteLine("Organisations:");
            for (int i = 0; i < organizationEntityL.Count; i++)
            {
                Console.WriteLine(organizationEntityL[i]);
                dbPediaChecker.checkdBPedia("Organisation", organizationEntityL[i]);
            }
            Console.WriteLine();
            organizationEntityL.Clear();
        }
        if (personEntityL.Count > 0)
        {
            personEntityL = personEntityL.Distinct().ToList();
            Console.WriteLine("Persons:");
            for (int i = 0; i < personEntityL.Count; i++)
            {
                Console.WriteLine(personEntityL[i]);
                dbPediaChecker.checkdBPedia("Person", personEntityL[i]);
            }
            Console.WriteLine();
            personEntityL.Clear();
        }
    }

    // Console.WriteLine("{0}\n", classifier.classifyToString(dataSet.First().sentence));
    //var s1 = "Good afternoon Rajat Raina, how are you today?";
    //Console.WriteLine("{0}\n", classifier.classifyToString(s1));
    //var s2 = "I go to school at Stanford University, which is located in California.";
    //Console.WriteLine("{0}\n", classifier.classifyWithInlineXML(s2));
    //var s3 = "Michael Jackson and Donald Trump never met in New York.";
    //Console.WriteLine("{0}\n", classifier.classifyWithInlineXML(s3));
    //Console.WriteLine("{0}\n", classifier.classifyToString(s2, "xml", true));
    Console.ReadKey();
}
/// <summary>
/// Output name entity set of each tweet cluster
/// Output: clusterNameEntitySet.txt
/// </summary>
/// <param name="fileName">Lucene index folder path of tweets</param>
public static void nameEntitySet(string fileName)
{
    var indexReader = LuceneOperations.GetIndexReader(fileName);
    StreamReader sr = new StreamReader("signalCluster.txt", Encoding.Default);
    StreamReader sr1 = new StreamReader("generalCluster.txt", Encoding.Default);
    FileStream fs = new FileStream("clusterNameEntitySet.txt", FileMode.Create);
    StreamWriter sw = new StreamWriter(fs, Encoding.Default);

    // Path to the folder with classifiers models
    var jarRoot = @"..\..\..\..\stanford-ner-2015-12-09";
    var classifiersDirectory = jarRoot + @"\classifiers";

    // Loading 3 class classifier model
    var classifier = CRFClassifier.getClassifierNoExceptions(
        classifiersDirectory + @"\english.all.3class.distsim.crf.ser.gz");

    string line;
    string line1;
    while ((line = sr.ReadLine()) != null && (line1 = sr1.ReadLine()) != null)
    {
        // Each cluster record spans three lines; the document-id list is on the second one
        line = sr.ReadLine();
        line1 = sr1.ReadLine();
        sr.ReadLine();
        sr1.ReadLine();

        string[] iDocStrArray = Regex.Split(line, " ");
        List<int> iDocList = new List<int>();
        for (int i = 0; i < iDocStrArray.Length - 1; i++)
        {
            iDocList.Add(int.Parse(iDocStrArray[i]));
        }

        string[] iDocStrArray1 = Regex.Split(line1, " ");
        List<int> iDocList1 = new List<int>();
        for (int i = 0; i < iDocStrArray1.Length - 1; i++)
        {
            iDocList1.Add(int.Parse(iDocStrArray1[i]));
        }

        HashSet<string> nameEntitySet = new HashSet<string>();

        // Tag every tweet of a cluster and collect its PERSON/ORGANIZATION/LOCATION strings
        Action<List<int>> collectEntities = docList =>
        {
            for (int i = 0; i < docList.Count; i++)
            {
                Document inDoc = indexReader.Document(docList[i]);
                string text = inDoc.Get("Text");
                text = Regex.Replace(text, @"\s+", " ");
                text = Regex.Replace(text, @"#n#|#N#", "");
                text = Regex.Replace(text, @"#", "");
                text = Regex.Replace(text, @"@", "");
                text = classifier.classifyWithInlineXML(text);

                foreach (Match m in Regex.Matches(text, @"<PERSON>[^<>]+</PERSON>"))
                {
                    string str = m.ToString();
                    nameEntitySet.Add(str.Substring(8, str.Length - 17));
                }
                foreach (Match m in Regex.Matches(text, @"<ORGANIZATION>[^<>]+</ORGANIZATION>"))
                {
                    string str = m.ToString();
                    nameEntitySet.Add(str.Substring(14, str.Length - 29));
                }
                foreach (Match m in Regex.Matches(text, @"<LOCATION>[^<>]+</LOCATION>"))
                {
                    string str = m.ToString();
                    nameEntitySet.Add(str.Substring(10, str.Length - 21));
                }
            }
        };

        collectEntities(iDocList);   // signal cluster documents
        collectEntities(iDocList1);  // general cluster documents

        foreach (var entity in nameEntitySet)
        {
            sw.Write(entity + "; ");
        }
        sw.WriteLine();
    }

    sw.Close();
    fs.Close();
    sr1.Close();
    sr.Close();
}
public void useModel(string inputPath, string partial_address, ref NERAddress addr)
{
    CRFClassifier model = CRFClassifier.getClassifierNoExceptions(inputPath);

    //string tagged_address = model.classifyToString(partial_address);
    string tagged_address = model.classifyWithInlineXML(partial_address);

    // "0" is not a valid XML element name, so rename those tags before parsing
    tagged_address = tagged_address.Replace("<0>", "<ZERO>");
    tagged_address = tagged_address.Replace("</0>", "</ZERO>");

    // Parse the inline XML produced by the classifier
    XmlDocument doc = new XmlDocument();
    try
    {
        doc.LoadXml("<root>" + tagged_address + "</root>");
    }
    catch (XmlException e)
    {
        Console.WriteLine("Exception occurred while parsing xml: " + e.Message);
        return;
    }

    Console.WriteLine("Model output: " + tagged_address);

    // Route each tagged node into the matching address field (tag names use IOB prefixes such as B-LOCALITY)
    string numbers = "";
    foreach (XmlNode node in doc.DocumentElement.ChildNodes)
    {
        if (Regex.IsMatch(node.Name, "^[IOB]-LOCALITY")) { addr.locality += node.InnerText + " "; }
        if (Regex.IsMatch(node.Name, "^[IOB]-SECONDARY_LOCALITY")) { addr.secondary_locality += node.InnerText + " "; }
        if (Regex.IsMatch(node.Name, "^[IOB]-THOROFARE")) { addr.thorofare += node.InnerText + " "; }
        if (Regex.IsMatch(node.Name, "^[IOB]-BUILDING_GROUP_NAME")) { addr.building_group_name += node.InnerText + " "; }
        if (Regex.IsMatch(node.Name, "^[IOB]-BUILDING_NAME")) { addr.building_name += node.InnerText + " "; }
        if (Regex.IsMatch(node.Name, "^[IOB]-SUB_BUILDING_NAME")) { addr.sub_building_name += node.InnerText + " "; }
        if (Regex.IsMatch(node.Name, "^[IOB]-BUILDING_NUMBER")) { addr.building_number += node.InnerText + " "; }
        if (Regex.IsMatch(node.Name, "^[IOB]-DEPARTMENT")) { addr.department += node.InnerText + " "; }
        if (Regex.IsMatch(node.Name, "^[IOB]-ORGANISATION_NAME")) { addr.organisation_name += node.InnerText + " "; }
        if (node.Name == "NUMBER") { numbers += node.InnerText + " "; }
    }

    addr.numbers = numbers.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries);
    addr.locality = addr.locality.Trim();
    addr.secondary_locality = addr.secondary_locality.Trim();
    addr.thorofare = addr.thorofare.Trim();
    addr.building_group_name = addr.building_group_name.Trim();
    addr.building_name = addr.building_name.Trim();
    addr.sub_building_name = addr.sub_building_name.Trim();
    addr.building_number = addr.building_number.Trim();
    addr.department = addr.department.Trim();
    addr.organisation_name = addr.organisation_name.Trim();
}
public List<Book> getStreamFromTxtFile(String folderPath)
{
    String fileContent = "";
    List<string> apts = new List<string>();
    DirectoryInfo d = new DirectoryInfo(folderPath);
    HashSet<City> cities = new HashSet<City>();
    List<Book> books = new List<Book>();

    // NOTE: the connection string, the MongoDB collection setup and the classifiers-path
    // assignment were redacted in the original example and are not recoverable here.
    var client = new MongoClient(new MongoUrl("mongodb://*****:*****@"\classifiers";

    // Loading 3 class classifier model
    var classifier = CRFClassifier.getClassifierNoExceptions(
        classifiersDirecrory + @"\english.all.3class.distsim.crf.ser.gz");

    string worldcities = System.IO.File.ReadAllText("/worldcities.csv");

    foreach (var file in Files)
    {
        string content = System.IO.File.ReadAllText(folderPath + "//" + file);

        // Keep only tokens tagged as LOCATION
        var locations = classifier.classifyToString(content)
            .Split('.', ' ', ',', '-', ':')
            .Where(x => x.Contains("/LOCATION"));

        foreach (var item in locations)
        {
            if (item.Contains("/LOCATION") && !item.Contains("/O"))
            {
                var newItem = item.Replace("/LOCATION", "");

                // Look the place up in worldcities.csv to get its coordinates
                var splittedArray = worldcities.Split('\n').Where(x => x.Contains(newItem.Trim())).ToArray();
                if (splittedArray.Length > 0)
                {
                    foreach (var row in splittedArray)
                    {
                        var currentRow = row.Split(';').ToArray();
                        if (currentRow.Length > 2)
                        {
                            if (currentRow[2].Trim() == newItem.Trim())
                            {
                                cities.Add(new City(newItem, currentRow[3], currentRow[4]));
                                break;
                            }
                        }
                    }
                }
            }
        }

        var vs = content.Split('\n').Where(x => x.Contains("Title:") || x.Contains("Author:")).ToArray();
        if (2 == vs.Length)
        {
            collection.InsertOne(new Book(vs[0], vs[1], cities.ToList()));
        }
        else if (1 == vs.Length)
        {
            collection.InsertOne(new Book(vs[0], "", cities.ToList()));
        }
        cities.Clear();
        Console.WriteLine(i++);
        // Console.ReadLine();
    }
    return (books);
}
static void Main() { // Path to the folder with classifiers models var jarRoot = @"\Users\devir\OneDrive\Documents\Visual Studio 2015\Projects\ner"; var classifiersDirecrory = jarRoot + @"\classifiers"; // Loading 3 class classifier model var classifier = CRFClassifier.getClassifierNoExceptions( classifiersDirecrory + @"\english.muc.7class.distsim.crf.ser.gz"); var s1 = " She got up this morning at 9:00 am and went to a shop to spend five dollars to buy a 50% off toothbrush."; var s2 = "Tell the latest on olympics from the New York."; Console.WriteLine("{0}\n", classifier.classifyToCharacterOffsets(s1)); Console.WriteLine("{0}\n", classifier.classifyWithInlineXML(s1)); //MUNCULIN NER SATU SATU string result = classifier.classifyWithInlineXML(s1); String substr1 = "TIME"; String substr2 = "LOCATION"; String substr3 = "PERSON"; String substr4 = "ORGANIZATION"; String substr5 = "MONEY"; String substr6 = "Percent"; String substr7 = "Date"; string total1, total2, total3, total4, total5, total6, total7; //if (result.Contains(substr1)) //{ // string[] hasiltime = GetStringInBetween("<TIME>", "</TIME>", result, false, false); // string output_time = hasiltime[0]; // string next_time = hasiltime[1]; // total1 = output_time; // // Console.WriteLine(output_time); //} //if (result.Contains(substr2)) //{ // string[] hasillocation = GetStringInBetween("<LOCATION>", "</LOCATION>", result, false, false); // string output_location = hasillocation[0]; // string next_loc = hasillocation[1]; // //Console.WriteLine(output_location); // total2 = output_location; //} //if (result.Contains(substr3)) //{ // string[] hasilperson = GetStringInBetween("<PERSON>", "</PERSON>", result, false, false); // string output_person = hasilperson[0]; // string next_person = hasilperson[1]; // //Console.WriteLine(hasilperson); // total3 = output_person; //} //if (result.Contains(substr4)) //{ // string[] hasilORGANIZATION = GetStringInBetween("<ORGANIZATION>", "</ORGANIZATION>", result, false, false); // string output_ORGANIZATION = hasilORGANIZATION[0]; // string next_ORGANIZATION = hasilORGANIZATION[1]; // //Console.WriteLine(output_ORGANIZATION); // total4 = output_ORGANIZATION; //} //if (result.Contains(substr5)) //{ // string[] hasilMONEY = GetStringInBetween("<MONEY>", "</MONEY>", result, false, false); // string output_MONEY = hasilMONEY[0]; // string next_MONEY = hasilMONEY[1]; // // Console.WriteLine(output_MONEY); // total5 = output_MONEY; //} //if (result.Contains(substr6)) //{ // string[] hasilPercent = GetStringInBetween("<Percent>", "</Percent>", result, false, false); // string output_Percent = hasilPercent[0]; // string next_Percent = hasilPercent[1]; // //Console.WriteLine(output_Percent); // total6 = output_Percent; //} //if (result.Contains(substr7)) //{ // string[] hasilDate = GetStringInBetween("<Date>", "</Date>", result, false, false); // string output_Date = hasilDate[0]; // string next_Date = hasilDate[1]; // //Console.WriteLine(output_Date); // total7 = output_Date; //} string[] hasiltime = GetStringInBetween("<TIME>", "</TIME>", result, false, false); string output_time = hasiltime[0]; string next_time = hasiltime[1]; total1 = output_time; //Console.WriteLine(output_time); string[] hasillocation = GetStringInBetween("<LOCATION>", "</LOCATION>", result, false, false); string output_location = hasillocation[0]; string next_loc = hasillocation[1]; //Console.WriteLine(output_location); total2 = output_location; string[] hasilperson = GetStringInBetween("<PERSON>", "</PERSON>", result, false, false); string 
output_person = hasilperson[0]; string next_person = hasilperson[1]; //Console.WriteLine(hasilperson); total3 = output_person; string[] hasilORGANIZATION = GetStringInBetween("<ORGANIZATION>", "</ORGANIZATION>", result, false, false); string output_ORGANIZATION = hasilORGANIZATION[0]; string next_ORGANIZATION = hasilORGANIZATION[1]; //Console.WriteLine(output_ORGANIZATION); total4 = output_ORGANIZATION; string[] hasilMONEY = GetStringInBetween("<MONEY>", "</MONEY>", result, false, false); string output_MONEY = hasilMONEY[0]; string next_MONEY = hasilMONEY[1]; // Console.WriteLine(output_MONEY); total5 = output_MONEY; string[] hasilPercent = GetStringInBetween("<Percent>", "</Percent>", result, false, false); string output_Percent = hasilPercent[0]; string next_Percent = hasilPercent[1]; //Console.WriteLine(output_Percent); total6 = output_Percent; string[] hasilDate = GetStringInBetween("<Date>", "</Date>", result, false, false); string output_Date = hasilDate[0]; string next_Date = hasilDate[1]; //Console.WriteLine(output_Date); total7 = output_Date; //BOW string semua = total1 + ";" + total2 + ";" + total3 + ";" + total4 + ";" + total5 + ";" + total6 + ";" + total7 + ";"; Console.WriteLine(semua); string[] gabungan = { total1, total2, total3, total4, total5, total6, total7 }; foreach (var a in gabungan) { Console.WriteLine(a); } string[][] words = gabungan.Tokenize(); //var codebook = new TFIDF() //{ // Tf = TermFrequency.Log, // Idf = InverseDocumentFrequency.Default //}; var codebook = new BagOfWords() { MaximumOccurance = 1 // the resulting vector will have only 0's and 1's }; codebook.Learn(words); double[] bow1 = codebook.Transform(words[0]); double[] bow2 = codebook.Transform(words[1]); double[] bow3 = codebook.Transform(words[2]); double[] bow4 = codebook.Transform(words[3]); double[] bow5 = codebook.Transform(words[4]); double[] bow6 = codebook.Transform(words[5]); double[] bow7 = codebook.Transform(words[6]); double[][] keseluruhanBOW1 = { bow1, bow2, bow3, bow4, bow5, bow6, bow7 }; //coba bool quitNow = false; while (!quitNow) { string val; Console.Write("Enter question: "); val = Console.ReadLine(); string[] textss = { val, }; string[][] wordss = textss.Tokenize(); //var codebook2 = new TFIDF() //{ // Tf = TermFrequency.Log, // Idf = InverseDocumentFrequency.Default //}; var codebook2 = new BagOfWords() { MaximumOccurance = 1 // the resulting vector will have only 0's and 1's }; codebook2.Learn(wordss); double[] c1 = codebook2.Transform(wordss[0]); string path = @"C:\Users\devir\OneDrive\Documents\Visual Studio 2015\Projects\ner"; //var load_svm_model = Serializer.Load<MulticlassClassifierBase>(Path.Combine(path, "pelatihanSVMbayardanpergi.bin")); //LibSvmModel modela = LibSvmModel.Load(Path.Combine(path, "pelatihanSVMbayardanpergi.bint")); //int jawaban = load_svm_model.Decide( c1); // answer will be 2. // Now, we can use the model class to create the equivalent Accord.NET SVM: //Console.WriteLine(jawaban); LibSvmModel model = LibSvmModel.Load(Path.Combine(path, "pelatihanSVMbayardanpergi.txt")); // Now, we can use the model class to create the equivalent Accord.NET SVM: SupportVectorMachine svm = model.CreateMachine(); // Compute classification error bool predicted = svm.Decide(c1); // var machine = teacher.Learn(inputs, outputs); if (predicted == false) { Console.WriteLine("BAYAR"); } ; if (predicted == true) { Console.WriteLine("PERGI"); } ; Console.ReadLine(); } // In order to convert any 2d array to jagged one // let's use a generic implementation }
public static Dictionary<string, string> ner(List<string> content)
{
    Dictionary<string, string> EntityDict = new Dictionary<string, string>();

    // Path to the folder with classifiers models
    var jarRoot = @"C:\Users\Gideon\Documents\stanford-ner-2017-06-09";
    var classifiersDirectory = jarRoot + @"\classifiers";

    // Loading 7 class classifier model
    var classifier = CRFClassifier.getClassifierNoExceptions(
        classifiersDirectory + @"\english.muc.7class.distsim.crf.ser.gz");

    // Applying ner tagging and saving to xml string
    string ner = "";
    foreach (var item in content)
    {
        ner += classifier.classifyWithInlineXML(item);
    }

    // Adding root
    ner = "<root>" + ner + "</root>";

    // Converting to Xml document
    System.Xml.XmlDocument xmlDoc = new System.Xml.XmlDocument();
    xmlDoc.LoadXml(ner);

    // Iterating inside Xml Document: keep the first occurrence of each entity text
    var entityTypes = new HashSet<string> { "ORGANIZATION", "LOCATION", "PERSON", "MONEY", "DATE", "PERCENT", "TIME" };
    foreach (XmlNode item in xmlDoc.ChildNodes[0].ChildNodes)
    {
        var entity = item.Name;
        if (entityTypes.Contains(entity) && !EntityDict.ContainsKey(item.InnerText))
        {
            EntityDict.Add(item.InnerText, entity);
        }
    }
    return (EntityDict);
}
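A hedged usage sketch for the ner(...) helper above, assuming it is reachable as a static method and the 7-class model path exists; the sentences are illustrative.

var sentences = new List<string>
{
    "Barack Obama visited Microsoft in Seattle on Tuesday.",
    "The deal was worth $2 billion, a 15% premium."
};

// Each distinct entity string maps to the tag the classifier assigned to it.
foreach (var pair in ner(sentences))
{
    Console.WriteLine($"{pair.Value}: {pair.Key}");
}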