// Demo entry point showing several ways to consume the classifier's output.
//
// With a file path in args[0]: the file is read into a string, classified,
// and every token is printed as "word/label" followed by a space, with a
// line break after each element returned by classify().
//
// With no arguments: two hard-coded sentences are used. The first is printed
// via classifyToString; the second is printed via classifyWithInlineXML, via
// classifyToString with the "xml" output format, and finally element by
// element from classify(), each prefixed with its index.
static void Main(string[] args)
{
    if (args.Length == 0)
    {
        const string greetingText = "Good afternoon Rajat Raina, how are you today?";
        const string schoolText = "I go to school at Stanford University, which is located in California.";

        Console.WriteLine("{0}\n", Classifier.classifyToString(greetingText));
        Console.WriteLine("{0}\n", Classifier.classifyWithInlineXML(schoolText));
        Console.WriteLine("{0}\n", Classifier.classifyToString(schoolText, "xml", true));

        // Dump each element of the raw classification alongside its position.
        var position = 0;
        foreach (var element in Classifier.classify(schoolText).toArray())
        {
            Console.WriteLine("{0}\n:{1}\n", position, element);
            position++;
        }

        return;
    }

    var text = File.ReadAllText(args[0]);
    var sentences = Classifier.classify(text).toArray();
    foreach (List sentence in sentences)
    {
        foreach (CoreLabel token in sentence.toArray())
        {
            var surface = token.word();
            var label = token.get(new CoreAnnotations.AnswerAnnotation().getClass());
            Console.Write("{0}/{1} ", surface, label);
        }

        Console.WriteLine();
    }
}
/// <summary>
/// Tags <paramref name="S"/> with the CRF classifier and returns only the
/// tagged tokens that do not end in "/O".
/// </summary>
/// <param name="S">
/// Text to tag. Leading and trailing ',' and '.' characters are trimmed and
/// every remaining comma is removed before the text is classified.
/// </param>
/// <returns>
/// The kept tokens in their original order, each followed by "\n";
/// an empty string when no token is kept.
/// </returns>
public string getNER(string S)
{
    // NOTE(review): the classifier is loaded from disk on every call; callers
    // that invoke this repeatedly may want to cache it.
    CRFClassifier classifier = CRFClassifier.getClassifierNoExceptions(@"C:\english.all.3class.distsim.crf.ser.gz");

    string cleaned = S.Trim(',', '.').Replace(@",", "");
    string tagged = classifier.classifyToString(cleaned);

    // Drop empty entries so consecutive or trailing spaces in the tagged text
    // do not show up as blank lines in the result.
    string[] tokens = tagged.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    var kept = new System.Text.StringBuilder();
    foreach (string token in tokens)
    {
        // The tag suffix is a machine format, so compare ordinally.
        if (!token.EndsWith("/O", StringComparison.Ordinal))
        {
            kept.Append(token).Append('\n');
        }
    }

    return kept.ToString();
}
/// <summary>
/// Cleans each sentence and returns the classifier's string output for it.
/// </summary>
/// <param name="sentences">Sentences to classify, processed in order.</param>
/// <returns>
/// A new list with one classifyToString result per input sentence,
/// in the same order as the input.
/// </returns>
public List <string> FindTownsInTxt(List <string> sentences)
{
    return sentences.ConvertAll(raw =>
    {
        // Line breaks and commas become spaces; semicolons and colons are dropped.
        string flattened = raw.Replace(Environment.NewLine, " ");
        string withoutPunctuation = flattened.Replace(",", " ");
        withoutPunctuation = withoutPunctuation.Replace(";", "");
        withoutPunctuation = withoutPunctuation.Replace(":", "");

        // Strip everything except ASCII letters, digits, spaces and commas.
        string classifierInput = Regex.Replace(withoutPunctuation, @"[^0-9A-Za-z ,]", "");
        return Classifier.classifyToString(classifierInput);
    });
}
/// <summary>
/// Classifies <paramref name="txt"/> and hands the classifier's string
/// output to ParseResult.
/// </summary>
/// <param name="txt">Text passed unchanged to the classifier.</param>
/// <returns>Whatever ParseResult produces for the classified text.</returns>
public string[] Recognize(string txt)
{
    var classified = Classifier.classifyToString(txt);
    return ParseResult(classified);
}
/// <summary>
/// Trims <paramref name="input"/>, classifies it with <paramref name="model"/>
/// and writes "trimmed input=>classified output" to the console on one line.
/// </summary>
/// <param name="model">Classifier used to tag the text.</param>
/// <param name="input">Text to tag; surrounding whitespace is removed first.</param>
static void DoTagging(CRFClassifier model, String input)
{
    String trimmed = input.Trim();
    String tagged = model.classifyToString(trimmed);
    Console.WriteLine(String.Concat(trimmed, "=>", tagged));
}
/// <summary>
/// Background-worker handler: loads the segmenter model, then runs every
/// "*.txt" file found in the input folder through the segmenter and writes
/// the result to the output folder under the same file name.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">
/// <c>e.Argument</c> is cast to <c>string[]</c>: element 0 is the input
/// folder, element 1 is the output folder.
/// </param>
private void BgWorker_DoWork(object sender, DoWorkEventArgs e)
{
    // Resolve the user's encoding choice; the dropdown is read through Invoke
    // so the access happens on the form's own thread.
    Encoding SelectedEncoding = null;
    this.Invoke((MethodInvoker) delegate()
    {
        SelectedEncoding = Encoding.GetEncoding(EncodingDropdown.SelectedItem.ToString());
    });

    // Report what we're working on.
    FilenameLabel.Invoke((MethodInvoker) delegate
    {
        FilenameLabel.Text = "Loading model... please wait...";
    });

    // Path to the folder with models: a "data" folder under the application base directory.
    var segmenterData = Path.Combine(Path.GetDirectoryName(AppDomain.CurrentDomain.BaseDirectory), @"data");

    var props = new Properties();
    props.setProperty("sighanCorporaDict", segmenterData);
    props.setProperty("serDictionary", segmenterData + @"\dict-chris6.ser.gz");
    // Lines below are needed because CTBSegDocumentIteratorFactory accesses it.
    // WebName is the registered charset name (e.g. "utf-8"); Encoding.ToString()
    // would pass the .NET type name instead.
    props.setProperty("inputEncoding", SelectedEncoding.WebName);
    props.setProperty("sighanPostProcessing", "true");

    var segmenter = new CRFClassifier(props);
    segmenter.loadClassifierNoExceptions(segmenterData + @"\ctb.gz", props);

    // Get the list of files, optionally descending into subfolders.
    var SearchDepth = ScanSubfolderCheckbox.Checked
        ? SearchOption.AllDirectories
        : SearchOption.TopDirectoryOnly;

    var folders = (string[])e.Argument;
    var files = Directory.EnumerateFiles(folders[0], "*.txt", SearchDepth);

    try
    {
        string outputdir = folders[1];
        Directory.CreateDirectory(outputdir);

        foreach (string fileName in files)
        {
            // Only the file name is kept, so same-named files from different
            // subfolders are written to the same output path (last one wins).
            string Filename_Clean = Path.GetFileName(fileName);

            // Report what we're working on.
            FilenameLabel.Invoke((MethodInvoker) delegate
            {
                FilenameLabel.Text = "Analyzing: " + Filename_Clean;
            });

            // Read, lowercase and segment the text, then write it out with the same encoding.
            string readText = File.ReadAllText(fileName, SelectedEncoding).ToLower();
            string TokenResults = segmenter.classifyToString(readText);

            using (var fileout = new StreamWriter(Path.Combine(outputdir, Filename_Clean), false, SelectedEncoding))
            {
                fileout.Write(TokenResults);
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the best-effort behaviour (tell the user, stop processing) but
        // include the underlying reason instead of discarding it.
        MessageBox.Show("ZhToken encountered a problem while trying to tokenize/write a file."
            + Environment.NewLine + Environment.NewLine + ex.Message);
    }
}