Example #1
0
        // For either a file to annotate or for the hardcoded text example,
        // this demo file shows two ways to process the output, for teaching
        // purposes.  For the file, it shows both how to run NER on a String
        // and how to run it on a whole file.  For the hard-coded String,
        // it shows how to run it on a single sentence, and how to do this
        // and produce an inline XML output format.

        /// <summary>
        /// Demo entry point. With at least one command-line argument, reads the file named by
        /// the first argument, classifies its text and prints each token followed by its answer
        /// annotation, ending the line after every classified list. With no arguments, classifies
        /// two hard-coded sentences and prints the results in several output formats.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c>, when present, is the path of the file to read.</param>
        static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                const string Greeting = "Good afternoon Rajat Raina, how are you today?";
                const string SchoolSentence = "I go to school at Stanford University, which is located in California.";

                // Same sentence data rendered three ways: default string form, inline XML, and "xml" format.
                Console.WriteLine("{0}\n", Classifier.classifyToString(Greeting));
                Console.WriteLine("{0}\n", Classifier.classifyWithInlineXML(SchoolSentence));
                Console.WriteLine("{0}\n", Classifier.classifyToString(SchoolSentence, "xml", true));

                // Dump every element returned by classify() together with its position.
                var position = 0;
                foreach (var entry in Classifier.classify(SchoolSentence).toArray())
                {
                    Console.WriteLine("{0}\n:{1}\n", position, entry);
                    position++;
                }

                return;
            }

            var text = File.ReadAllText(args[0]);
            foreach (List tokens in Classifier.classify(text).toArray())
            {
                foreach (CoreLabel token in tokens.toArray())
                {
                    var surface = token.word();
                    var answer = token.get(new CoreAnnotations.AnswerAnnotation().getClass());
                    Console.Write("{0}/{1} ", surface, answer);
                }

                Console.WriteLine();
            }
        }
Example #2
0
        /// <summary>
        /// Classifies <paramref name="S"/> with the model at the hard-coded path and returns every
        /// space-separated token of the classifier output that does not end in <c>/O</c>.
        /// </summary>
        /// <param name="S">
        /// Text to classify. Leading and trailing commas and periods are trimmed, and all remaining
        /// commas are removed, before the text is handed to the classifier.
        /// </param>
        /// <returns>
        /// The retained tokens, each followed by <c>"\n"</c>; an empty string when no token is retained.
        /// </returns>
        public string getNER(string S)
        {
            // NOTE(review): the classifier is obtained from the model path on every call;
            // consider caching it if this method is called repeatedly.
            CRFClassifier classifier = CRFClassifier.getClassifierNoExceptions(@"C:\english.all.3class.distsim.crf.ser.gz");

            string cleaned = S.Trim(new Char[] { ',', '.' }).Replace(",", "");
            String tagged  = classifier.classifyToString(cleaned);

            // Drop empty fragments (trailing or repeated spaces): an empty string does not end
            // with "/O", so it would otherwise be emitted as a blank line in the result.
            string[] tokens = tagged.Split(new Char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            var entities = new System.Text.StringBuilder();

            foreach (string token in tokens)
            {
                // Ordinal comparison: the "/O" suffix is a machine-generated marker, not linguistic text.
                if (!token.EndsWith("/O", StringComparison.Ordinal))
                {
                    entities.Append(token).Append("\n");
                }
            }

            return entities.ToString();
        }
Example #3
0
        /// <summary>
        /// Cleans each input sentence and passes it through <c>Classifier.classifyToString</c>.
        /// </summary>
        /// <param name="sentences">Sentences to classify; processed in order.</param>
        /// <returns>One classifier output string per input sentence, in the same order.</returns>
        public List <string> FindTownsInTxt(List <string> sentences)
        {
            List <string> result = new List <string>(sentences.Count);

            foreach (var sentence in sentences)
            {
                // Turn every line-break convention (\r\n, \r, \n) into a space. Replacing only
                // Environment.NewLine let other line breaks reach the filter below, which deleted
                // them and glued the neighbouring words together.
                string singleLine = Regex.Replace(sentence, @"\r\n|\r|\n", " ");

                // Commas act as word separators, so they become spaces rather than being dropped.
                string separated = singleLine.Replace(",", " ");

                // Keep only ASCII letters, digits and spaces; this also removes ';' and ':'.
                string sentenceClean = Regex.Replace(separated, @"[^0-9A-Za-z ]", "");

                result.Add(Classifier.classifyToString(sentenceClean));
            }

            return result;
        }
Example #4
0
 /// <summary>
 /// Classifies <paramref name="txt"/> and returns the classifier's string output after it has
 /// been passed through <c>ParseResult</c>.
 /// </summary>
 /// <param name="txt">Text to classify.</param>
 /// <returns>Whatever <c>ParseResult</c> produces for the classifier output.</returns>
 public string[] Recognize(string txt)
 {
     var tagged = Classifier.classifyToString(txt);

     return ParseResult(tagged);
 }
Example #5
0
        /// <summary>
        /// Trims <paramref name="input"/>, classifies it with <paramref name="model"/> and writes
        /// <c>trimmed-input=&gt;classifier-output</c> to the console on one line.
        /// </summary>
        /// <param name="model">Classifier used to tag the text.</param>
        /// <param name="input">Text to tag; surrounding whitespace is ignored.</param>
        static void DoTagging(CRFClassifier model, String input)
        {
            String trimmed = input.Trim();
            String tagged  = model.classifyToString(trimmed);

            Console.WriteLine(String.Concat(trimmed, "=>", tagged));
        }
Example #6
0
        /// <summary>
        /// Background-worker body. Builds a segmenter from the models in the <c>data</c> folder
        /// under the application's base directory, then runs every <c>*.txt</c> file found in the
        /// input folder through it and writes each result, under the same file name, to the
        /// output folder.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">
        /// <c>e.Argument</c> is cast to <c>string[]</c>: element 0 is the folder to scan and
        /// element 1 is the folder that receives the output files.
        /// </param>
        private void BgWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            //selects the text encoding based on user selection
            Encoding SelectedEncoding = null;

            this.Invoke((MethodInvoker) delegate()
            {
                SelectedEncoding = Encoding.GetEncoding(EncodingDropdown.SelectedItem.ToString());
            });

            //report what we're working on
            FilenameLabel.Invoke((MethodInvoker) delegate
            {
                FilenameLabel.Text = "Loading model... please wait...";
            });

            // Path to the folder with models
            var segmenterData = Path.Combine(Path.GetDirectoryName(AppDomain.CurrentDomain.BaseDirectory), @"data");

            var props = new Properties();

            props.setProperty("sighanCorporaDict", segmenterData);
            props.setProperty("serDictionary", Path.Combine(segmenterData, "dict-chris6.ser.gz"));
            // Lines below are needed because CTBSegDocumentIteratorFactory accesses it.
            // WebName is the encoding's registered charset name; Encoding.ToString() would
            // only yield the .NET type name, which is not a charset identifier.
            props.setProperty("inputEncoding", SelectedEncoding.WebName);
            props.setProperty("sighanPostProcessing", "true");

            var segmenter = new CRFClassifier(props);

            segmenter.loadClassifierNoExceptions(Path.Combine(segmenterData, "ctb.gz"), props);

            //get the list of files
            var SearchDepth = ScanSubfolderCheckbox.Checked
                ? SearchOption.AllDirectories
                : SearchOption.TopDirectoryOnly;

            var folders = (string[])e.Argument;
            var files   = Directory.EnumerateFiles(folders[0], "*.txt", SearchDepth);

            try
            {
                string outputdir = folders[1];

                Directory.CreateDirectory(outputdir);

                foreach (string fileName in files)
                {
                    //set up our variables to report
                    string Filename_Clean = Path.GetFileName(fileName);

                    //report what we're working on
                    FilenameLabel.Invoke((MethodInvoker) delegate
                    {
                        FilenameLabel.Text = "Analyzing: " + Filename_Clean;
                    });

                    // NOTE(review): ToLower() uses the current culture — confirm that is intended.
                    string readText = File.ReadAllText(fileName, SelectedEncoding).ToLower();

                    string TokenResults = segmenter.classifyToString(readText);

                    // Overwrite (append: false) any existing output file of the same name.
                    using (System.IO.StreamWriter fileout =
                               new StreamWriter(Path.Combine(outputdir, Filename_Clean), false, SelectedEncoding))
                    {
                        fileout.Write(TokenResults);
                    }
                }
            }
            catch (Exception ex)
            {
                // Surface the underlying reason instead of discarding it, so the user can act on it.
                MessageBox.Show("ZhToken encountered a problem while trying to tokenize/write a file."
                                + Environment.NewLine + Environment.NewLine + ex.Message);
            }
        }