Ejemplo n.º 1
0
 // these methods are not implemented for a rule-based sequence classifier
 /// <summary>
 /// Training hook with an empty body: <paramref name="docs"/> and
 /// <paramref name="readerAndWriter"/> are ignored and nothing is learned.
 /// </summary>
 /// <remarks>
 /// NOTE(review): unlike a throwing override, this one returns silently, so callers get no
 /// signal that training did not happen — confirm that is the intended contract.
 /// </remarks>
 /// <param name="docs">Training documents (unused).</param>
 /// <param name="readerAndWriter">Reader/writer for the documents (unused).</param>
 public override void Train(ICollection <IList <CoreLabel> > docs, IDocumentReaderAndWriter <CoreLabel> readerAndWriter)
 {
 }
 /// <summary>
 /// Training is not supported by this classifier; the override always throws.
 /// </summary>
 /// <param name="docs">Training documents (never read).</param>
 /// <param name="readerAndWriter">Reader/writer for the documents (never read).</param>
 /// <exception cref="NotSupportedException">Always thrown.</exception>
 public override void Train(ICollection <IList <IN> > docs, IDocumentReaderAndWriter <IN> readerAndWriter)
 {
     throw new NotSupportedException();
 }
        /// <summary>
        /// Runs one particular CRF of a ClassifierCombiner on a test file (or a comma-separated
        /// list of test files) and produces the report selected by <paramref name="flags"/>.
        /// </summary>
        /// <remarks>
        /// The user can say <c>-crfToExamine 0</c> to get the first element, or
        /// <c>-crfToExamine /edu/stanford/models/muc7.crf.ser.gz</c> to select by load path.
        /// This does not currently support drill down on CMM's.
        /// <para>
        /// The CRF is resolved as follows: first <paramref name="crfNameOrIndex"/> is tried as an
        /// integer index into <c>cc.baseClassifiers</c>; if it is not an integer, or the integer is
        /// out of range, it is looked up in <c>cc.initLoadPaths</c>. If neither yields a valid
        /// index the method returns without doing anything.
        /// </para>
        /// <para>
        /// For a single <paramref name="testFile"/> the first set flag wins, in this order:
        /// <c>searchGraphPrefix</c>, <c>printFirstOrderProbs</c>, <c>printFactorTable</c>,
        /// <c>printProbs</c>, <c>useKBest</c>, <c>printLabelValue</c>; with none set the file is
        /// classified and the answers are written. For <paramref name="testFiles"/> only
        /// <c>printProbs</c> is consulted. When both are given, <paramref name="testFile"/> wins.
        /// </para>
        /// </remarks>
        /// <param name="cc">Combiner whose base classifiers and load paths are searched.</param>
        /// <param name="crfNameOrIndex">Index into the base classifiers, or a classifier load path.</param>
        /// <param name="flags">Flags choosing which report to generate.</param>
        /// <param name="testFile">Single test file, or null.</param>
        /// <param name="testFiles">Comma-separated test file names, or null.</param>
        /// <param name="readerAndWriter">Reader/writer used for the single-file reports.</param>
        /// <exception cref="System.Exception"/>
        public static void ExamineCRF(Edu.Stanford.Nlp.IE.ClassifierCombiner cc, string crfNameOrIndex, SeqClassifierFlags flags, string testFile, string testFiles, IDocumentReaderAndWriter <CoreLabel> readerAndWriter)
        {
            // potential index into baseClassifiers
            int ci;

            // 1. first see if crfNameOrIndex is an index into baseClassifiers
            // 2. if it is not an integer or is out of range, see if it is the name of a loadPath
            // TryParse is used instead of catching an exception: Convert.ToInt32 throws
            // FormatException/OverflowException, which the old Java-style catch never matched.
            bool isIndex = int.TryParse(crfNameOrIndex, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out ci);
            if (!isIndex || ci < 0 || ci >= cc.baseClassifiers.Count)
            {
                ci = cc.initLoadPaths.IndexOf(crfNameOrIndex);
            }
            // no classifier could be identified: generate no report
            if (ci < 0 || ci >= cc.baseClassifiers.Count)
            {
                return;
            }
            // TODO: this will break if baseClassifiers contains something that is not a CRF
            CRFClassifier <CoreLabel> crf = (CRFClassifier <CoreLabel>)cc.baseClassifiers[ci];
            if (crf == null)
            {
                return;
            }
            if (testFile != null)
            {
                // testFile was set: do the crf stuff for a single testFile (overrides testFiles)
                if (flags.searchGraphPrefix != null)
                {
                    crf.ClassifyAndWriteViterbiSearchGraph(testFile, flags.searchGraphPrefix, crf.MakeReaderAndWriter());
                }
                else if (flags.printFirstOrderProbs)
                {
                    crf.PrintFirstOrderProbs(testFile, readerAndWriter);
                }
                else if (flags.printFactorTable)
                {
                    crf.PrintFactorTable(testFile, readerAndWriter);
                }
                else if (flags.printProbs)
                {
                    crf.PrintProbs(testFile, readerAndWriter);
                }
                else if (flags.useKBest)
                {
                    // TO DO: handle if user doesn't provide kBest
                    int k = flags.kBest;
                    crf.ClassifyAndWriteAnswersKBest(testFile, k, readerAndWriter);
                }
                else if (flags.printLabelValue)
                {
                    crf.PrintLabelInformation(testFile, readerAndWriter);
                }
                else
                {
                    // no crf test flag provided
                    log.Info("Warning: no crf test flag was provided, running classify and write answers");
                    crf.ClassifyAndWriteAnswers(testFile, readerAndWriter, true);
                }
            }
            else if (testFiles != null)
            {
                // testFiles was set: one File per comma-separated name
                IList <File> files = new List <File>();
                foreach (string filename in testFiles.Split(","))
                {
                    files.Add(new File(filename));
                }
                if (flags.printProbs)
                {
                    // there is a crf and printProbs
                    crf.PrintProbs(files, crf.DefaultReaderAndWriter());
                }
                else
                {
                    log.Info("Warning: no crf test flag was provided, running classify files and write answers");
                    crf.ClassifyFilesAndWriteAnswers(files, crf.DefaultReaderAndWriter(), true);
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>The main method.</summary>
        /// <remarks>
        /// Builds an NERClassifierCombiner (loaded from the <c>loadClassifier</c> property if set,
        /// otherwise created from the properties) and then, depending on which properties are
        /// present, performs these steps in order:
        /// <list type="bullet">
        /// <item><description><c>serializeTo</c>: writes the combiner to disk.</description></item>
        /// <item><description><c>textFile</c> / <c>textFiles</c>: classifies the file(s) and writes the answers.</description></item>
        /// <item><description><c>testFile</c> / <c>testFiles</c>: runs the combiner on the test data, or, when
        /// <c>crfToExamine</c> is set, delegates to <c>ClassifierCombiner.ExamineCRF</c>.</description></item>
        /// <item><description><c>showNCCInfo</c>: prints information about the combiner.</description></item>
        /// <item><description><c>flags.readStdin</c>: classifies standard input.</description></item>
        /// </list>
        /// </remarks>
        /// <param name="args">Command-line arguments, converted to properties.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            StringUtils.LogInvocationString(log, args);
            Properties         props = StringUtils.ArgsToProperties(args);
            SeqClassifierFlags flags = new SeqClassifierFlags(props, false);
            // false for print probs as printed in next code block
            string loadPath = props.GetProperty("loadClassifier");
            NERClassifierCombiner ncc;

            if (loadPath != null)
            {
                // note that when loading a serialized classifier, the philosophy is override
                // any settings in props with those given in the commandline
                // so if you dumped it with useSUTime = false, and you say -useSUTime at
                // the commandline, the commandline takes precedence
                ncc = ((NERClassifierCombiner)GetClassifier(loadPath, props));
            }
            else
            {
                // pass null for passDownProperties to let all props go through
                ncc = CreateNERClassifierCombiner("ner", null, props);
            }
            // write the NERClassifierCombiner to the given path on disk
            string serializeTo = props.GetProperty("serializeTo");

            if (serializeTo != null)
            {
                ncc.SerializeClassifier(serializeTo);
            }
            string textFile = props.GetProperty("textFile");

            if (textFile != null)
            {
                ncc.ClassifyAndWriteAnswers(textFile);
            }
            // run on multiple textFiles , based off CRFClassifier code
            string textFiles = props.GetProperty("textFiles");

            if (textFiles != null)
            {
                IList <File> files = new List <File>();
                foreach (string filename in textFiles.Split(","))
                {
                    files.Add(new File(filename));
                }
                ncc.ClassifyFilesAndWriteAnswers(files);
            }
            // options for run the NERClassifierCombiner on a testFile or testFiles
            string testFile     = props.GetProperty("testFile");
            string testFiles    = props.GetProperty("testFiles");
            string crfToExamine = props.GetProperty("crfToExamine");
            IDocumentReaderAndWriter <CoreLabel> readerAndWriter = ncc.DefaultReaderAndWriter();

            if (testFile != null || testFiles != null)
            {
                if (crfToExamine != null)
                {
                    // a specific crf was requested: let ExamineCRF pick the report
                    ClassifierCombiner.ExamineCRF(ncc, crfToExamine, flags, testFile, testFiles, readerAndWriter);
                }
                else if (testFile != null)
                {
                    // no crfToExamine: a single testFile takes precedence over testFiles
                    ncc.ClassifyAndWriteAnswers(testFile, readerAndWriter, true);
                }
                else
                {
                    // one File per comma-separated name, built the same way as for textFiles above
                    IList <File> testFileList = new List <File>();
                    foreach (string testFilename in testFiles.Split(","))
                    {
                        testFileList.Add(new File(testFilename));
                    }
                    ncc.ClassifyFilesAndWriteAnswers(testFileList, ncc.DefaultReaderAndWriter(), true);
                }
            }
            // option for showing info about the NERClassifierCombiner
            string showNCCInfo = props.GetProperty("showNCCInfo");

            if (showNCCInfo != null)
            {
                ShowNCCInfo(ncc);
            }
            // option for reading in from stdin
            if (flags.readStdin)
            {
                ncc.ClassifyStdin();
            }
        }
 /// <summary>
 /// Optionally loads extra documents from <c>flags.unsupDropoutFile</c> and, when feature
 /// discovery is enabled, returns them together with <paramref name="docs"/>.
 /// </summary>
 /// <remarks>
 /// When <c>flags.unsupDropoutFile</c> is set, every document in that file is read, each
 /// token's <c>AnswerAnnotation</c> and <c>GoldAnswerAnnotation</c> are set to
 /// <c>flags.backgroundSymbol</c>, and the documents are stored in <c>unsupDocs</c>.
 /// </remarks>
 /// <param name="docs">The documents supplied by the caller.</param>
 /// <param name="readerAndWriter">Reader/writer used to parse the dropout file.</param>
 /// <returns>
 /// A new list holding <paramref name="docs"/> followed by <c>unsupDocs</c> when
 /// <c>unsupDocs</c> is non-null and <c>flags.doFeatureDiscovery</c> is set; otherwise
 /// <paramref name="docs"/> itself.
 /// </returns>
 protected internal override ICollection <IList <IN> > LoadAuxiliaryData(ICollection <IList <IN> > docs, IDocumentReaderAndWriter <IN> readerAndWriter)
 {
     string dropoutPath = flags.unsupDropoutFile;

     if (dropoutPath != null)
     {
         log.Info("Reading unsupervised dropout data from file: " + dropoutPath);
         Timing stopwatch = new Timing();
         stopwatch.Start();
         unsupDocs = new List <IList <IN> >();
         ObjectBank <IList <IN> > dropoutBank = MakeObjectBankFromFile(dropoutPath, readerAndWriter);
         foreach (IList <IN> dropoutDoc in dropoutBank)
         {
             // overwrite both answer annotations with the background symbol
             foreach (IN token in dropoutDoc)
             {
                 token.Set(typeof(CoreAnnotations.AnswerAnnotation), flags.backgroundSymbol);
                 token.Set(typeof(CoreAnnotations.GoldAnswerAnnotation), flags.backgroundSymbol);
             }
             unsupDocs.Add(dropoutDoc);
         }
         long readMillis = stopwatch.Stop();
         log.Info("Time to read: : " + Timing.ToSecondsString(readMillis) + " seconds");
     }

     // nothing extra to merge: hand back the caller's collection untouched
     if (unsupDocs == null || !flags.doFeatureDiscovery)
     {
         return docs;
     }

     IList <IList <IN> > combined = new List <IList <IN> >();
     Sharpen.Collections.AddAll(combined, docs);
     Sharpen.Collections.AddAll(combined, unsupDocs);
     return combined;
 }
        /// <summary>The main method, which is essentially the same as in CRFClassifier.</summary>
        /// <remarks>
        /// Loads a classifier (from <c>flags.loadClassifier</c>, else the legacy
        /// <c>flags.loadJarClassifier</c>, else the default classifier), applies any class biases
        /// given in <c>flags.classBias</c>, and, if <c>flags.testFile</c> is set, runs the
        /// classifier on it. See the class documentation.
        /// <para>
        /// <c>flags.classBias</c> is tokenized on ',' into entries and each entry on ':' into a
        /// class name followed by a weight, e.g. <c>NAME:2.0,OTHER:-1.5</c>. Weights are parsed
        /// culture-invariantly so '.' is always the decimal separator.
        /// </para>
        /// <para>
        /// On the test file the first set flag wins: <c>printFirstOrderProbs</c>,
        /// <c>printProbs</c>, <c>useKBest</c>; with none set the answers are written.
        /// </para>
        /// </remarks>
        /// <param name="args">Command-line arguments, converted to properties.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            StringUtils.LogInvocationString(log, args);
            Properties props = StringUtils.ArgsToProperties(args);
            CRFBiasedClassifier <CoreLabel> crf = new CRFBiasedClassifier <CoreLabel>(props);
            string testFile = crf.flags.testFile;
            string loadPath = crf.flags.loadClassifier;

            if (loadPath != null)
            {
                crf.LoadClassifierNoExceptions(loadPath, props);
            }
            else if (crf.flags.loadJarClassifier != null)
            {
                // legacy support of old option
                crf.LoadClassifierNoExceptions(crf.flags.loadJarClassifier, props);
            }
            else
            {
                crf.LoadDefaultClassifier();
            }
            if (crf.flags.classBias != null)
            {
                StringTokenizer biases = new StringTokenizer(crf.flags.classBias, ",");
                while (biases.HasMoreTokens())
                {
                    StringTokenizer bias  = new StringTokenizer(biases.NextToken(), ":");
                    string          cname = bias.NextToken();
                    // double.ParseDouble is a Java-ism that does not exist in .NET; parse with the
                    // invariant culture to keep Java's locale-independent number format.
                    double          w     = double.Parse(bias.NextToken(), System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture);
                    crf.SetBiasWeight(cname, w);
                    log.Info("Setting bias for class " + cname + " to " + w);
                }
            }
            if (testFile != null)
            {
                IDocumentReaderAndWriter <CoreLabel> readerAndWriter = crf.MakeReaderAndWriter();
                if (crf.flags.printFirstOrderProbs)
                {
                    crf.PrintFirstOrderProbs(testFile, readerAndWriter);
                }
                else if (crf.flags.printProbs)
                {
                    crf.PrintProbs(testFile, readerAndWriter);
                }
                else if (crf.flags.useKBest)
                {
                    int k = crf.flags.kBest;
                    crf.ClassifyAndWriteAnswersKBest(testFile, k, readerAndWriter);
                }
                else
                {
                    crf.ClassifyAndWriteAnswers(testFile, readerAndWriter, true);
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Demo entry point showing several ways to run a sequence classifier over text.
        /// </summary>
        /// <remarks>
        /// <c>args[0]</c>, if present, is the path of the serialized classifier to load; otherwise
        /// <c>classifiers/english.all.3class.distsim.crf.ser.gz</c> is used. <c>args[1]</c>, if
        /// present, is a file to annotate; otherwise two hard-coded example sentences are used.
        /// Each style of output is separated by a line containing <c>---</c>.
        /// </remarks>
        /// <param name="args">Optional classifier path and optional input file.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            string serializedClassifier = "classifiers/english.all.3class.distsim.crf.ser.gz";

            if (args.Length > 0)
            {
                serializedClassifier = args[0];
            }
            AbstractSequenceClassifier <CoreLabel> classifier = CRFClassifier.GetClassifier(serializedClassifier);

            /* For either a file to annotate or for the hardcoded text example, this
             * demo file shows several ways to process the input, for teaching purposes.
             */
            if (args.Length > 1)
            {
                /* For the file, it shows (1) how to run NER on a String, (2) how
                 * to get the entities in the String with character offsets, and
                 * (3) how to run NER on a whole file (without loading it into a String).
                 */
                string fileContents             = IOUtils.SlurpFile(args[1]);
                IList <IList <CoreLabel> > @out = classifier.Classify(fileContents);
                foreach (IList <CoreLabel> sentence in @out)
                {
                    foreach (CoreLabel word in sentence)
                    {
                        System.Console.Out.Write(word.Word() + '/' + word.Get(typeof(CoreAnnotations.AnswerAnnotation)) + ' ');
                    }
                    System.Console.Out.WriteLine();
                }
                System.Console.Out.WriteLine("---");
                @out = classifier.ClassifyFile(args[1]);
                foreach (IList <CoreLabel> sentence in @out)
                {
                    foreach (CoreLabel word in sentence)
                    {
                        System.Console.Out.Write(word.Word() + '/' + word.Get(typeof(CoreAnnotations.AnswerAnnotation)) + ' ');
                    }
                    System.Console.Out.WriteLine();
                }
                System.Console.Out.WriteLine("---");
                IList <Triple <string, int, int> > list = classifier.ClassifyToCharacterOffsets(fileContents);
                foreach (Triple <string, int, int> item in list)
                {
                    System.Console.Out.WriteLine(item.First() + ": " + Sharpen.Runtime.Substring(fileContents, item.Second(), item.Third()));
                }
                System.Console.Out.WriteLine("---");
                System.Console.Out.WriteLine("Ten best entity labelings");
                IDocumentReaderAndWriter <CoreLabel> readerAndWriter = classifier.MakePlainTextReaderAndWriter();
                classifier.ClassifyAndWriteAnswersKBest(args[1], 10, readerAndWriter);
                System.Console.Out.WriteLine("---");
                System.Console.Out.WriteLine("Per-token marginalized probabilities");
                classifier.PrintProbs(args[1], readerAndWriter);
            }
            else
            {
                // -- The first order (token pair) clique probabilities could also be printed
                // -- (CRFClassifier.PrintFirstOrderProbs on a file), but that output is a bit
                // -- overwhelming, so it is left out by default.

                /* For the hard-coded String, it shows how to run it on a single
                 * sentence, and how to do this and produce several formats, including
                 * slash tags and an inline XML output format. It also shows the full
                 * contents of the CoreLabels that are constructed by the
                 * classifier. And it shows getting out the probabilities of different
                 * assignments and an n-best list of classifications with probabilities.
                 */
                string[] example = new string[] { "Good afternoon Rajat Raina, how are you today?", "I go to school at Stanford University, which is located in California." };
                foreach (string text in example)
                {
                    System.Console.Out.WriteLine(classifier.ClassifyToString(text));
                }
                System.Console.Out.WriteLine("---");
                foreach (string text in example)
                {
                    // This one puts in spaces and newlines between tokens, so just print not println.
                    System.Console.Out.Write(classifier.ClassifyToString(text, "slashTags", false));
                }
                System.Console.Out.WriteLine("---");
                foreach (string text in example)
                {
                    // This one is best for dealing with the output as a TSV (tab-separated column) file.
                    // The first column gives entities, the second their classes, and the third the remaining text in a document
                    System.Console.Out.Write(classifier.ClassifyToString(text, "tabbedEntities", false));
                }
                System.Console.Out.WriteLine("---");
                foreach (string text in example)
                {
                    System.Console.Out.WriteLine(classifier.ClassifyWithInlineXML(text));
                }
                System.Console.Out.WriteLine("---");
                foreach (string text in example)
                {
                    System.Console.Out.WriteLine(classifier.ClassifyToString(text, "xml", true));
                }
                System.Console.Out.WriteLine("---");
                foreach (string text in example)
                {
                    System.Console.Out.Write(classifier.ClassifyToString(text, "tsv", false));
                }
                System.Console.Out.WriteLine("---");
                // This gets out entities with character offsets
                int j = 0;
                foreach (string text in example)
                {
                    j++;
                    IList <Triple <string, int, int> > triples = classifier.ClassifyToCharacterOffsets(text);
                    foreach (Triple <string, int, int> trip in triples)
                    {
                        // .NET composite formatting; the Java-style Printf("%s ... %d%n") has no TextWriter equivalent
                        System.Console.Out.WriteLine("{0} over character offsets [{1}, {2}) in sentence {3}.", trip.First(), trip.Second(), trip.Third(), j);
                    }
                }
                System.Console.Out.WriteLine("---");
                // This prints out all the details of what is stored for each token
                int i = 0;
                foreach (string text in example)
                {
                    foreach (IList <CoreLabel> lcl in classifier.Classify(text))
                    {
                        foreach (CoreLabel cl in lcl)
                        {
                            System.Console.Out.Write(i++ + ": ");
                            System.Console.Out.WriteLine(cl.ToShorterString());
                        }
                    }
                }
                System.Console.Out.WriteLine("---");
            }
        }
 /// <summary>
 /// Training hook with an empty body: <paramref name="objectBankWrapper"/> and
 /// <paramref name="readerAndWriter"/> are ignored and the call returns immediately.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the override neither trains nor throws, so callers cannot tell that nothing
 /// happened — confirm that a silent no-op is the intended contract.
 /// </remarks>
 /// <param name="objectBankWrapper">Training documents (unused).</param>
 /// <param name="readerAndWriter">Reader/writer for the documents (unused).</param>
 public override void Train(ICollection <IList <In> > objectBankWrapper, IDocumentReaderAndWriter <In> readerAndWriter)
 {
 }