// These methods are not implemented for a rule-based sequence classifier.

/// <summary>No-op: the body is empty, so calling this method has no effect.</summary>
/// <param name="docs">The training documents; not read by this method.</param>
/// <param name="readerAndWriter">The document reader/writer; not read by this method.</param>
public override void Train(ICollection<IList<CoreLabel>> docs, IDocumentReaderAndWriter<CoreLabel> readerAndWriter)
{
    // Nothing to do.
}
/// <summary>Training is not supported by this classifier; every call fails.</summary>
/// <param name="docs">The training documents; not read by this method.</param>
/// <param name="readerAndWriter">The document reader/writer; not read by this method.</param>
/// <exception cref="System.NotSupportedException">Always thrown.</exception>
public override void Train(ICollection<IList<IN>> docs, IDocumentReaderAndWriter<IN> readerAndWriter)
{
    throw new NotSupportedException();
}
/// <summary>Runs a particular CRF of a ClassifierCombiner on a test file or on a list of test files.</summary>
/// <remarks>
/// The CRF is selected by <paramref name="crfNameOrIndex"/>: the user can say <c>-crfToExamine 0</c> to get
/// the first element, or <c>-crfToExamine /edu/stanford/models/muc7.crf.ser.gz</c> to select by load path.
/// This does not currently support drill down on CMMs.
/// When no CRF can be selected, or neither <paramref name="testFile"/> nor
/// <paramref name="testFiles"/> is set, the method does nothing.
/// </remarks>
/// <param name="cc">The combiner whose <c>baseClassifiers</c> and <c>initLoadPaths</c> are searched.</param>
/// <param name="crfNameOrIndex">An index into <c>cc.baseClassifiers</c>, or an entry of <c>cc.initLoadPaths</c>.</param>
/// <param name="flags">Flags that choose which report is produced.</param>
/// <param name="testFile">A single test file; when non-null it overrides <paramref name="testFiles"/>.</param>
/// <param name="testFiles">A comma-separated list of test files; only used when <paramref name="testFile"/> is null.</param>
/// <param name="readerAndWriter">
/// The reader/writer used by the single-file reports (the search-graph report and the multi-file
/// reports use reader/writers obtained from the CRF itself).
/// </param>
/// <exception cref="System.Exception"/>
public static void ExamineCRF(Edu.Stanford.Nlp.IE.ClassifierCombiner cc, string crfNameOrIndex, SeqClassifierFlags flags, string testFile, string testFiles, IDocumentReaderAndWriter<CoreLabel> readerAndWriter)
{
    // Resolve crfNameOrIndex to a potential index into baseClassifiers with the following rules:
    //   1. first see if it is an integer that is a valid index into baseClassifiers;
    //   2. if it is not an integer, or is out of range, see if it is the name of a load path.
    // int.TryParse is used instead of Convert.ToInt32 because the latter reports a non-numeric
    // string with FormatException (and an oversized one with OverflowException), which the
    // former catch clause did not handle, so a load-path name could never be looked up.
    int ci;
    bool isInteger = int.TryParse(
        crfNameOrIndex,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out ci);
    if (!isInteger || ci < 0 || ci >= cc.baseClassifiers.Count)
    {
        // IndexOf yields a negative value when the name is not a known load path.
        ci = cc.initLoadPaths.IndexOf(crfNameOrIndex);
    }

    // If ci does not correspond to an element of baseClassifiers there is no CRF to examine.
    if (ci < 0 || ci >= cc.baseClassifiers.Count)
    {
        return;
    }

    // TODO: this will break if baseClassifiers contains something that is not a CRF
    CRFClassifier<CoreLabel> crf = (CRFClassifier<CoreLabel>)cc.baseClassifiers[ci];
    if (crf == null)
    {
        return;
    }

    if (testFile != null)
    {
        // There is a crf and testFile was set: produce the report for the single testFile.
        if (flags.searchGraphPrefix != null)
        {
            crf.ClassifyAndWriteViterbiSearchGraph(testFile, flags.searchGraphPrefix, crf.MakeReaderAndWriter());
        }
        else if (flags.printFirstOrderProbs)
        {
            crf.PrintFirstOrderProbs(testFile, readerAndWriter);
        }
        else if (flags.printFactorTable)
        {
            crf.PrintFactorTable(testFile, readerAndWriter);
        }
        else if (flags.printProbs)
        {
            crf.PrintProbs(testFile, readerAndWriter);
        }
        else if (flags.useKBest)
        {
            // TO DO: handle if user doesn't provide kBest
            int k = flags.kBest;
            crf.ClassifyAndWriteAnswersKBest(testFile, k, readerAndWriter);
        }
        else if (flags.printLabelValue)
        {
            crf.PrintLabelInformation(testFile, readerAndWriter);
        }
        else
        {
            // no crf test flag provided
            log.Info("Warning: no crf test flag was provided, running classify and write answers");
            crf.ClassifyAndWriteAnswers(testFile, readerAndWriter, true);
        }
    }
    else if (testFiles != null)
    {
        // There is a crf and testFiles was set: produce the report for all of the testFiles.
        // (If testFile was set as well, testFile overrides and this branch is not reached.)
        // One File is built per comma-separated name.
        IList<File> files = new List<File>();
        foreach (string filename in testFiles.Split(","))
        {
            files.Add(new File(filename));
        }

        if (flags.printProbs)
        {
            // there is a crf and printProbs
            crf.PrintProbs(files, crf.DefaultReaderAndWriter());
        }
        else
        {
            log.Info("Warning: no crf test flag was provided, running classify files and write answers");
            crf.ClassifyFilesAndWriteAnswers(files, crf.DefaultReaderAndWriter(), true);
        }
    }
}
/// <summary>The main method.</summary>
/// <remarks>
/// Builds a NERClassifierCombiner from the command-line properties and then performs, in this order,
/// whichever of the following were requested: <c>serializeTo</c>, <c>textFile</c>, <c>textFiles</c>,
/// <c>testFile</c>/<c>testFiles</c> (optionally restricted to one CRF via <c>crfToExamine</c>),
/// <c>showNCCInfo</c>, and reading from stdin (<c>flags.readStdin</c>).
/// </remarks>
/// <param name="args">Command-line arguments, converted to properties.</param>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    StringUtils.LogInvocationString(log, args);
    Properties props = StringUtils.ArgsToProperties(args);
    // false for print probs as printed in next code block
    SeqClassifierFlags flags = new SeqClassifierFlags(props, false);

    string loadPath = props.GetProperty("loadClassifier");
    NERClassifierCombiner ncc;
    if (loadPath != null)
    {
        // note that when loading a serialized classifier, the philosophy is override
        // any settings in props with those given in the commandline
        // so if you dumped it with useSUTime = false, and you say -useSUTime at
        // the commandline, the commandline takes precedence
        ncc = ((NERClassifierCombiner)GetClassifier(loadPath, props));
    }
    else
    {
        // pass null for passDownProperties to let all props go through
        ncc = CreateNERClassifierCombiner("ner", null, props);
    }

    // write the NERClassifierCombiner to the given path on disk
    string serializeTo = props.GetProperty("serializeTo");
    if (serializeTo != null)
    {
        ncc.SerializeClassifier(serializeTo);
    }

    string textFile = props.GetProperty("textFile");
    if (textFile != null)
    {
        ncc.ClassifyAndWriteAnswers(textFile);
    }

    // run on multiple textFiles , based off CRFClassifier code
    string textFiles = props.GetProperty("textFiles");
    if (textFiles != null)
    {
        IList<File> files = new List<File>();
        foreach (string filename in textFiles.Split(","))
        {
            files.Add(new File(filename));
        }
        ncc.ClassifyFilesAndWriteAnswers(files);
    }

    // options for run the NERClassifierCombiner on a testFile or testFiles
    string testFile = props.GetProperty("testFile");
    string testFiles = props.GetProperty("testFiles");
    string crfToExamine = props.GetProperty("crfToExamine");
    IDocumentReaderAndWriter<CoreLabel> readerAndWriter = ncc.DefaultReaderAndWriter();
    if (testFile != null || testFiles != null)
    {
        if (crfToExamine != null)
        {
            // a specific crf was requested
            ClassifierCombiner.ExamineCRF(ncc, crfToExamine, flags, testFile, testFiles, readerAndWriter);
        }
        else if (testFile != null)
        {
            // in this case there is no crfToExamine
            ncc.ClassifyAndWriteAnswers(testFile, readerAndWriter, true);
        }
        else
        {
            // Only testFiles was given: build one File per comma-separated name, the same way
            // the textFiles option above does.
            IList<File> files = new List<File>();
            foreach (string filename in testFiles.Split(","))
            {
                files.Add(new File(filename));
            }
            ncc.ClassifyFilesAndWriteAnswers(files, ncc.DefaultReaderAndWriter(), true);
        }
    }

    // option for showing info about the NERClassifierCombiner
    string showNCCInfo = props.GetProperty("showNCCInfo");
    if (showNCCInfo != null)
    {
        ShowNCCInfo(ncc);
    }

    // option for reading in from stdin
    if (flags.readStdin)
    {
        ncc.ClassifyStdin();
    }
}
/// <summary>
/// Optionally reads the unsupervised dropout documents named by <c>flags.unsupDropoutFile</c> and,
/// when feature discovery is enabled, returns them together with the supplied documents.
/// </summary>
/// <param name="docs">The documents supplied by the caller.</param>
/// <param name="readerAndWriter">The reader/writer used to read the unsupervised dropout file.</param>
/// <returns>
/// A new list holding <paramref name="docs"/> followed by the unsupervised documents when the latter
/// are available and <c>flags.doFeatureDiscovery</c> is set; otherwise <paramref name="docs"/> itself.
/// </returns>
protected internal override ICollection<IList<IN>> LoadAuxiliaryData(ICollection<IList<IN>> docs, IDocumentReaderAndWriter<IN> readerAndWriter)
{
    if (flags.unsupDropoutFile != null)
    {
        log.Info("Reading unsupervised dropout data from file: " + flags.unsupDropoutFile);
        Timing stopwatch = new Timing();
        stopwatch.Start();

        unsupDocs = new List<IList<IN>>();
        ObjectBank<IList<IN>> bank = MakeObjectBankFromFile(flags.unsupDropoutFile, readerAndWriter);
        foreach (IList<IN> unsupDoc in bank)
        {
            // Every token's answer and gold answer are set to the background symbol.
            foreach (IN token in unsupDoc)
            {
                token.Set(typeof(CoreAnnotations.AnswerAnnotation), flags.backgroundSymbol);
                token.Set(typeof(CoreAnnotations.GoldAnswerAnnotation), flags.backgroundSymbol);
            }
            unsupDocs.Add(unsupDoc);
        }

        long millis = stopwatch.Stop();
        log.Info("Time to read: : " + Timing.ToSecondsString(millis) + " seconds");
    }

    // Without unsupervised documents, or without feature discovery, the input is returned untouched.
    if (unsupDocs == null || !flags.doFeatureDiscovery)
    {
        return docs;
    }

    IList<IList<IN>> combined = new List<IList<IN>>();
    Sharpen.Collections.AddAll(combined, docs);
    Sharpen.Collections.AddAll(combined, unsupDocs);
    return combined;
}
/// <summary>The main method, which is essentially the same as in CRFClassifier.</summary>
/// <remarks>
/// Loads a classifier (from <c>loadClassifier</c>, else the legacy <c>loadJarClassifier</c>, else the
/// default classifier), applies any class biases given in <c>classBias</c> as comma-separated
/// <c>name:weight</c> pairs, and, when a test file is set, runs the report selected by the flags on it.
/// See the class documentation.
/// </remarks>
/// <param name="args">Command-line arguments, converted to properties.</param>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    StringUtils.LogInvocationString(log, args);
    Properties props = StringUtils.ArgsToProperties(args);
    CRFBiasedClassifier<CoreLabel> crf = new CRFBiasedClassifier<CoreLabel>(props);

    // Both values are read from the flags before any classifier is loaded.
    string testFile = crf.flags.testFile;
    string loadPath = crf.flags.loadClassifier;

    if (loadPath != null)
    {
        crf.LoadClassifierNoExceptions(loadPath, props);
    }
    else if (crf.flags.loadJarClassifier != null)
    {
        // legacy support of old option
        crf.LoadClassifierNoExceptions(crf.flags.loadJarClassifier, props);
    }
    else
    {
        crf.LoadDefaultClassifier();
    }

    if (crf.flags.classBias != null)
    {
        StringTokenizer biases = new StringTokenizer(crf.flags.classBias, ",");
        while (biases.HasMoreTokens())
        {
            StringTokenizer bias = new StringTokenizer(biases.NextToken(), ":");
            string cname = bias.NextToken();
            // Parse with the invariant culture so that a weight such as "1.5" is read the same
            // way regardless of the machine's regional settings.
            double w = double.Parse(bias.NextToken(), System.Globalization.CultureInfo.InvariantCulture);
            crf.SetBiasWeight(cname, w);
            log.Info("Setting bias for class " + cname + " to " + w);
        }
    }

    if (testFile != null)
    {
        IDocumentReaderAndWriter<CoreLabel> readerAndWriter = crf.MakeReaderAndWriter();
        if (crf.flags.printFirstOrderProbs)
        {
            crf.PrintFirstOrderProbs(testFile, readerAndWriter);
        }
        else if (crf.flags.printProbs)
        {
            crf.PrintProbs(testFile, readerAndWriter);
        }
        else if (crf.flags.useKBest)
        {
            int k = crf.flags.kBest;
            crf.ClassifyAndWriteAnswersKBest(testFile, k, readerAndWriter);
        }
        else
        {
            crf.ClassifyAndWriteAnswers(testFile, readerAndWriter, true);
        }
    }
}
/// <summary>
/// Demo entry point: loads a serialized classifier and shows several ways of running it, either on a
/// file or on two hard-coded example sentences.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> (optional) is the path of the serialized classifier; when absent
/// <c>classifiers/english.all.3class.distsim.crf.ser.gz</c> is used.
/// <c>args[1]</c> (optional) is a file to annotate; when absent the hard-coded examples are used.
/// </param>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    string serializedClassifier = "classifiers/english.all.3class.distsim.crf.ser.gz";
    if (args.Length > 0)
    {
        serializedClassifier = args[0];
    }
    AbstractSequenceClassifier<CoreLabel> classifier = CRFClassifier.GetClassifier(serializedClassifier);

    /* For either a file to annotate or for the hardcoded text example, this
     * demo file shows several ways to process the input, for teaching purposes.
     */
    if (args.Length > 1)
    {
        /* For the file, it shows (1) how to run NER on a String, (2) how
         * to get the entities in the String with character offsets, and
         * (3) how to run NER on a whole file (without loading it into a String).
         */
        string fileContents = IOUtils.SlurpFile(args[1]);
        IList<IList<CoreLabel>> labeled = classifier.Classify(fileContents);
        foreach (IList<CoreLabel> sentence in labeled)
        {
            foreach (CoreLabel word in sentence)
            {
                System.Console.Out.Write(word.Word() + '/' + word.Get(typeof(CoreAnnotations.AnswerAnnotation)) + ' ');
            }
            System.Console.Out.WriteLine();
        }
        System.Console.Out.WriteLine("---");

        labeled = classifier.ClassifyFile(args[1]);
        foreach (IList<CoreLabel> sentence in labeled)
        {
            foreach (CoreLabel word in sentence)
            {
                System.Console.Out.Write(word.Word() + '/' + word.Get(typeof(CoreAnnotations.AnswerAnnotation)) + ' ');
            }
            System.Console.Out.WriteLine();
        }
        System.Console.Out.WriteLine("---");

        IList<Triple<string, int, int>> list = classifier.ClassifyToCharacterOffsets(fileContents);
        foreach (Triple<string, int, int> item in list)
        {
            System.Console.Out.WriteLine(item.First() + ": " + Sharpen.Runtime.Substring(fileContents, item.Second(), item.Third()));
        }
        System.Console.Out.WriteLine("---");

        System.Console.Out.WriteLine("Ten best entity labelings");
        IDocumentReaderAndWriter<CoreLabel> readerAndWriter = classifier.MakePlainTextReaderAndWriter();
        classifier.ClassifyAndWriteAnswersKBest(args[1], 10, readerAndWriter);
        System.Console.Out.WriteLine("---");

        System.Console.Out.WriteLine("Per-token marginalized probabilities");
        classifier.PrintProbs(args[1], readerAndWriter);

        // -- This code prints out the first order (token pair) clique probabilities.
        // -- But that output is a bit overwhelming, so we leave it commented out by default.
        // System.out.println("---");
        // System.out.println("First Order Clique Probabilities");
        // ((CRFClassifier) classifier).printFirstOrderProbs(args[1], readerAndWriter);
    }
    else
    {
        /* For the hard-coded String, it shows how to run it on a single
         * sentence, and how to do this and produce several formats, including
         * slash tags and an inline XML output format. It also shows the full
         * contents of the CoreLabels that are constructed by the
         * classifier. And it shows getting out the probabilities of different
         * assignments and an n-best list of classifications with probabilities.
         */
        string[] example = new string[]
        {
            "Good afternoon Rajat Raina, how are you today?",
            "I go to school at Stanford University, which is located in California."
        };

        foreach (string str in example)
        {
            System.Console.Out.WriteLine(classifier.ClassifyToString(str));
        }
        System.Console.Out.WriteLine("---");

        foreach (string str in example)
        {
            // This one puts in spaces and newlines between tokens, so just print not println.
            System.Console.Out.Write(classifier.ClassifyToString(str, "slashTags", false));
        }
        System.Console.Out.WriteLine("---");

        foreach (string str in example)
        {
            // This one is best for dealing with the output as a TSV (tab-separated column) file.
            // The first column gives entities, the second their classes, and the third the remaining text in a document
            System.Console.Out.Write(classifier.ClassifyToString(str, "tabbedEntities", false));
        }
        System.Console.Out.WriteLine("---");

        foreach (string str in example)
        {
            System.Console.Out.WriteLine(classifier.ClassifyWithInlineXML(str));
        }
        System.Console.Out.WriteLine("---");

        foreach (string str in example)
        {
            System.Console.Out.WriteLine(classifier.ClassifyToString(str, "xml", true));
        }
        System.Console.Out.WriteLine("---");

        foreach (string str in example)
        {
            System.Console.Out.Write(classifier.ClassifyToString(str, "tsv", false));
        }
        System.Console.Out.WriteLine("---");

        // This gets out entities with character offsets
        int j = 0;
        foreach (string str in example)
        {
            j++;
            IList<Triple<string, int, int>> triples = classifier.ClassifyToCharacterOffsets(str);
            foreach (Triple<string, int, int> trip in triples)
            {
                // Composite formatting replaces the Java printf pattern; WriteLine supplies the
                // trailing newline that "%n" used to request.
                System.Console.Out.WriteLine("{0} over character offsets [{1}, {2}) in sentence {3}.", trip.First(), trip.Second(), trip.Third(), j);
            }
        }
        System.Console.Out.WriteLine("---");

        // This prints out all the details of what is stored for each token
        int i = 0;
        foreach (string str in example)
        {
            foreach (IList<CoreLabel> lcl in classifier.Classify(str))
            {
                foreach (CoreLabel cl in lcl)
                {
                    System.Console.Out.Write(i++ + ": ");
                    System.Console.Out.WriteLine(cl.ToShorterString());
                }
            }
        }
        System.Console.Out.WriteLine("---");
    }
}
/// <inheritdoc/>
/// <remarks>This override has an empty body, so calling it has no effect.</remarks>
public override void Train(ICollection<IList<In>> objectBankWrapper, IDocumentReaderAndWriter<In> readerAndWriter)
{
    // Nothing to do.
}