Example #1
 /// <summary>
 /// Sets up a test run of <paramref name="maxentTagger"/> over the data named by
 /// <paramref name="testFile"/>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): this listing appears to have lost its braces and stops right after the
 /// confusion-matrix <c>PrintFile</c> is created; only the visible statements are described here.
 /// </remarks>
 /// <param name="maxentTagger">Tagger under test; its <c>config</c> is copied into this object.</param>
 /// <param name="testFile">Passed, together with the tagger's config, to <c>TaggedFileRecord.CreateRecord</c>.</param>
 /// <exception cref="System.IO.IOException"/>
 public TestClassifier(MaxentTagger maxentTagger, string testFile)
     // TODO: can we break this class up in some way?  Perhaps we can
     // spread some functionality into TestSentence and some into MaxentTagger
     // TODO: at the very least, it doesn't seem to make sense to make it
     // an object with state, rather than just some static methods
     // TODO: only one boolean here instead of 4?  They all use the same
     // debug status
     this.maxentTagger = maxentTagger;
     this.config       = maxentTagger.config;
     fileRecord = TaggedFileRecord.CreateRecord(config, testFile);
     // Output files are named after the configured debug prefix; when that is
     // null or empty the file record's own filename is used as the root instead.
     saveRoot   = config.GetDebugPrefix();
     if (saveRoot == null || saveRoot.Equals(string.Empty))
         saveRoot = fileRecord.Filename();
     // The confusion matrix goes to "<saveRoot>.confusion".
     // NOTE(review): nothing visible here writes to or closes pf - the rest of
     // this branch is not shown in this listing.
     if (writeConfusionMatrix)
         PrintFile pf = new PrintFile(saveRoot + ".confusion");
Example #2
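        // end static class NERClient -- NOTE(review): name looks stale; the nested client type used in Main is MaxentTaggerServer.TaggerClient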
        /// <summary>Starts this server on the specified port.</summary>
        /// <remarks>
        /// Starts this server on the specified port.  The classifier used can be
        /// either a default one stored in the jar file from which this code is
        /// invoked or you can specify it as a filename or as another classifier
        /// resource name, which must correspond to the name of a resource in the
        /// /classifiers/ directory of the jar file.
        /// <para>
        /// Usage: <code>java edu.stanford.nlp.tagger.maxent.MaxentTaggerServer [-model file|-client] -port portNumber [other MaxentTagger options]</code>
        /// </para>
        /// <para>
        /// When the <c>client</c> property is non-empty a test client is run against
        /// <c>host</c>/<c>port</c>; the statements that build a tagger and run a server follow it.
        /// NOTE(review): this listing appears to have lost its braces, the <c>try</c> around
        /// the port parse and the <c>else</c> that separates client mode from server mode.
        /// </para>
        /// </remarks>
        /// <param name="args">Command-line arguments (described above)</param>
        /// <exception cref="System.Exception">If file or Java class problems with serialized classifier</exception>
        public static void Main(string[] args)
            // NOTE(review): the body of this no-arguments check is not shown in this listing.
            if (args.Length == 0)
            // Use both Properties and TaggerConfig.  It's okay.
            Properties props   = StringUtils.ArgsToProperties(args);
            string     client  = props.GetProperty("client");
            string     portStr = props.GetProperty("port");

            // NOTE(review): the handling of a missing/empty port is not shown in this listing.
            if (portStr == null || portStr.Equals(string.Empty))
            int port = 0;

                // NOTE(review): System.Convert.ToInt32(string) reports bad input with
                // System.FormatException / System.OverflowException; confirm that the
                // Java-named NumberFormatException caught below actually covers those.
                port = System.Convert.ToInt32(portStr);
            catch (NumberFormatException)
                log.Info("Non-numerical port");
            // Client mode: talk to an already-running tagger server.
            if (client != null && !client.Equals(string.Empty))
                // run a test client for illustration/testing
                string host     = props.GetProperty("host");
                string encoding = props.GetProperty("encoding");
                // Fall back to UTF-8 when no encoding property was supplied.
                if (encoding == null || string.Empty.Equals(encoding))
                    encoding = "utf-8";
                MaxentTaggerServer.TaggerClient.CommunicateWithMaxentTaggerServer(host, port, encoding);
                // Server mode (presumably the else branch - TODO confirm): build the
                // tagger from the command-line config and serve it on the parsed port.
                TaggerConfig config = new TaggerConfig(args);
                MaxentTagger tagger = new MaxentTagger(config.GetModel(), config);
                // initializes tagger
                MaxentTagger.TaggerWrapper wrapper = new MaxentTagger.TaggerWrapper(tagger);
                new MaxentTaggerServer(port, wrapper, config.GetEncoding()).Run();
Example #3
        /// <summary>
        /// Reads the tagged files named by <paramref name="config"/> and accumulates
        /// per-word tag counts from them.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost its braces and ends at the trailing
        /// comment; the step that comment describes (filling the dictionary from the
        /// collected counts) is not visible here.
        /// </remarks>
        /// <param name="config">Supplies the file specification via <c>GetFile()</c>.</param>
        /// <param name="maxentTagger">Tagger this reader is attached to; stored on the instance.</param>
        /// <param name="pairs">Pairs holder stored on the instance.</param>
        protected internal ReadDataTagged(TaggerConfig config, MaxentTagger maxentTagger, PairsHolder pairs)
            //TODO: make a class DataHolder that holds the dict, tags, pairs, etc, for tagger and pass it around
            this.maxentTagger = maxentTagger;
            this.pairs        = pairs;
            IList <TaggedFileRecord> fileRecords = TaggedFileRecord.CreateRecords(config, config.GetFile());
            // Keyed by string with an IntCounter<string> per key; presumably
            // word -> (tag -> count), going by the variable name - TODO confirm.
            IDictionary <string, IntCounter <string> > wordTagCounts = Generics.NewHashMap();

            // Every record's reader is loaded into the same shared counts map.
            foreach (TaggedFileRecord record in fileRecords)
                LoadFile(record.Reader(), wordTagCounts);
            // By counting the words and then filling the Dictionary, we can
            // make it so there are no calls that mutate the Dictionary or its
            // TagCount objects later
 /// <summary>This method gets feature statistics from a training file found in the TaggerConfig.</summary>
 /// <remarks>
 /// This method gets feature statistics from a training file found in the TaggerConfig.
 /// It is the start of the training process.
 /// <para>
 /// NOTE(review): this listing appears to have lost its braces and stops at the final
 /// <c>if</c>, whose body is not shown.
 /// </para>
 /// </remarks>
 /// <param name="config">Tagger configuration handed to <c>ReadDataTagged</c>.</param>
 /// <param name="maxentTagger">Tagger being trained; receives the resulting <c>xSize</c> and <c>ySize</c>.</param>
 /// <exception cref="System.IO.IOException"/>
 protected internal TaggerExperiments(TaggerConfig config, MaxentTagger maxentTagger)
     : this(maxentTagger)
     log.Info("TaggerExperiments: adding word/tags");
     PairsHolder    pairs = new PairsHolder();
     ReadDataTagged c     = new ReadDataTagged(config, maxentTagger, pairs);
     // NOTE(review): as written this allocates a zero-length outer array, so the
     // vArray[i][0] / vArray[i][1] writes in the loop below would be out of range
     // for any i. Looks like a conversion artifact - confirm the intended
     // dimensions (presumably one [indX, indY] pair per data token).
     vArray = new int[][] {  };
     log.Info("Featurizing tagged data tokens...");
     // NOTE(review): 'size' is not declared in the visible code - presumably the
     // number of tokens held by c (cf. c.GetSize() in the log line below); TODO confirm.
     for (int i = 0; i < size; i++)
         DataWordTag d    = c.Get(i);
         string      yS   = d.GetY();
         History     h    = d.GetHistory();
         // Record the history index and tag index for token i, and add the
         // regular and rare-word templates for this (history, tag) pair.
         int         indX = tHistories.Add(h);
         int         indY = d.GetYInd();
         AddTemplatesNew(h, yS);
         AddRareTemplatesNew(h, yS);
         vArray[i][0] = indX;
         vArray[i][1] = indY;
     // It's the 2010s now and it doesn't take so long to featurize....
     // if (i > 0 && (i % 10000) == 0) {
     //   System.err.printf("%d ", i);
     //   if (i % 100000 == 0) { System.err.println(); }
     // }
     // log.info();
     log.Info("Featurized " + c.GetSize() + " data tokens [done].");
     // Publish the feature-space dimensions to the tagger.
     maxentTagger.xSize = xSize;
     maxentTagger.ySize = ySize;
     log.Info("xSize [num Phi templates] = " + xSize + "; ySize [num classes] = " + ySize);
     // if we'll look at occurring tags only, we need the histories and pairs still
     if (!maxentTagger.occurringTagsOnly && !maxentTagger.possibleTagsOnly)
Example #5
 /// <summary>
 /// Creates a test sentence bound to <paramref name="maxentTagger"/>, taking the tag separator,
 /// encoding and verbosity from the tagger's config.
 /// </summary>
 /// <remarks>
 /// NOTE(review): this listing appears to have lost its braces and the <c>else</c> between the
 /// config-derived values and the defaults; as literally shown, the default assignments
 /// would always overwrite the configured ones.
 /// </remarks>
 /// <param name="maxentTagger">Tagger to use; asserted (debug builds) to be non-null with a non-null lambda solve.</param>
 public TestSentence(MaxentTagger maxentTagger)
     // origWords is only set when run with a list of HasWords; when run
     // with a list of strings, this will be null
     // TODO this always has the value of sent.size(). Remove it? [cdm 2008]
     // protected double[][][] probabilities;
     // = 0;
     System.Diagnostics.Debug.Assert((maxentTagger != null));
     System.Diagnostics.Debug.Assert((maxentTagger.GetLambdaSolve() != null));
     this.maxentTagger = maxentTagger;
     // Settings come from the tagger's config when it has one.
     if (maxentTagger.config != null)
         tagSeparator = maxentTagger.config.GetTagSeparator();
         encoding     = maxentTagger.config.GetEncoding();
         Verbose      = maxentTagger.config.GetVerbose();
         // Defaults (presumably the else branch, used when the tagger has no config - TODO confirm).
         tagSeparator = TaggerConfig.GetDefaultTagSeparator();
         encoding     = "utf-8";
         Verbose      = false;
     history = new History(pairs, maxentTagger.extractors);