示例#1
0
 /// <summary>
 /// Creates a linear classifier, optionally initialising its weights from a saved model.
 /// </summary>
 /// <param name="loss">Stored as the classifier's default loss.</param>
 /// <param name="learningRateSchedule">Stored as the classifier's learning-rate schedule.</param>
 /// <param name="regularizationStrength">Stored as the classifier's regularization strength.</param>
 /// <param name="modelFile">
 /// Location of previously saved weights, or <c>null</c> to start from an empty weight counter.
 /// Names ending in ".tab.gz" are read with <c>Counters.DeserializeStringCounter</c>; any other
 /// name is handed to <c>IOUtils.ReadObjectAnnouncingTimingFromURLOrClasspathOrFileSystem</c>.
 /// </param>
 /// <exception cref="Exception">
 /// Thrown when reading <paramref name="modelFile"/> fails; the original failure is kept as the
 /// inner exception.
 /// </exception>
 public SimpleLinearClassifier(SimpleLinearClassifier.ILoss loss, SimpleLinearClassifier.ILearningRateSchedule learningRateSchedule, double regularizationStrength, string modelFile)
 {
     if (modelFile == null)
     {
         // No saved model: begin with empty weights.
         this.weights = new ClassicCounter<string>();
     }
     else
     {
         try
         {
             // Ordinal comparison: a file suffix is not linguistic text, so the check must not
             // depend on the current culture.
             if (modelFile.EndsWith(".tab.gz", System.StringComparison.Ordinal))
             {
                 Timing.StartDoing("Reading " + modelFile);
                 this.weights = Counters.DeserializeStringCounter(modelFile);
                 Timing.EndDoing("Reading " + modelFile);
             }
             else
             {
                 this.weights = IOUtils.ReadObjectAnnouncingTimingFromURLOrClasspathOrFileSystem(log, "Loading coref model", modelFile);
             }
         }
         catch (Exception e)
         {
             // Wrap so the failing file name is visible to the caller; the cause is preserved.
             throw new Exception("Error loading weights from " + modelFile, e);
         }
     }

     this.defaultLoss            = loss;
     this.regularizationStrength = regularizationStrength;
     this.learningRateSchedule   = learningRateSchedule;
     accessTimes  = new ClassicCounter<string>();
     examplesSeen = 0;
 }
        /// <summary>
        /// Loads a distributional-similarity lexicon (word to class) from a line-oriented file.
        /// </summary>
        /// <remarks>
        /// Two line layouts are understood:
        /// <list type="bullet">
        /// <item><description>"terryKoo": tab-separated, class in the first field and word in the second.</description></item>
        /// <item><description>anything else ("alexClark"): whitespace-separated, word first and class second.</description></item>
        /// </list>
        /// A line with fewer than two fields makes the field access fail with an
        /// <see cref="System.IndexOutOfRangeException"/>.
        /// </remarks>
        /// <param name="filename">Lexicon location, passed to <c>ObjectBank.GetLineIterator</c>.</param>
        /// <param name="format">Layout name; only the exact value "terryKoo" selects the tab-separated layout.</param>
        /// <param name="encoding">Character encoding, passed to <c>ObjectBank.GetLineIterator</c>.</param>
        /// <param name="distSimMaxBits">
        /// When positive, classes read in the "terryKoo" layout that are longer than this are cut to
        /// this many leading characters. Not applied to the other layout.
        /// </param>
        /// <param name="cased">When <c>false</c>, every word is lower-cased before being stored.</param>
        /// <param name="numberEquivalence">
        /// When <c>true</c>, every word is replaced by
        /// <c>WordShapeClassifier.WordShape(word, WordShapeClassifier.Wordshapedigits)</c> before being stored.
        /// </param>
        /// <param name="unknownWordClass">Stored on the instance; not otherwise used by this constructor.</param>
        public DistSimClassifier(string filename, string format, string encoding, int distSimMaxBits, bool cased, bool numberEquivalence, string unknownWordClass)
        {
            this.cased             = cased;
            this.numberEquivalence = numberEquivalence;
            this.unknownWordClass  = unknownWordClass;
            Timing.StartDoing("Loading distsim lexicon from " + filename);
            // make a reasonable starting size
            lexicon = Generics.NewHashMap(1 << 15);
            bool terryKoo = "terryKoo".Equals(format);

            foreach (string line in ObjectBank.GetLineIterator(filename, encoding))
            {
                string word;
                string wordClass;
                if (terryKoo)
                {
                    // Split on the tab character itself. The built-in string.Split(string) overload
                    // treats its argument as a literal, so passing the regex text "\\t" would look
                    // for a backslash followed by 't' and never split the line.
                    string[] bits = line.Split('\t');
                    word      = bits[1];
                    wordClass = bits[0];
                    if (distSimMaxBits > 0 && wordClass.Length > distSimMaxBits)
                    {
                        wordClass = Sharpen.Runtime.Substring(wordClass, 0, distSimMaxBits);
                    }
                }
                else
                {
                    // "alexClark"
                    // Whitespace runs need real regex splitting; call Regex.Split explicitly rather
                    // than relying on how string.Split interprets a pattern string.
                    string[] bits = System.Text.RegularExpressions.Regex.Split(line, "\\s+");
                    word      = bits[0];
                    wordClass = bits[1];
                }
                if (!cased)
                {
                    word = word.ToLower();
                }
                if (numberEquivalence)
                {
                    word = WordShapeClassifier.WordShape(word, WordShapeClassifier.Wordshapedigits);
                }
                // Assignment through the indexer: a word seen again replaces its earlier class
                // (assuming lexicon has dictionary indexer semantics - TODO confirm).
                lexicon[word] = wordClass;
            }
            Timing.EndDoing();
        }