/// <summary>
/// Creates a classifier, optionally initializing its weights from a saved model file.
/// </summary>
/// <param name="loss">Stored as this classifier's default loss.</param>
/// <param name="learningRateSchedule">Stored as this classifier's learning-rate schedule.</param>
/// <param name="regularizationStrength">Stored as this classifier's regularization strength.</param>
/// <param name="modelFile">
/// Location of previously saved weights, or <c>null</c> to start from an empty weight counter.
/// A name ending in <c>.tab.gz</c> is read with <c>Counters.DeserializeStringCounter</c>;
/// anything else is read with
/// <c>IOUtils.ReadObjectAnnouncingTimingFromURLOrClasspathOrFileSystem</c>.
/// </param>
/// <exception cref="Exception">
/// Thrown, wrapping the underlying error, when the weights cannot be read from
/// <paramref name="modelFile"/>.
/// </exception>
public SimpleLinearClassifier(SimpleLinearClassifier.ILoss loss, SimpleLinearClassifier.ILearningRateSchedule learningRateSchedule, double regularizationStrength, string modelFile)
{
    if (modelFile != null)
    {
        try
        {
            // File-extension check: compare ordinally so the result does not depend on
            // the current culture's collation rules.
            if (modelFile.EndsWith(".tab.gz", StringComparison.Ordinal))
            {
                Timing.StartDoing("Reading " + modelFile);
                this.weights = Counters.DeserializeStringCounter(modelFile);
                Timing.EndDoing("Reading " + modelFile);
            }
            else
            {
                this.weights = IOUtils.ReadObjectAnnouncingTimingFromURLOrClasspathOrFileSystem(log, "Loading coref model", modelFile);
            }
        }
        catch (Exception e)
        {
            // Keep the original failure as the inner exception so the root cause is not lost.
            throw new Exception("Error loading weights from " + modelFile, e);
        }
    }
    else
    {
        // No saved model: start from an empty weight counter.
        this.weights = new ClassicCounter<string>();
    }

    this.defaultLoss = loss;
    this.regularizationStrength = regularizationStrength;
    this.learningRateSchedule = learningRateSchedule;
    accessTimes = new ClassicCounter<string>();
    examplesSeen = 0;
}
/// <summary>
/// Loads a distributional-similarity lexicon (word to word-class map) from a file.
/// </summary>
/// <param name="filename">File to read the lexicon from, one entry per line.</param>
/// <param name="format">
/// <c>"terryKoo"</c> selects tab-separated lines with the class in the first field and the
/// word in the second; any other value is treated as whitespace-separated lines with the
/// word first and the class second (the "alexClark" layout).
/// </param>
/// <param name="encoding">Character encoding passed to the line reader.</param>
/// <param name="distSimMaxBits">
/// When positive and the format is <c>"terryKoo"</c>, classes longer than this are truncated
/// to their first <paramref name="distSimMaxBits"/> characters.
/// </param>
/// <param name="cased">When <c>false</c>, words are lower-cased before being stored.</param>
/// <param name="numberEquivalence">
/// When <c>true</c>, words are passed through <c>WordShapeClassifier.WordShape</c> with the
/// <c>Wordshapedigits</c> shaper before being stored.
/// </param>
/// <param name="unknownWordClass">Stored on the instance; not otherwise used here.</param>
public DistSimClassifier(string filename, string format, string encoding, int distSimMaxBits, bool cased, bool numberEquivalence, string unknownWordClass)
{
    this.cased = cased;
    this.numberEquivalence = numberEquivalence;
    this.unknownWordClass = unknownWordClass;

    Timing.StartDoing("Loading distsim lexicon from " + filename);
    // make a reasonable starting size
    lexicon = Generics.NewHashMap(1 << 15);
    bool terryKoo = "terryKoo".Equals(format);

    foreach (string line in ObjectBank.GetLineIterator(filename, encoding))
    {
        string word;
        string wordClass;
        if (terryKoo)
        {
            // Split on an actual tab character. The string "\\t" is a regex in the Java
            // original, but string.Split(string) would treat it as the literal text \t.
            string[] bits = line.Split('\t');
            word = bits[1];
            wordClass = bits[0];
            if (distSimMaxBits > 0 && wordClass.Length > distSimMaxBits)
            {
                wordClass = Sharpen.Runtime.Substring(wordClass, 0, distSimMaxBits);
            }
        }
        else
        {
            // "alexClark"
            // Split on runs of whitespace via a real regex; string.Split(string) does not
            // interpret "\\s+" as a pattern.
            string[] bits = System.Text.RegularExpressions.Regex.Split(line, "\\s+");
            word = bits[0];
            wordClass = bits[1];
        }

        if (!cased)
        {
            word = word.ToLower();
        }
        if (numberEquivalence)
        {
            word = WordShapeClassifier.WordShape(word, WordShapeClassifier.Wordshapedigits);
        }
        lexicon[word] = wordClass;
    }

    Timing.EndDoing();
}