/// <summary>
/// Writes the LDA state for one column to <paramref name="ctx"/>: the column info, the vocabulary
/// size, the two memory block sizes reported by the trainer, and the topic vector of every term.
/// </summary>
/// <param name="ctx">The model save context whose writer receives the binary payload.</param>
/// <param name="saveText">When true, the word-topic summary is also saved as a text stream.</param>
/// <param name="mapping">Passed to <c>GetTopicSummaryWriter</c> to build the text summary writer.</param>
public void Save(ModelSaveContext ctx, bool saveText, VBuffer<ReadOnlyMemory<char>> mapping)
{
    Contracts.AssertValue(ctx);

    _ldaTrainer.GetModelStat(out long memBlockSize, out long aliasMemBlockSize);

    // *** Binary format ***
    // <ColInfoEx>
    // int: vocabnum
    // long: memblocksize
    // long: aliasMemBlockSize
    // (serializing term by term, for one term)
    // int: term_id, int: topic_num, KeyValuePair<int, int>[]: termTopicVector
    InfoEx.Save(ctx);

    var writer = ctx.Writer;
    writer.Write(_ldaTrainer.NumVocab);
    writer.Write(memBlockSize);
    writer.Write(aliasMemBlockSize);

    // Emit each term's topic vector as: term id, pair count, then the (key, value) pairs.
    for (int termId = 0; termId < _ldaTrainer.NumVocab; termId++)
    {
        KeyValuePair<int, int>[] topics = _ldaTrainer.GetModel(termId);

        writer.Write(termId);
        writer.Write(topics.Length);
        for (int k = 0; k < topics.Length; k++)
        {
            writer.Write(topics[k].Key);
            writer.Write(topics[k].Value);
        }
    }

    // The summary writer is built regardless of saveText; it is only used when text output is requested.
    var summaryWriter = GetTopicSummaryWriter(mapping);
    if (saveText)
    {
        ctx.SaveTextStream(WordTopicModelFilename, summaryWriter);
    }
}
/// <summary>
/// Saves this transform to <paramref name="ctx"/>: version info, <c>sizeof(Float)</c>, the base
/// state, and then, per column, its extra info, ngram map and inverse document frequencies.
/// Columns that have a slot-names type additionally get a "{name}-ngrams.txt" text stream.
/// </summary>
/// <param name="ctx">The model save context to write to; must be non-null.</param>
public override void Save(ModelSaveContext ctx)
{
    Host.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel();
    ctx.SetVersionInfo(GetVersionInfo());

    // *** Binary format ***
    // int: sizeof(Float)
    // <base>
    // for each added column
    //   ColInfoEx
    //   the ngram SequencePool
    //   the ngram inverse document frequencies
    ctx.Writer.Write(sizeof(Float));
    SaveBase(ctx);

    // A single buffer is reused for the slot names of every column.
    var ngramsNames = default(VBuffer<ReadOnlyMemory<char>>);
    for (int iinfo = 0; iinfo < _exes.Length; iinfo++)
    {
        _exes[iinfo].Save(ctx);
        _ngramMaps[iinfo].Save(ctx.Writer);
        ctx.Writer.WriteDoubleArray(_invDocFreqs[iinfo]);

        // Without a slot-names type there is no human-readable listing to write.
        if (_slotNamesTypes[iinfo] == null)
        {
            continue;
        }

        GetSlotNames(iinfo, ref ngramsNames);
        Host.Assert(_ngramMaps[iinfo].Count == ngramsNames.GetValues().Length);
        Host.Assert(ngramsNames.IsDense);

        string streamName = string.Format("{0}-ngrams.txt", Infos[iinfo].Name);
        ctx.SaveTextStream(streamName, writer =>
        {
            // One header line, then one "index<TAB>ngram" line per slot name.
            var names = ngramsNames.GetValues();
            writer.WriteLine("# Number of Ngrams terms = {0}", names.Length);
            for (int slot = 0; slot < names.Length; slot++)
            {
                writer.WriteLine("{0}\t{1}", slot, names[slot]);
            }
        });
    }
}
/// <summary>
/// Saves this transform to <paramref name="ctx"/>: version info and base state, followed by a
/// "Stopwords" sub-model carrying the stopword count, each stopword string, and a
/// "Stopwords.txt" text listing of "id<TAB>value" lines.
/// </summary>
/// <param name="ctx">The model save context to write to; must be non-null.</param>
public override void Save(ModelSaveContext ctx)
{
    Host.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel();
    ctx.SetVersionInfo(GetVersionInfo());

    // *** Binary format ***
    // <base>
    SaveBase(ctx);

    ctx.SaveSubModel("Stopwords", subCtx =>
    {
        Host.CheckValue(subCtx, nameof(ctx));
        subCtx.CheckAtModel();
        subCtx.SetVersionInfo(GetStopwrodsManagerVersionInfo());

        // *** Binary format ***
        // int: number of stopwords
        // int[]: stopwords string ids

        // NOTE(review): only the version info goes through the sub-model context; the count,
        // the strings and the text stream below are all written through the outer ctx. That
        // looks unintentional, but redirecting them would change the saved layout, so confirm
        // against the matching load path before touching it.
        Host.Assert(_stopWordsMap.Count > 0);
        ctx.Writer.Write(_stopWordsMap.Count);

        // Ids are asserted to be consecutive from zero, in enumeration order.
        int expectedId = 0;
        foreach (var entry in _stopWordsMap)
        {
            Host.Assert(entry.Id == expectedId);
            ctx.SaveString(entry.Value);
            expectedId++;
        }

        ctx.SaveTextStream("Stopwords.txt", writer =>
        {
            foreach (var entry in _stopWordsMap)
            {
                writer.WriteLine("{0}\t{1}", entry.Id, entry.Value);
            }
        });
    });
}
/// <summary>
/// Serializes <paramref name="values"/> into <paramref name="ctx"/> using the text-vector codec
/// obtained from <paramref name="factory"/>, and also writes a "Terms.txt" text stream listing
/// every non-empty entry as "index<TAB>text".
/// </summary>
/// <param name="ch">Channel used for argument checks and assertions; must be non-null.</param>
/// <param name="ctx">The model save context to write to; must be non-null.</param>
/// <param name="factory">Codec factory that supplies and serializes the codec.</param>
/// <param name="values">The vector of text values to save.</param>
private static void Save(IChannel ch, ModelSaveContext ctx, CodecFactory factory, ref VBuffer<ReadOnlyMemory<char>> values)
{
    Contracts.AssertValue(ch);
    ch.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel();
    ctx.SetVersionInfo(GetVersionInfo());

    // *** Binary format ***
    // Codec parameterization: A codec parameterization that should be a ReadOnlyMemory codec
    // int: n, the number of bytes used to write the values
    // byte[n]: As encoded using the codec

    // Get the codec from the factory
    IValueCodec codec;
    var result = factory.TryGetCodec(new VectorType(TextType.Instance), out codec);
    ch.Assert(result);
    ch.Assert(codec.Type.IsVector);
    ch.Assert(codec.Type.VectorSize == 0);
    ch.Assert(codec.Type.ItemType.RawType == typeof(ReadOnlyMemory<char>));
    IValueCodec<VBuffer<ReadOnlyMemory<char>>> textCodec = (IValueCodec<VBuffer<ReadOnlyMemory<char>>>)codec;

    factory.WriteCodec(ctx.Writer.BaseStream, codec);

    // Encode into a temporary buffer first so the payload can be written as a length-prefixed byte array.
    using (var mem = new MemoryStream())
    {
        using (var writer = textCodec.OpenWriter(mem))
        {
            writer.Write(ref values);
            writer.Commit();
        }
        ctx.Writer.WriteByteArray(mem.ToArray());
    }

    // Make this resemble, more or less, the auxiliary output from the TermTransform.
    // It will differ somewhat due to the vector being possibly sparse. Empty entries
    // are not written at all.
    // A ref parameter cannot be captured by the lambda below, hence the local copy.
    var v = values;
    char[] buffer = null;
    ctx.SaveTextStream("Terms.txt", writer =>
    {
        writer.WriteLine("# Number of terms = {0} of length {1}", v.Count, v.Length);
        foreach (var pair in v.Items())
        {
            var text = pair.Value;
            if (text.IsEmpty)
            {
                continue;
            }

            writer.Write("{0}\t", pair.Key);
            // REVIEW: What about escaping this, *especially* for linebreaks?
            // Do C# and .NET really have no equivalent to Python's "repr"? :(
            // Copy into a reusable char[] so the text can be written without allocating a string.
            Utils.EnsureSize(ref buffer, text.Length);
            text.Span.CopyTo(buffer);
            writer.WriteLine(buffer, 0, text.Length);
        }
    });
}