/// <summary>
/// Copies the trained state out of <c>_ldaTrainer</c> into <paramref name="model"/>:
/// the memory-block statistics plus one topic vector per vocabulary term.
/// </summary>
/// <param name="model">Destination model; its <c>VocabularyBuckets</c> is expected to
/// match the trainer's vocabulary size (asserted below).</param>
internal void ReadModelFromTrainedLDA(LDAModel model)
{
    _ldaTrainer.GetModelStat(out var memBlockSize, out var aliasMemBlockSize);
    model.MemBlockSize = memBlockSize;
    model.AliasMemBlockSize = aliasMemBlockSize;

    // The trainer and the destination model must agree on vocabulary size,
    // since we copy exactly one topic vector per term id below.
    Debug.Assert(_ldaTrainer.NumVocab == model.VocabularyBuckets);

    // Pull the per-term topic vectors from the trainer, one term id at a time.
    int vocabCount = _ldaTrainer.NumVocab;
    var termTopicVectors = new KeyValuePair<int, int>[vocabCount][];
    for (int termId = 0; termId < vocabCount; termId++)
        termTopicVectors[termId] = _ldaTrainer.GetModel(termId);
    model.LDA_Data = termTopicVectors;
}
/// <summary>
/// Serializes the LDA model into <paramref name="ctx"/> in the binary layout described
/// below, and optionally emits a human-readable word-topic summary text stream.
/// </summary>
/// <param name="ctx">Save context to write into; must be non-null (asserted).</param>
/// <param name="saveText">When true, also writes the word-topic summary via
/// <c>ctx.SaveTextStream</c>.</param>
/// <param name="mapping">Term-index-to-text mapping handed to the summary writer.</param>
public void Save(ModelSaveContext ctx, bool saveText, VBuffer<ReadOnlyMemory<char>> mapping)
{
    Contracts.AssertValue(ctx);

    // Inline out-variable declarations (C# 7) — consistent with
    // ReadModelFromTrainedLDA; the previous zero-initialized locals were redundant
    // since GetModelStat assigns both.
    _ldaTrainer.GetModelStat(out long memBlockSize, out long aliasMemBlockSize);

    // *** Binary format ***
    // <ColInfoEx>
    // int: vocabnum
    // long: memblocksize
    // long: aliasMemBlockSize
    // (serializing term by term, for one term)
    // int: term_id, int: topic_num, KeyValuePair<int, int>[]: termTopicVector
    InfoEx.Save(ctx);
    ctx.Writer.Write(_ldaTrainer.NumVocab);
    ctx.Writer.Write(memBlockSize);
    ctx.Writer.Write(aliasMemBlockSize);

    // Save the model term by term through this interface.
    for (int i = 0; i < _ldaTrainer.NumVocab; i++)
    {
        KeyValuePair<int, int>[] termTopicVector = _ldaTrainer.GetModel(i);

        // Write this term's topic vector to disk through ctx.
        ctx.Writer.Write(i); // term_id
        ctx.Writer.Write(termTopicVector.Length);
        foreach (KeyValuePair<int, int> p in termTopicVector)
        {
            ctx.Writer.Write(p.Key);
            ctx.Writer.Write(p.Value);
        }
    }

    var writeAction = GetTopicSummaryWriter(mapping);

    // Save word-topic summary in text.
    if (saveText)
    {
        ctx.SaveTextStream(WordTopicModelFilename, writeAction);
    }
}