/// <summary>
/// Writes a JSON summary of the training data set to <c>Data\export_info.json</c>.
/// </summary>
/// <param name="trainData">Training data set whose summary is built by <c>CreateExportInfo</c>.</param>
public void Export(CntkTrainDataSet trainData)
{
    Console.WriteLine("creating export_info.json in folder : Data ");
    var info = this.CreateExportInfo(trainData);
    string filePath = @"Data\export_info.json";

    // File.WriteAllText does not create missing folders, so make sure the
    // target folder exists. The directory part can be empty when the path
    // contains no separator recognised by the current platform; skip then.
    string directory = Path.GetDirectoryName(filePath);
    if (!string.IsNullOrEmpty(directory))
    {
        Directory.CreateDirectory(directory);
    }

    File.WriteAllText(filePath, JsonHelper.ToJson(info));
}
/// <summary>
/// Builds the export summary for a training data set.
/// </summary>
/// <param name="trainData">Training data set to summarise.</param>
/// <returns>
/// An <c>ExportInfo</c> whose label dimension is the number of label indexes
/// and whose word dimension is the dimension size of the word space.
/// </returns>
private ExportInfo CreateExportInfo(CntkTrainDataSet trainData)
{
    return new ExportInfo
    {
        LabelDimension = trainData.LabelIndexes.Count,
        WordDimension = trainData.WordSpace.DimensionSize
    };
}
/// <summary>
/// Converts an imported data set into a CNTK training data set.
/// Each row is split into words and assigned a topic for its label; once all
/// rows have been read, a one-hot word vector space is built from the collected
/// words and every word receives its vector.
/// </summary>
/// <param name="dataSet">Rows providing a label and a sentence.</param>
/// <returns>The training data set (topic indexer, word space and sentences).</returns>
public CntkTrainDataSet Transform(DataSet dataSet)
{
    var textSegmentator = new MeCabTextSegmentator();
    var topics = new CntkTopicStore();
    var vocabulary = new HashSet<string>();

    // Number of rows handled so far; also the source of the zero-based sentence ids.
    int processed = 0;

    List<CntkSentence> sentences = dataSet.Select(row =>
    {
        int sentenceId = processed++;

        // Progress report every 10 rows.
        if (processed % 10 == 0)
        {
            Console.WriteLine(processed + " sentences");
        }

        CntkTopic topic = topics.GetOrRegister(row.Label);

        // Split the sentence into words (per the original note, cleansing is
        // applied at this step) and collect every word into the vocabulary.
        List<string> tokens = textSegmentator.Split(row.Sentence);
        foreach (string token in tokens)
        {
            vocabulary.Add(token);
        }

        return new CntkSentence
        {
            Id = sentenceId,
            Topic = topic,
            Sentence = row.Sentence,
            Words = tokens.ConvertAll(token => new CntkWord { Text = token })
        };
    }).ToList();

    // The vector representation cannot be decided until the full set of words
    // is known, so the vectors are assigned in a final pass.
    var wordSpace = OneHotWordVectorSpace.Build(vocabulary);
    foreach (CntkSentence current in sentences)
    {
        foreach (var entry in current.Words)
        {
            entry.Value = wordSpace.ToVector(entry.Text);
        }
    }

    return new CntkTrainDataSet(topics.Indexer, wordSpace, sentences);
}
/// <summary>
/// Runs the training-data pipeline: imports the data set, transforms it into
/// CNTK training data, then exports the training data and its summary.
/// </summary>
public void Run()
{
    // Import the training data. The path is hard-coded; the original code
    // carried a commented-out alternative, context.BaseContext.TrainDataSetPath().
    DataSet importedData = this.dataSetImport.Import(@"Resource\train_dataset.tsv");

    // Build the CNTK training data.
    Console.WriteLine("reading data...");
    CntkTrainDataSet cntkData = this.trainDataSetTransform.Transform(importedData);

    // Write the CNTK training data.
    this.trainDataSetExport.Export(cntkData);

    // Write the training data summary.
    this.summaryInfoExport.Export(cntkData);
}
/// <summary>
/// Exports a training data set: the training input data, the label index
/// (<c>Data\labels_index.tsv</c>) and, when the word space exposes word
/// indexes, the word index (<c>Data\words_index.tsv</c>).
/// </summary>
/// <param name="trainData">Training data set to export.</param>
public void Export(CntkTrainDataSet trainData)
{
    // Training input data.
    this.WriteTranInputData(trainData.Sentences);

    // Label index. The message names the file that is actually written.
    string labelFile = @"Data\labels_index.tsv";
    Console.WriteLine("creating labels_index.tsv in folder : Data");
    this.WriteWordIndexStore(trainData.LabelIndexes, labelFile);

    // Word index. Only announced when a file is really going to be written.
    ITextIndexes indexes = trainData.WordSpace.WordIndexes;
    if (indexes != null)
    {
        string wordsFilePath = @"Data\words_index.tsv";
        Console.WriteLine("creating words_index.tsv in folder: Data");
        this.WriteWordIndexStore(indexes, wordsFilePath);
    }
}