/// <summary>
/// Asynchronously serializes <paramref name="recordBatch"/>: the schema and the dictionaries are
/// written first when their "has written" flags are still unset, followed by the record batch
/// message and its buffer data.
/// </summary>
/// <param name="recordBatch">The record batch to write.</param>
/// <param name="cancellationToken">Token forwarded to every asynchronous write issued here.</param>
/// <returns>A task that completes after <c>FinishedWritingRecordBatch</c> has been invoked.</returns>
private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBatch,
    CancellationToken cancellationToken = default)
{
    // TODO: Truncate buffers with extraneous padding / unused capacity

    // Schema is emitted only while the flag is unset; the flag is raised right after the write.
    if (!HasWrittenSchema)
    {
        await WriteSchemaAsync(Schema, cancellationToken).ConfigureAwait(false);
        HasWrittenSchema = true;
    }

    // Dictionaries are collected from this batch and written only while the flag is unset.
    if (!HasWrittenDictionaryBatch)
    {
        DictionaryCollector.Collect(recordBatch, ref _dictionaryMemo);
        await WriteDictionariesAsync(recordBatch, cancellationToken).ConfigureAwait(false);
        HasWrittenDictionaryBatch = true;
    }

    (ArrowRecordBatchFlatBufferBuilder batchBuilder, VectorOffset fieldNodes) =
        PreparingWritingRecordBatch(recordBatch);

    // NOTE(review): presumably closes the buffers vector opened inside
    // PreparingWritingRecordBatch - confirm; the call order here must not change.
    VectorOffset bufferDescriptors = Builder.EndVector();

    // Build the record batch table and write it as a message.
    StartingWritingRecordBatch();

    Offset<Flatbuf.RecordBatch> batchOffset = Flatbuf.RecordBatch.CreateRecordBatch(
        Builder,
        recordBatch.Length,
        fieldNodes,
        bufferDescriptors);

    var bodyLength = batchBuilder.TotalLength;
    long metadataLength = await WriteMessageAsync(
        Flatbuf.MessageHeader.RecordBatch,
        batchOffset,
        bodyLength,
        cancellationToken).ConfigureAwait(false);

    long bufferLength = await WriteBufferDataAsync(batchBuilder.Buffers, cancellationToken)
        .ConfigureAwait(false);

    FinishedWritingRecordBatch(bufferLength, metadataLength);
}
/// <summary>
/// Synchronously serializes <paramref name="recordBatch"/>: the schema and the dictionaries are
/// written first when their "has written" flags are still unset, followed by the record batch
/// message and its buffer data.
/// </summary>
/// <param name="recordBatch">The record batch to write.</param>
private protected void WriteRecordBatchInternal(RecordBatch recordBatch)
{
    // TODO: Truncate buffers with extraneous padding / unused capacity

    // Schema is emitted only while the flag is unset; the flag is raised right after the write.
    if (!HasWrittenSchema)
    {
        WriteSchema(Schema);
        HasWrittenSchema = true;
    }

    // Dictionaries are collected from this batch and written only while the flag is unset.
    if (!HasWrittenDictionaryBatch)
    {
        DictionaryCollector.Collect(recordBatch, ref _dictionaryMemo);
        WriteDictionaries(recordBatch);
        HasWrittenDictionaryBatch = true;
    }

    (ArrowRecordBatchFlatBufferBuilder batchBuilder, VectorOffset fieldNodes) =
        PreparingWritingRecordBatch(recordBatch);

    // NOTE(review): presumably closes the buffers vector opened inside
    // PreparingWritingRecordBatch - confirm; the call order here must not change.
    VectorOffset bufferDescriptors = Builder.EndVector();

    // Build the record batch table and write it as a message.
    StartingWritingRecordBatch();

    Offset<Flatbuf.RecordBatch> batchOffset = Flatbuf.RecordBatch.CreateRecordBatch(
        Builder,
        recordBatch.Length,
        fieldNodes,
        bufferDescriptors);

    var bodyLength = batchBuilder.TotalLength;
    long metadataLength = WriteMessage(
        Flatbuf.MessageHeader.RecordBatch,
        batchOffset,
        bodyLength);

    long bufferLength = WriteBufferData(batchBuilder.Buffers);

    FinishedWritingRecordBatch(bufferLength, metadataLength);
}
/// <summary>
/// Creates a classifier set to <c>DictionaryMode</c> that keeps the supplied collector and
/// file handler.
/// </summary>
/// <param name="collector">Dictionary collector stored on this instance.</param>
/// <param name="fileHandler">File handler stored on this instance.</param>
public Classifier(DictionaryCollector collector, FileHandler fileHandler)
{
    // This overload takes no mode argument: it always selects dictionary mode.
    _mode = DictionaryMode;

    _fileHandler = fileHandler;
    _collector = collector;
}
/// <summary>
/// Validates the current selections, runs the chosen classifier over the data file and reports
/// the outcome in a message box.
/// </summary>
/// <remarks>
/// Every selection is validated before the <c>FileHandler</c> is constructed and before the
/// result directory is created, so a rejected request has no side effect other than the error
/// dialog. The result file is named after the data file (without extension) followed by
/// <c>resultName</c> and is placed in <c>resultDirectoryName</c> under the current directory.
/// </remarks>
public void Start()
{
    if (string.IsNullOrEmpty(DataFilePath))
    {
        MessageBox.Show("Не был выбран файл с данными для классификации", "Ошибка", MessageBoxButton.OK, MessageBoxImage.Error);
        return;
    }

    // Read the mode switches once so validation and execution agree on the chosen path.
    bool useDictionary = IsDictionaryEnabled;
    bool buildNewDictionary = useDictionary && IsNewDict;
    string machineLearningMethod = null;

    if (useDictionary)
    {
        // A new dictionary can only be built from a labelled sample file.
        if (buildNewDictionary && string.IsNullOrEmpty(DictionaryDataFilePath))
        {
            MessageBox.Show("Не был выбран файл с размеченной выборкой для создания словаря", "Ошибка", MessageBoxButton.OK, MessageBoxImage.Error);
            return;
        }
    }
    else if (IsMachineLearningEnabled)
    {
        if (string.IsNullOrEmpty(TrainFilePath))
        {
            MessageBox.Show("Не был выбран файл с обучающей выборкой", "Ошибка", MessageBoxButton.OK, MessageBoxImage.Error);
            return;
        }

        // Map the selected algorithm flag to the method name handed to the classifier.
        if (IsSVM)
        {
            machineLearningMethod = "svm";
        }
        else if (IsStochasticGradient)
        {
            machineLearningMethod = "stochastic_gradient";
        }
        else if (IsRandomForest)
        {
            machineLearningMethod = "random_forest";
        }
        else if (IsLogisticRegression)
        {
            machineLearningMethod = "logistic_regression";
        }
        else if (IsBernoulliNB)
        {
            machineLearningMethod = "bernoulli_naive_bayes";
        }
        else
        {
            MessageBox.Show("Не был выбран способ классификации", "Ошибка", MessageBoxButton.OK, MessageBoxImage.Error);
            return;
        }
    }
    else
    {
        // Neither the dictionary nor the machine-learning mode is enabled.
        MessageBox.Show("Не был выбран способ классификации", "Ошибка", MessageBoxButton.OK, MessageBoxImage.Error);
        return;
    }

    // Inputs are valid: from here on the method creates objects and touches the file system.
    FileHandler fileHandler;
    if (string.IsNullOrEmpty(ObjectFilePath))
    {
        fileHandler = new FileHandler(DataFilePath);
    }
    else
    {
        fileHandler = new FileHandler(DataFilePath, ObjectFilePath);
    }

    var directory = Directory.CreateDirectory(Path.Combine(Environment.CurrentDirectory, resultDirectoryName));
    var resultFileName = string.Concat(Path.GetFileNameWithoutExtension(DataFilePath), resultName);
    var resultPath = Path.Combine(directory.FullName, resultFileName);

    if (useDictionary)
    {
        // Dictionary-based classification.
        Classifier classifier;
        if (buildNewDictionary)
        {
            var collector = new DictionaryCollector(DictionaryDataFilePath);
            classifier = new Classifier(collector, fileHandler);
        }
        else
        {
            classifier = new Classifier(fileHandler, Classifier.DictionaryMode);
        }

        classifier.Classify(resultPath);

        // This message contains a line break after the first sentence; it is written as an
        // escape because a regular string literal cannot span source lines.
        MessageBox.Show(string.Format("Работа программы успешно завершена! \nРезультаты сохранены в файле {0} в подкаталоге {1}.", resultFileName, resultDirectoryName));
    }
    else
    {
        // Machine-learning classification.
        var classifier = new Classifier(fileHandler, Classifier.MachineLearningMode);
        classifier.Method = machineLearningMethod;
        classifier.TrainFilePath = TrainFilePath;
        classifier.Classify(resultPath);

        MessageBox.Show(string.Format("Работа программы успешно завершена! Результаты сохранены в файле {0} в подкаталоге {1}.", resultFileName, resultDirectoryName));
    }
}