Exemplo n.º 1
0
        /// <summary>
        /// Asynchronously writes a single record batch to the output, first emitting the
        /// schema and the dictionaries if they have not been written yet.
        /// </summary>
        /// <param name="recordBatch">The record batch to serialize.</param>
        /// <param name="cancellationToken">Token forwarded to every asynchronous write issued by this method.</param>
        /// <returns>A task that completes once the batch's message and its buffer data have been written.</returns>
        private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBatch,
                                                                   CancellationToken cancellationToken = default)
        {
            // TODO: Truncate buffers with extraneous padding / unused capacity

            // The schema is written at most once; the flag is flipped only after the write completes.
            if (!HasWrittenSchema)
            {
                await WriteSchemaAsync(Schema, cancellationToken).ConfigureAwait(false);

                HasWrittenSchema = true;
            }

            // Dictionaries are collected into _dictionaryMemo and written only while
            // HasWrittenDictionaryBatch is false, i.e. for the first batch that reaches this point.
            // NOTE(review): dictionaries of later batches appear to be skipped unless the flag
            // is reset elsewhere — confirm.
            if (!HasWrittenDictionaryBatch)
            {
                DictionaryCollector.Collect(recordBatch, ref _dictionaryMemo);
                await WriteDictionariesAsync(recordBatch, cancellationToken).ConfigureAwait(false);

                HasWrittenDictionaryBatch = true;
            }

            // Yields the per-batch buffer builder and the offset of the field-nodes vector.
            (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset) =
                PreparingWritingRecordBatch(recordBatch);

            // Closes the vector currently open on Builder — presumably the buffers vector started
            // inside PreparingWritingRecordBatch; confirm against that helper.
            VectorOffset buffersVectorOffset = Builder.EndVector();

            // Serialize record batch

            StartingWritingRecordBatch();

            // Build the RecordBatch flatbuffer table from the row count and the two vectors above.
            Offset <Flatbuf.RecordBatch> recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length,
                                                                                                   fieldNodesVectorOffset,
                                                                                                   buffersVectorOffset);

            // The message (metadata) goes out first, followed by the raw buffer data.
            long metadataLength = await WriteMessageAsync(Flatbuf.MessageHeader.RecordBatch,
                                                          recordBatchOffset, recordBatchBuilder.TotalLength,
                                                          cancellationToken).ConfigureAwait(false);

            long bufferLength = await WriteBufferDataAsync(recordBatchBuilder.Buffers, cancellationToken).ConfigureAwait(false);

            // Report both lengths to the post-write hook.
            FinishedWritingRecordBatch(bufferLength, metadataLength);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Synchronously writes a single record batch to the output, first emitting the
        /// schema and the dictionaries if they have not been written yet.
        /// </summary>
        /// <param name="recordBatch">The record batch to serialize.</param>
        private protected void WriteRecordBatchInternal(RecordBatch recordBatch)
        {
            // TODO: Truncate buffers with extraneous padding / unused capacity

            // The schema is written at most once; the flag is flipped only after the write returns.
            if (!HasWrittenSchema)
            {
                WriteSchema(Schema);
                HasWrittenSchema = true;
            }

            // Dictionaries are collected into _dictionaryMemo and written only while
            // HasWrittenDictionaryBatch is false, i.e. for the first batch that reaches this point.
            // NOTE(review): dictionaries of later batches appear to be skipped unless the flag
            // is reset elsewhere — confirm.
            if (!HasWrittenDictionaryBatch)
            {
                DictionaryCollector.Collect(recordBatch, ref _dictionaryMemo);
                WriteDictionaries(recordBatch);
                HasWrittenDictionaryBatch = true;
            }

            // Yields the per-batch buffer builder and the offset of the field-nodes vector.
            (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset) =
                PreparingWritingRecordBatch(recordBatch);

            // Closes the vector currently open on Builder — presumably the buffers vector started
            // inside PreparingWritingRecordBatch; confirm against that helper.
            VectorOffset buffersVectorOffset = Builder.EndVector();

            // Serialize record batch

            StartingWritingRecordBatch();

            // Build the RecordBatch flatbuffer table from the row count and the two vectors above.
            Offset <Flatbuf.RecordBatch> recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length,
                                                                                                   fieldNodesVectorOffset,
                                                                                                   buffersVectorOffset);

            // The message (metadata) goes out first, followed by the raw buffer data.
            long metadataLength = WriteMessage(Flatbuf.MessageHeader.RecordBatch,
                                               recordBatchOffset, recordBatchBuilder.TotalLength);

            long bufferLength = WriteBufferData(recordBatchBuilder.Buffers);

            // Report both lengths to the post-write hook.
            FinishedWritingRecordBatch(bufferLength, metadataLength);
        }
Exemplo n.º 3
0
 /// <summary>
 /// Creates a classifier that operates in dictionary mode, keeping the supplied
 /// dictionary collector and file handler for later use.
 /// </summary>
 /// <param name="collector">Dictionary collector stored in <c>_collector</c>.</param>
 /// <param name="fileHandler">File handler stored in <c>_fileHandler</c>.</param>
 public Classifier(DictionaryCollector collector, FileHandler fileHandler)
 {
     // This overload always selects the dictionary mode.
     _mode = Classifier.DictionaryMode;

     _fileHandler = fileHandler;
     _collector = collector;
 }
Exemplo n.º 4
0
        /// <summary>
        /// Validates the current selections, builds the matching classifier, runs the
        /// classification and reports the outcome in a message box.
        /// </summary>
        /// <remarks>
        /// Every validation failure is reported with an error message box and ends the method
        /// early. The result file is named after the data file (without its extension) followed
        /// by <c>resultName</c>, and is placed in the <c>resultDirectoryName</c> directory under
        /// the current working directory. That directory is created before the classification
        /// mode is examined.
        /// </remarks>
        public void Start()
        {
            // Nothing can be classified without an input data file.
            if (string.IsNullOrEmpty(DataFilePath))
            {
                MessageBox.Show("Не был выбран файл с данными для классификации", "Ошибка", MessageBoxButton.OK, MessageBoxImage.Error);
                return;
            }

            // The object file is optional; it is handed over only when one was chosen.
            FileHandler handler = string.IsNullOrEmpty(ObjectFilePath)
                ? new FileHandler(DataFilePath)
                : new FileHandler(DataFilePath, ObjectFilePath);

            var outputDirectory = Directory.CreateDirectory(Path.Combine(Environment.CurrentDirectory, resultDirectoryName));
            var outputFileName = string.Concat(Path.GetFileNameWithoutExtension(DataFilePath), resultName);
            var outputPath = Path.Combine(outputDirectory.FullName, outputFileName);

            Classifier classifier;

            if (IsDictionaryEnabled)
            {
                // Dictionary-based classification.
                if (IsNewDict)
                {
                    // Building a new dictionary requires a labelled sample file.
                    if (string.IsNullOrEmpty(DictionaryDataFilePath))
                    {
                        MessageBox.Show("Не был выбран файл с размеченной выборкой для создания словаря", "Ошибка", MessageBoxButton.OK, MessageBoxImage.Error);
                        return;
                    }

                    classifier = new Classifier(new DictionaryCollector(DictionaryDataFilePath), handler);
                }
                else
                {
                    classifier = new Classifier(handler, Classifier.DictionaryMode);
                }
            }
            else if (IsMachineLearningEnabled)
            {
                // Machine learning: a training sample file is mandatory.
                if (string.IsNullOrEmpty(TrainFilePath))
                {
                    MessageBox.Show("Не был выбран файл с обучающей выборкой", "Ошибка", MessageBoxButton.OK, MessageBoxImage.Error);
                    return;
                }

                classifier = new Classifier(handler, Classifier.MachineLearningMode);

                // The first enabled algorithm flag wins; null means none was selected.
                string method =
                    IsSVM ? "svm"
                    : IsStochasticGradient ? "stochastic_gradient"
                    : IsRandomForest ? "random_forest"
                    : IsLogisticRegression ? "logistic_regression"
                    : IsBernoulliNB ? "bernoulli_naive_bayes"
                    : null;

                if (method == null)
                {
                    MessageBox.Show("Не был выбран способ классификации", "Ошибка", MessageBoxButton.OK, MessageBoxImage.Error);
                    return;
                }

                classifier.Method = method;
                classifier.TrainFilePath = TrainFilePath;
            }
            else
            {
                // Neither classification mode is enabled.
                MessageBox.Show("Не был выбран способ классификации", "Ошибка", MessageBoxButton.OK, MessageBoxImage.Error);
                return;
            }

            // Shared tail for both modes: run the classification, then announce where the results went.
            classifier.Classify(outputPath);
            MessageBox.Show(string.Format("Работа программы успешно завершена! Результаты сохранены в файле {0} в подкаталоге {1}.", outputFileName, resultDirectoryName));
        }