public void GetMembers_ValidMembers_ReturnsCorrectMembers()
{
    // Arrange: spreadsheet fixture whose worksheet 0 holds four known members.
    var spreadsheetFile = new FileInfo(@"C:\Users\Prime Time Pauly G\Documents\ProgHackNight TestAddresses.xlsx");
    var standardizer = new Standardizer();
    List<Member> members;

    // Act: load the workbook and extract the members from worksheet 0.
    using (var package = new ExcelPackage(spreadsheetFile))
    {
        members = standardizer.GetMembers(package, 0);
    }

    // Assert: spot-check names/zip of the first row, address of the second,
    // and the total row count.
    Assert.Equal("Aegon", members[0].FirstName);
    Assert.Equal("Targaryen", members[0].LastName);
    Assert.Equal("10003", members[0].ZipCode);
    Assert.Equal("51-38 Codwise Pl", members[1].Address);
    Assert.Equal(4, members.Count);
}
private static Molecule StandardizeMolecule(Molecule mol, out bool ischiral_out)
{
    // Runs ChemAxon standardization with the "absolutestereo:set" action on
    // 'mol' and reports via 'ischiral_out' whether the standardized molecule
    // carries absolute stereochemistry.
    //
    // On any failure the error is logged and (null, false) is returned —
    // same best-effort contract as before. This rewrite removes the dead
    // commented-out configuration code and the duplicated
    // "ischiral_out = ...; return ..." paths of the original.
    Molecule molChem = null;
    ischiral_out = false;
    try
    {
        Standardizer molSdz = new Standardizer("absolutestereo:set");
        molChem = molSdz.standardize(mol);
        ischiral_out = molChem.isAbsStereo();
    }
    catch (Exception ex)
    {
        // Best-effort: log and fall through with whatever was computed so far.
        ErrorHandling.WriteErrorLog(ex.ToString());
    }
    return molChem;
}
public void ReplaceSpecChars_ConvertsRootSignToAtSign()
{
    // The root sign (√) must be rewritten as '@' for the internal parser.
    var actual = Standardizer.ReplaceSpecChars("(35+7)√(9)");

    Assert.AreEqual("(35+7)@(9)", actual);
}
public void ReplaceSpecChars_ConvertsMultiplicationSignToAtSign()
{
    // The multiplication sign (×) must be rewritten as '*'.
    var actual = Standardizer.ReplaceSpecChars("22+63×(2+7)");

    Assert.AreEqual("22+63*(2+7)", actual);
}
public void ReplaceSpecChars_ConvertsDivisionSignToSlash()
{
    // The division sign (÷) must be rewritten as '/'.
    var actual = Standardizer.ReplaceSpecChars("32+7÷(13-4)");

    Assert.AreEqual("32+7/(13-4)", actual);
}
public void AddMultSigns_ClosingBracketBeforeOpeningBracketInsertsAsterix()
{
    // ")(" denotes implicit multiplication, so a '*' must be inserted.
    var actual = Standardizer.AddMultSigns("(14-8)(6+7)+9");

    Assert.AreEqual("(14-8)*(6+7)+9", actual);
}
public void AddMultSigns_RootAfterPiInsertsAsterix()
{
    // '#' (pi) followed by '@' (root) and adjacent brackets all imply
    // multiplication, so '*' is inserted at every such boundary.
    var actual = Standardizer.AddMultSigns("155315(#@(14)(37^3))");

    Assert.AreEqual("155315*(#*@(14)*(37^3))", actual);
}
public void ReplaceSpecChars_ConvertsPiSignToHashtag()
{
    // The pi sign (π) must be rewritten as '#'.
    var actual = Standardizer.ReplaceSpecChars("14(32-7)/π");

    Assert.AreEqual("14(32-7)/#", actual);
}
public void Standardize_RemovesSpaces()
{
    // Standardize must strip all whitespace from the equation.
    var actual = Standardizer.Standardize("34 + 97 * 3 - 7");

    Assert.AreEqual("34+97*3-7", actual);
}
public void GetHeaders_Worksheet0_ReturnsCorrectHeaders()
{
    // Arrange: fixture workbook whose worksheet 0 has a known header row.
    var spreadsheetFile = new FileInfo(@"C:\Users\Prime Time Pauly G\Documents\ProgHackNight TestAddresses.xlsx");
    var standardizer = new Standardizer();
    List<string> headers;

    // Act
    using (var package = new ExcelPackage(spreadsheetFile))
    {
        headers = standardizer.GetHeaders(package, 0);
    }

    // Assert: check the first and the last expected column header.
    Assert.Equal("Last", headers[0]);
    Assert.Equal("E-Mail address", headers[7]);
}
public void GetLastNameColumnNumber_ColumnExists_ReturnsColNumber()
{
    // Arrange
    var spreadsheetFile = new FileInfo(@"C:\Users\Prime Time Pauly G\Documents\ProgHackNight TestAddresses.xlsx");
    var standardizer = new Standardizer();
    List<string> headers;

    // Act: read the header row from worksheet 0, then look up the
    // last-name column.
    using (var package = new ExcelPackage(spreadsheetFile))
    {
        headers = package.Workbook.Worksheets[0].GetHeaderColumns();
    }
    var columnResult = standardizer.GetLastNameColumnNumber(headers);

    // Assert: "Last" lives in column 1 of the fixture.
    Assert.Equal(1, columnResult);
}
public void GetColumnNumberOfFieldThatStartsWith_ColumnDoesNotExist_ReturnsZero()
{
    // Arrange
    var spreadsheetFile = new FileInfo(@"C:\Users\Prime Time Pauly G\Documents\ProgHackNight TestAddresses.xlsx");
    var standardizer = new Standardizer();
    List<string> headers;

    // Act: search the fixture's header row for a field that is not there.
    using (var package = new ExcelPackage(spreadsheetFile))
    {
        headers = package.Workbook.Worksheets[0].GetHeaderColumns();
    }
    var columnResult = standardizer.GetColumnNumberOfFieldThatStartsWith(headers, "Favorite Weapon");

    // Assert: an unknown header yields column 0.
    Assert.Equal(0, columnResult);
}
/// <summary>
/// Grafts the R-group molecule <paramref name="_rgMol"/> onto the core
/// molecule at the position of R-group number <paramref name="_rgrpNum"/>,
/// bonds it at the R-group molecule's radical atom, removes the R-group
/// placeholder atom(s), standardizes, and returns the cleaned result.
/// Returns null if any ChemAxon operation throws (the error is logged).
/// NOTE(review): the standardizer configuration is loaded from a hard-coded
/// per-user path — this will fail on any other machine; consider making it
/// configurable.
/// </summary>
public static Molecule AddRGrpMolToCoreMolecule(RgMolecule _rgMol, Molecule _coreMol, int _rgrpNum)
{
    try
    {
        // Locate the R-group placeholder atom in the core molecule.
        int rgrpPos = GetRGroupPosition(_coreMol, _rgrpNum);
        // Locate the radical (attachment) atom in the R-group molecule.
        int radPos = GetRadicalPosition(_rgMol);
        // Merge the R-group molecule's atoms into the core molecule.
        AddRGrpMolToCoreMol(ref _coreMol, _rgMol, radPos);
        // Bond the core atom that was attached to the placeholder to the
        // R-group's radical atom.
        _coreMol.add(new MolBond(_coreMol.getAtom(rgrpPos).getBond(0).getAtom1(), _rgMol.getAtom(radPos)));
        // Remove the placeholder node and re-clean 2D coordinates.
        _coreMol.removeNode(_coreMol.getNode(rgrpPos), CGraph.RMCLEANUP_ALL);
        _coreMol.clean(2, null, null);
        // Positions shift after removal — re-resolve and remove again.
        rgrpPos = GetRGroupPosition(_coreMol, _rgrpNum);
        _coreMol.removeNode(_coreMol.getNode(rgrpPos), CGraph.RMCLEANUP_ALL);
        _coreMol.clean(2, null, null);
        //chemaxon.reaction.Standardizer stnd = new chemaxon.reaction.Standardizer("removeexplicitH:radical");
        //Molecule molCore = stnd.standardize(_coreMol);
        // Standardize the molecule using the XML configuration file.
        Standardizer objStnd = new Standardizer(new File(@"C:\Documents and Settings\sairam.punyamantula\Desktop\stand.xml"));
        Molecule molCore = objStnd.standardize(_coreMol);
        molCore.clean(2, null, null);
        return (molCore);
    }
    catch (Exception ex)
    {
        // Best-effort: log and fall through to the null return below.
        PepsiLiteErrorHandling.WriteErrorLog(ex.ToString());
    }
    return (null);
}
public void GetMembers_MissingFields_ReturnsCorrectMembers()
{
    // Arrange: worksheet 1 of the fixture has a row with a missing zip code.
    var spreadsheetFile = new FileInfo(@"C:\Users\Prime Time Pauly G\Documents\ProgHackNight TestAddresses.xlsx");
    var standardizer = new Standardizer();
    string expectedZip = null;
    List<Member> members;

    // Act
    using (var package = new ExcelPackage(spreadsheetFile))
    {
        members = standardizer.GetMembers(package, 1);
    }

    // Assert: the missing field comes back null and the row count is intact.
    Assert.Equal("Tony", members[2].FirstName);
    Assert.Equal("Stark", members[2].LastName);
    Assert.Equal(expectedZip, members[2].ZipCode);
    Assert.Equal(4, members.Count);
}
[DataRow("32(97-8+84)((", "32(97-8+84)")] // Multiple trailing left brackets
public void FixBrackets_StripsEndLeftBrackets(string equation, string expected)
    // FixBrackets must drop unmatched '(' characters at the end of the input.
    => Assert.AreEqual(expected, Standardizer.FixBrackets(equation));
[DataRow("365(37+94(32", "365(37+94(32))")] // Multiple missing right brackets
public void FixBrackets_AppendsRightBrackets(string equation, string expected)
    // FixBrackets must close every '(' left open at the end of the input.
    => Assert.AreEqual(expected, Standardizer.FixBrackets(equation));
// Data-driven: each DataRow supplies a raw equation and its standardized form.
public void Standardize_CombinationsStandardizeCorrectly(string equation, string expected)
    => Assert.AreEqual(expected, Standardizer.Standardize(equation));
// Data-driven: each DataRow supplies an equation and the expected result
// after implicit-multiplication signs are inserted.
public void AddMultSigns_PiAfterNumberAndBracketInsertsAsterix(string equation, string expected)
    => Assert.AreEqual(expected, Standardizer.AddMultSigns(equation));
// Console driver: trains a regression neural network to predict the PSPI
// (Prkachin–Solomon Pain Intensity) score from pooled face-image pixel
// features, then evaluates correlation and MSE on a verification CSV and
// appends the run summary plus final weights to "trained.txt".
// NOTE(review): numHidden/maxEpochs/learnRate/numInput/numOutput/momentum/
// rndSeed/textStream are fields declared outside this view; the CSV paths
// are hard-coded to one machine, and the StreamReaders in arrSR are never
// disposed — confirm and consider fixing upstream.
static void Main(string[] args)
{
    Console.WriteLine("\nBegin neural network Pain data regression demo\n");
    Console.WriteLine("Goal is to predict the PSPI\n");
    // Read hyperparameters interactively from the console.
    Console.Write("Hidden node:");
    numHidden = Convert.ToInt32(Console.ReadLine());
    Console.Write("Iterations:");
    maxEpochs = Convert.ToInt32(Console.ReadLine());
    Console.Write("Learn Rate:");
    learnRate = Convert.ToDouble(Console.ReadLine());
    // artificial; in realistic scenarios you'd read from a text file
    // Two CSV files of 46641 rows each are concatenated below.
    int numItems = 46641 * 2;
    Console.WriteLine("\nProgrammatically reading " + numItems + " training data items");
    double[][] trainData = new double[numItems][];
    // Read all feature data and standardize it.
    StreamReader[] arrSR = new StreamReader[]
    {
        new StreamReader(@"C:\Users\deo\Google 雲端硬碟\碩士論文\實驗用資料庫\PoolingTestSet\UNBC_PainFaceMaxPoolingPixel_64x64_FlipH_EquHist.csv"),
        //new StreamReader(@"C:\Users\deo\Google 雲端硬碟\碩士論文\實驗用資料庫\PoolingTestSet\UNBC_PainFaceMaxPoolingPixel_64x64_EquHist.csv"),
        //new StreamReader(@"C:\Users\deo\Google 雲端硬碟\碩士論文\實驗用資料庫\PoolingTestSet\UNBC_PainFaceMaxPoolingPixel_64x64.csv"),
        new StreamReader(@"C:\Users\deo\Google 雲端硬碟\碩士論文\實驗用資料庫\PoolingTestSet\UNBC_PainFaceMaxPoolingPixel_64x64.csv")
    };
    {
        string[][] RawTrainData = new string[trainData.Length][];
        for (int k = 0; k < arrSR.Length; k++)
        {
            string Line;
            for (int i = 0; (Line = arrSR[k].ReadLine()) != null; i++)
            {
                // CSV layout: column 1 is the PSPI label, columns 2.. are
                // the pixel features; column 0 is skipped.
                string[] ReadLine_Array = Line.Split(',');
                string _PSPI = ReadLine_Array[1];
                string[] PixelValue4096_PSPI = new string[numInput + 1];
                for (int j = 0; j < numInput; j++)
                {
                    PixelValue4096_PSPI[j] = ReadLine_Array[j + 2];
                }
                // Last slot carries the label so each row is [features..., PSPI].
                PixelValue4096_PSPI[numInput] = _PSPI;
                // File k occupies the k-th block of 46641 rows — assumes each
                // CSV has exactly 46641 lines; TODO confirm.
                RawTrainData[k * 46641 + i] = PixelValue4096_PSPI;
            }
        }
        // All columns (features and label) are treated as numeric and
        // standardized together.
        string[] colTypes = new string[numInput + numOutput];
        for (int i = 0; i < colTypes.Length; i++)
        {
            colTypes[i] = "numeric";
        }
        Standardizer stder = new Standardizer(RawTrainData, colTypes);
        trainData = stder.StandardizeAll(RawTrainData);
    }
    //Console.WriteLine("\nTraining data:\n");
    //Show.ShowMatrix(trainData, 3, 4, true);
    // Visualize the data.
    //視覺化.ShowPlot(trainData);
    Console.WriteLine("\nCreating a " + numInput + "-" + numHidden + "-" + numOutput + " regression neural network");
    Console.WriteLine("Using tanh hidden layer activation");
    NeuralNetwork nn = new NeuralNetwork(numInput, numHidden, numOutput, rndSeed);
    //Per-training
    //Console.WriteLine("\nPer-Training...\n");
    //double[] perTrainData = new double[1024 + 1]; for (int i = 0; i < perTrainData.Length; i++) perTrainData[i] = 0.01;
    //nn.Train(new double[1][]{perTrainData}, 1000, learnRate, momentum);
    Console.WriteLine("\nSetting maxEpochs = " + maxEpochs);
    Console.WriteLine("Setting learnRate = " + learnRate.ToString("F4"));
    Console.WriteLine("Setting momentum = " + momentum.ToString("F4"));
    Console.WriteLine("\nStarting training (using stochastic back-propagation)");
    double[] weights = nn.Train(trainData, maxEpochs, learnRate, momentum);
    Console.WriteLine("Finished training");
    //Console.WriteLine("\nFinal neural network model weights:\n");
    //ShowVector(weights, 4, 8, true);
    // Read all verification data and standardize it.
    double[][] inputVector = new double[numItems][];
    using (StreamReader SR = new StreamReader(@"C:\Users\deo\Google 雲端硬碟\碩士論文\實驗用資料庫\PoolingTestSet\UNBC_PainFaceMaxPoolingPixel_64x64_FlipH.csv"))
    {
        string Line;
        string[][] RawVerificationData = new string[trainData.Length][];
        for (int i = 0; (Line = SR.ReadLine()) != null; i++)
        {
            string[] ReadLine_Array = Line.Split(',');
            string _PSPI = ReadLine_Array[1];
            string[] uniLBPfeature118Vector_PSPI = new string[numInput + 1];
            for (int j = 0; j < numInput; j++)
            {
                uniLBPfeature118Vector_PSPI[j] = ReadLine_Array[j + 2];
            }
            uniLBPfeature118Vector_PSPI[numInput] = _PSPI;
            RawVerificationData[i] = uniLBPfeature118Vector_PSPI;
        }
        string[] colTypes = new string[numInput + numOutput];
        for (int i = 0; i < colTypes.Length; i++)
        {
            colTypes[i] = "numeric";
        }
        // NOTE(review): the verification set is standardized with its own
        // statistics, not the training set's — confirm this is intended.
        Standardizer stder = new Standardizer(RawVerificationData, colTypes);
        inputVector = stder.StandardizeAll(RawVerificationData);
    }
    // Print one example prediction for each PSPI value 15 down to 0.
    // NOTE(review): this compares an int target against a standardized double
    // label with == — exact equality may never hold after standardization;
    // confirm.
    for (int target = 15, i = 0; i < inputVector.Length && -1 != target; i++)
    {
        if (target == inputVector[i][inputVector[i].Length - 1]) //inputVector[i][last] = PSPI
        {
            double Predicted = nn.ComputeOutputs(inputVector[i])[0];
            String str = String.Format("Actual PSPI = {0} Predicted = {1}\n", target, Predicted);
            Console.Write(str);
            textStream += str;
            target--;
        }
    }
    // Compute the Pearson product-moment correlation for Set B.
    double CORR = 0;
    double ActualPSPIAvg = 0, PredictedPSPIAvg = 0;
    double COVxy = 0, Sx = 0, Sy = 0;
    for (int i = 0; i < inputVector.Length; i++)
    {
        // Accumulate actual and predicted means.
        PredictedPSPIAvg += nn.ComputeOutputs(inputVector[i])[0];
        ActualPSPIAvg += inputVector[i][inputVector[i].Length - 1];
    }
    PredictedPSPIAvg /= inputVector.Length;
    ActualPSPIAvg /= inputVector.Length;
    for (int i = 0; i < inputVector.Length; i++)
    {
        // Accumulate deviations: covariance and the two variances.
        double Xerr = 0, Yerr = 0;
        Xerr = inputVector[i][inputVector[i].Length - 1] - ActualPSPIAvg;
        Yerr = nn.ComputeOutputs(inputVector[i])[0] - PredictedPSPIAvg;
        COVxy += Xerr * Yerr;
        Sx += Math.Pow(Xerr, 2);
        Sy += Math.Pow(Yerr, 2);
    }
    // r = cov(x, y) / sqrt(var(x) * var(y)).
    CORR = COVxy / Math.Pow(Sx * Sy, 0.5);
    String strCORR = String.Format("CORR = {0}\n", CORR);
    Console.Write(strCORR);
    textStream += strCORR;
    // Done with correlation.
    // Compute the MSE for Set B.
    double MSE = 0;
    for (int i = 0; i < inputVector.Length; i++)
    {
        // nn.ComputeOutputs only reads numInput values (per its constructor),
        // so the trailing y-value in inputVector[i] is ignored automatically.
        double Predicted = nn.ComputeOutputs(inputVector[i])[0];
        double Actual = inputVector[i][inputVector[i].Length - 1];
        MSE += Math.Pow(Actual - Predicted, 2);
    }
    MSE = MSE / inputVector.Length;
    String strMSE = String.Format("MSE = {0}\n", MSE);
    Console.Write(strMSE);
    textStream += strMSE;
    // Persist the run summary and final weights.
    textStream += String.Format("\n" + numInput + "-" + numHidden + "-" + numOutput + " regression neural network\n");
    textStream += String.Format("maxEpochs = " + maxEpochs + "\n");
    textStream += String.Format("learnRate = " + learnRate.ToString("F4") + "\n");
    textStream += String.Format("momentum = " + momentum.ToString("F4") + "\n");
    textStream += String.Format("\nFinal neural network model weights:\n");
    double[] weightsOfNN = nn.GetWeights();
    foreach (double weight in weightsOfNN)
    {
        textStream += String.Format("{0},", weight);
    }
    textStream += String.Format("\n-----------------------------------------------------------------------\n");
    File.AppendAllText("trained.txt", textStream);
    Console.WriteLine("\nEnd demo\n");
    Console.ReadLine();
} // Main
// Reads one pain-feature CSV data set into rows of [numInput features..., PSPI].
// CSV layout: column 1 is the PSPI label, columns 2.. are the features; the
// first line is a header and is skipped. When 'standardize' is true the string
// rows are run through the project's Standardizer; otherwise the raw values
// are parsed directly.
// FIX: the original never disposed the StreamReader (resource leak); it is
// now wrapped in a using statement. All other behavior is unchanged.
static double[][] ReadDataSet(FileInfo dataSet, bool standardize)
{
    string Line;
    // Total data rows = file rows minus the header line.
    int numItems = GetFileRows(dataSet.FullName) - 1;
    double[][] trainData = new double[numItems][];
    using (StreamReader SR = new StreamReader(dataSet.FullName))
    {
        if (standardize)
        {
            string[][] RawTrainData = new string[trainData.Length][];
            // i starts at -1 so the header line is consumed and skipped.
            for (int i = -1; (Line = SR.ReadLine()) != null; i++)
            {
                if (-1 == i)
                {
                    continue; // skip the header row
                }
                string[] ReadLine_Array = Line.Split(',');
                string _PSPI = ReadLine_Array[1];
                string[] painFeatureVector_PSPI = new string[numInput + 1];
                for (int j = 0; j < numInput; j++)
                {
                    painFeatureVector_PSPI[j] = ReadLine_Array[j + 2];
                }
                // Last slot carries the label.
                painFeatureVector_PSPI[numInput] = _PSPI;
                RawTrainData[i] = painFeatureVector_PSPI;
            }
            // Every column (features and label) is standardized as numeric.
            string[] colTypes = new string[numInput + numOutput];
            for (int i = 0; i < colTypes.Length; i++)
            {
                colTypes[i] = "numeric";
            }
            Standardizer stder = new Standardizer(RawTrainData, colTypes);
            trainData = stder.StandardizeAll(RawTrainData);
        }
        else
        {
            // No standardization: parse the raw numeric values directly.
            for (int i = -1; (Line = SR.ReadLine()) != null; i++)
            {
                if (-1 == i)
                {
                    continue; // skip the header row
                }
                string[] ReadLine_Array = Line.Split(',');
                double _PSPI = Convert.ToDouble(ReadLine_Array[1]);
                double[] painFeatureVector_PSPI = new double[numInput + 1];
                for (int j = 0; j < numInput; j++)
                {
                    painFeatureVector_PSPI[j] = Convert.ToDouble(ReadLine_Array[j + 2]);
                }
                painFeatureVector_PSPI[numInput] = _PSPI;
                trainData[i] = painFeatureVector_PSPI;
            }
        }
    }
    return (trainData);
}
// Toy console demo: trains a small regression network on three (a, b) -> y
// examples and prints its predictions for three test inputs, including the
// unseen pair 8 + 11.
// 1 + 4 = 5
// 2 + 5 = 12
// 3 + 6 = 21
// 8 + 11 = ?
// NOTE(review): the banner says "predict the sin(x)" but the data is the
// a+b puzzle above — the message looks stale; confirm.
static void Main(string[] args)
{
    Console.WriteLine("\nBegin neural network regression demo\n");
    Console.WriteLine("Goal is to predict the sin(x)");
    // Training data: string form for the Standardizer, double form unused
    // after standardization overwrites it.
    string[][] strTrainData = new string[][]
    {
        new string[] { "1", "4", "5" },
        new string[] { "2", "5", "12" },
        new string[] { "3", "6", "21" }
    };
    double[][] trainData = new double[][]
    {
        new double[] { 1, 4, 5 },
        new double[] { 2, 5, 12 },
        new double[] { 3, 6, 21 }
    };
    // All three columns (two inputs + target) are standardized as numeric.
    Standardizer s = new Standardizer(strTrainData, new string[] { "numeric", "numeric", "numeric" });
    trainData = s.StandardizeAll(strTrainData);
    // Test data (target column is a placeholder 0; it is ignored at predict time).
    string[][] strTestData = new string[][]
    {
        new string[] { "0", "3", "0" },
        new string[] { "2.5", "5.5", "0" },
        new string[] { "8", "11", "0" }
    };
    double[][] testData = new double[][]
    {
        new double[] { 0, 3, 0 },
        new double[] { 2.5, 5.5, 0 },
        new double[] { 8, 11, 0 }
    };
    // Reuse the training-set standardizer so test data shares its scaling.
    testData = s.StandardizeAll(strTestData);
    // Network hyperparameters.
    int numInput = 2; // usually more
    int numHidden = 100;
    int numOutput = 1; // usual for regression
    int rndSeed = 0;
    Random rnd = new Random(1);
    Console.WriteLine("\nTraining data:\n");
    Show.ShowMatrix(trainData, 3, 4, true);
    // Visualize the data.
    //視覺化.ShowPlot(trainData);
    CvInvoke.WaitKey(1000);
    Console.WriteLine("\nCreating a " + numInput + "-" + numHidden + "-" + numOutput + " regression neural network");
    Console.WriteLine("Using tanh hidden layer activation");
    NeuralNetwork nn = new NeuralNetwork(numInput, numHidden, numOutput, rndSeed);
    int maxEpochs = 3000;
    double learnRate = 0.008;
    double momentum = 0.001;
    Console.WriteLine("\nSetting maxEpochs = " + maxEpochs);
    Console.WriteLine("Setting learnRate = " + learnRate.ToString("F4"));
    Console.WriteLine("Setting momentum = " + momentum.ToString("F4"));
    Console.WriteLine("\nStarting training (using stochastic back-propagation)");
    double[] weights = nn.Train(trainData, maxEpochs, learnRate, momentum);
    Console.WriteLine("Finished training");
    Console.WriteLine("\nFinal neural network model weights:\n");
    Show.ShowVector(weights, 4, 8, true);
    double[] y = nn.ComputeOutputs(testData[0]);
    // Print the prediction for each (standardized) test pair.
    foreach (double[] input in testData)
    {
        Console.WriteLine("\n {0} + {1} = {2} ", input[0], input[1], nn.ComputeOutputs(input)[0].ToString("F6"));
    }
    Console.WriteLine("\nEnd demo\n");
    Console.ReadLine();
} // Main
/// <summary>
/// Trains the classifier on <paramref name="data"/>: holds out the last 100
/// records as a test set when there are more than 400, standardizes the
/// training inputs, grid-searches a Gaussian-kernel SVM over complexity and
/// gamma, calibrates it with Platt scaling, and logs F1 scores for both sets.
/// Side effects: sets the 'standardizer', 'model' and 'TestSetPerformance'
/// fields. <paramref name="token"/> cancels the grid search.
/// </summary>
public void Train(DataPackage data, CancellationToken token)
{
    if (data is null)
    {
        throw new ArgumentNullException(nameof(data));
    }
    log.Debug("Training with {0} records", data.Y.Length);
    // Fit the standardizer on the full X before the train/test split.
    standardizer = Standardizer.GetNumericStandardizer(data.X);
    var xTraining = data.X;
    var yTraining = data.Y;
    var xTesting = xTraining;
    var yTesting = yTraining;
    int testSize = 100;
    // Hold out the last 'testSize' records only when there is enough data;
    // otherwise train and test on the same set.
    if (xTraining.Length > testSize * 4)
    {
        var training = xTraining.Length - testSize;
        xTesting = xTraining.Skip(training).ToArray();
        yTesting = yTraining.Skip(training).ToArray();
        xTraining = xTraining.Take(training).ToArray();
        yTraining = yTraining.Take(training).ToArray();
    }
    xTraining = standardizer.StandardizeAll(xTraining);
    // Instantiate a new Grid Search algorithm for Kernel Support Vector Machines
    var gridsearch = new GridSearch <SupportVectorMachine <Gaussian>, double[], int>()
    {
        // Here we can specify the range of the parameters to be included in the search
        ParameterRanges = new GridSearchRangeCollection
        {
            new GridSearchRange("complexity", new [] { 0.001, 0.01, 0.1, 1, 10 }),
            new GridSearchRange("gamma", new [] { 0.001, 0.01, 0.1, 1 })
        },
        // Indicate how learning algorithms for the models should be created
        Learner = p => new SequentialMinimalOptimization <Gaussian>
        {
            Complexity = p["complexity"],
            Kernel = new Gaussian { Gamma = p["gamma"] }
        },
        // Define how the performance of the models should be measured
        Loss = (actual, expected, m) => new ZeroOneLoss(expected).Loss(actual)
    };
    gridsearch.Token = token;
    // Shuffle X and Y together so rows stay aligned.
    // NOTE(review): new Random() is unseeded, so training is non-deterministic
    // run-to-run — confirm this is acceptable.
    var randomized = new Random().Shuffle(xTraining, yTraining).ToArray();
    yTraining = randomized[1].Cast <int>().ToArray();
    xTraining = randomized[0].Cast <double[]>().ToArray();
    var result = gridsearch.Learn(xTraining, yTraining);
    // Get the best SVM found during the parameter search
    SupportVectorMachine <Gaussian> svm = result.BestModel;
    // Instantiate the probabilistic calibration (using Platt's scaling)
    var calibration = new ProbabilisticOutputCalibration <Gaussian>(svm);
    // Run the calibration algorithm
    calibration.Learn(xTraining, yTraining); // returns the same machine
    model = calibration.Model;
    // Report training-set performance per class.
    var predicted = ClassifyInternal(xTraining);
    var confusionMatrix = new GeneralConfusionMatrix(classes: 2, expected: yTraining, predicted: predicted);
    log.Debug("Performance on training dataset . F1(0):{0} F1(1):{1}", confusionMatrix.PerClassMatrices[0].FScore, confusionMatrix.PerClassMatrices[1].FScore);
    // Report held-out test-set performance and publish it.
    predicted = Classify(xTesting);
    confusionMatrix = new GeneralConfusionMatrix(classes: 2, expected: yTesting, predicted: predicted);
    TestSetPerformance = confusionMatrix;
    log.Debug("Performance on testing dataset . F1(0):{0} F1(1):{1}", confusionMatrix.PerClassMatrices[0].FScore, confusionMatrix.PerClassMatrices[1].FScore);
}
public void FixBrackets_ExceptionOnTooManyRightBracks()
{
    // An unmatched trailing ')' cannot be repaired, so FixBrackets is
    // expected to throw (the expected-exception attribute lives on this test).
    Standardizer.FixBrackets("(32*7)+4(3)+17)");
}
[DataRow("(35+7)RoOt(9)", "(35+7)@(9)")] // Mixedcase word root
public void ReplaceSpecChars_ConvertsWordRootToAtSign(string equation, string expected)
    // The word "root" (any casing) must be rewritten as '@'.
    => Assert.AreEqual(expected, Standardizer.ReplaceSpecChars(equation));
/// <summary>
/// Splits a document into "normal" and "anomalous" runs of sentences.
/// Cluster vectors are standardized and scored with a one-class SVM; each
/// sentence's score is the sum of the scores of every cluster containing it,
/// and sentences below the 20th-percentile cutoff are treated as anomalous.
/// Consecutive sentences with the same verdict are grouped into
/// ProcessingTextBlocks. With fewer than 3 clusters no filtering is done.
/// </summary>
public DetectionResults Filter(DocumentClusters document)
{
    if (document.Clusters.Length < 3)
    {
        logger.Info("Not enought text clusters for clustering");
        return (new DetectionResults(document.Clusters));
    }
    double[][] observations = vectorSource.GetVectors(document.Clusters, NormalizationType.None);
    var standardizer = Standardizer.GetNumericStandardizer(observations);
    observations = standardizer.StandardizeAll(observations);
    // 'data' shares the inner arrays with 'observations', so the NaN
    // scrub below also affects 'data'.
    var data = observations.ToArray();
    for (int i = 0; i < observations.Length; i++)
    {
        for (int j = 0; j < observations[i].Length; j++)
        {
            // Standardization can yield NaN (e.g. zero-variance columns);
            // replace with 0 so the SVM can train.
            if (double.IsNaN(observations[i][j]))
            {
                observations[i][j] = 0;
            }
        }
    }
    var teacher = new OneclassSupportVectorLearning <Gaussian>
    {
        // Gamma scaled by the number of samples; Nu = 0.5 bounds the
        // fraction of outliers.
        Kernel = Gaussian.FromGamma(1.0 / data.Length),
        Nu = 0.5,
        Shrinking = true,
        Tolerance = 0.001
    };
    var svm = teacher.Learn(data);
    double[] prediction = svm.Score(data);
    // Collect, per sentence index, the scores of every cluster that
    // contains that sentence.
    Dictionary <int, List <double> > weights = new Dictionary <int, List <double> >();
    for (int i = 0; i < prediction.Length; i++)
    {
        foreach (var sentenceItem in document.Clusters[i].Sentences)
        {
            if (!weights.TryGetValue(sentenceItem.Index, out var classType))
            {
                classType = new List <double>();
                weights[sentenceItem.Index] = classType;
            }
            classType.Add(prediction[i]);
        }
    }
    List <ProcessingTextBlock> anomaly = new List <ProcessingTextBlock>();
    List <ProcessingTextBlock> resultData = new List <ProcessingTextBlock>();
    List <SentenceItem> sentences = new List <SentenceItem>();
    ProcessingTextBlock cluster;
    bool?lastResult = null;
    // Cutoff = the summed score at the 20th percentile; sentences at or
    // below it are flagged as anomalous.
    var cutoffIndex = (int)(weights.Count * 0.2);
    var cutoff = weights.Select(item => item.Value.Sum()).OrderBy(item => item).Skip(cutoffIndex).First();
    var allSentences = document.Clusters.SelectMany(item => item.Sentences)
                       .Distinct()
                       .OrderBy(item => item.Index)
                       .ToArray();
    // Sanity check: every distinct sentence must have a weight entry.
    if (allSentences.Length != weights.Count)
    {
        throw new ArgumentOutOfRangeException(nameof(document), "Sentence length mismatch");
    }
    // Walk sentences in order, emitting a block every time the
    // normal/anomalous verdict flips.
    foreach (var sentence in allSentences)
    {
        var current = weights[sentence.Index].Sum();
        var result = current > cutoff;
        if (lastResult != null && result != lastResult)
        {
            cluster = new ProcessingTextBlock(sentences.ToArray());
            sentences.Clear();
            if (lastResult.Value)
            {
                resultData.Add(cluster);
            }
            else
            {
                anomaly.Add(cluster);
            }
        }
        sentences.Add(sentence);
        lastResult = result;
    }
    // Flush the final run of sentences.
    cluster = new ProcessingTextBlock(sentences.ToArray());
    sentences.Clear();
    if (lastResult.Value)
    {
        resultData.Add(cluster);
    }
    else
    {
        anomaly.Add(cluster);
    }
    // NOTE(review): 'builder' collects the anomalous text but is never used
    // afterwards — looks like leftover debugging; confirm before removing.
    StringBuilder builder = new StringBuilder();
    foreach (var textCluster in anomaly)
    {
        foreach (var sentenceItem in textCluster.Sentences)
        {
            builder.AppendLine(sentenceItem.Text);
        }
    }
    return (new DetectionResults(resultData.ToArray(), anomaly.ToArray()));
}