Esempio n. 1
0
        public void GetMembers_ValidMembers_ReturnsCorrectMembers()
        {
            // Arrange: load the known-good test spreadsheet.
            var spreadsheetFile = new FileInfo(@"C:\Users\Prime Time Pauly G\Documents\ProgHackNight TestAddresses.xlsx");
            var standardizer = new Standardizer();

            // Act: parse all members from worksheet 0.
            List<Member> members;
            using (var package = new ExcelPackage(spreadsheetFile))
            {
                members = standardizer.GetMembers(package, 0);
            }

            // Assert: spot-check known rows plus the total row count.
            Assert.Equal("Aegon", members[0].FirstName);
            Assert.Equal("Targaryen", members[0].LastName);
            Assert.Equal("10003", members[0].ZipCode);
            Assert.Equal("51-38 Codwise Pl", members[1].Address);
            Assert.Equal(4, members.Count);
        }
Esempio n. 2
0
        /// <summary>
        /// Standardizes a molecule with the ChemAxon "absolutestereo:set" rule and
        /// reports whether the standardized structure carries absolute stereochemistry.
        /// </summary>
        /// <param name="mol">Molecule to standardize.</param>
        /// <param name="ischiral_out">True when the standardized molecule is absolute-stereo; false on failure.</param>
        /// <returns>The standardized molecule, or null if standardization threw.</returns>
        private static Molecule StandardizeMolecule(Molecule mol, out bool ischiral_out)
        {
            Molecule molChem    = null;
            bool     blIsChiral = false;

            try
            {
                Standardizer molSdz = new Standardizer("absolutestereo:set");
                molChem = molSdz.standardize(mol);

                blIsChiral = molChem.isAbsStereo();
            }
            catch (Exception ex)
            {
                // Best-effort: log the failure and fall through with null/false results.
                ErrorHandling.WriteErrorLog(ex.ToString());
            }

            // Single exit replaces the duplicated assign/return that existed both
            // inside the try block and after the catch block.
            ischiral_out = blIsChiral;
            return molChem;
        }
Esempio n. 3
0
        public void ReplaceSpecChars_ConvertsRootSignToAtSign()
        {
            // The √ symbol should be normalized to the internal '@' root operator.
            var actual = Standardizer.ReplaceSpecChars("(35+7)√(9)");

            Assert.AreEqual("(35+7)@(9)", actual);
        }
Esempio n. 4
0
        public void ReplaceSpecChars_ConvertsMultiplicationSignToAtSign()
        {
            // NOTE(review): the test name says "AtSign" but × is actually
            // converted to '*' — consider renaming to ...ToAsterisk.
            var actual = Standardizer.ReplaceSpecChars("22+63×(2+7)");

            Assert.AreEqual("22+63*(2+7)", actual);
        }
Esempio n. 5
0
        public void ReplaceSpecChars_ConvertsDivisionSignToSlash()
        {
            // The ÷ symbol should be normalized to '/'.
            var actual = Standardizer.ReplaceSpecChars("32+7÷(13-4)");

            Assert.AreEqual("32+7/(13-4)", actual);
        }
Esempio n. 6
0
        public void AddMultSigns_ClosingBracketBeforeOpeningBracketInsertsAsterix()
        {
            // Adjacent ")(" means implicit multiplication — a '*' must be inserted.
            var actual = Standardizer.AddMultSigns("(14-8)(6+7)+9");

            Assert.AreEqual("(14-8)*(6+7)+9", actual);
        }
Esempio n. 7
0
        public void AddMultSigns_RootAfterPiInsertsAsterix()
        {
            // '#' is pi and '@' is root; implicit products around them get '*'.
            var actual = Standardizer.AddMultSigns("155315(#@(14)(37^3))");

            Assert.AreEqual("155315*(#*@(14)*(37^3))", actual);
        }
Esempio n. 8
0
        public void ReplaceSpecChars_ConvertsPiSignToHashtag()
        {
            // The π symbol should be normalized to the internal '#' marker.
            var actual = Standardizer.ReplaceSpecChars("14(32-7)/π");

            Assert.AreEqual("14(32-7)/#", actual);
        }
Esempio n. 9
0
        public void Standardize_RemovesSpaces()
        {
            // All whitespace should be stripped from the equation.
            var actual = Standardizer.Standardize("34 + 97 * 3 - 7");

            Assert.AreEqual("34+97*3-7", actual);
        }
Esempio n. 10
0
        public void GetHeaders_Worksheet0_ReturnsCorrectHeaders()
        {
            // Arrange
            var spreadsheetFile = new FileInfo(@"C:\Users\Prime Time Pauly G\Documents\ProgHackNight TestAddresses.xlsx");
            var standardizer = new Standardizer();

            // Act: read the header row of worksheet 0.
            List<string> headers;
            using (var package = new ExcelPackage(spreadsheetFile))
            {
                headers = standardizer.GetHeaders(package, 0);
            }

            // Assert: first and last expected column titles.
            Assert.Equal("Last", headers[0]);
            Assert.Equal("E-Mail address", headers[7]);
        }
Esempio n. 11
0
        public void GetLastNameColumnNumber_ColumnExists_ReturnsColNumber()
        {
            // Arrange
            var spreadsheetFile = new FileInfo(@"C:\Users\Prime Time Pauly G\Documents\ProgHackNight TestAddresses.xlsx");
            var standardizer = new Standardizer();

            List<string> myHeaders;
            using (var package = new ExcelPackage(spreadsheetFile))
            {
                myHeaders = package.Workbook.Worksheets[0].GetHeaderColumns();
            }

            // Act
            var columnResult = standardizer.GetLastNameColumnNumber(myHeaders);

            // Assert: the last-name column sits at index 1 in the test sheet.
            Assert.Equal(1, columnResult);
        }
Esempio n. 12
0
        public void GetColumnNumberOfFieldThatStartsWith_ColumnDoesNotExist_ReturnsZero()
        {
            // Arrange: search for a header that is not present in the sheet.
            var spreadsheetFile = new FileInfo(@"C:\Users\Prime Time Pauly G\Documents\ProgHackNight TestAddresses.xlsx");
            var standardizer = new Standardizer();

            List<string> myHeaders;
            using (var package = new ExcelPackage(spreadsheetFile))
            {
                myHeaders = package.Workbook.Worksheets[0].GetHeaderColumns();
            }

            // Act
            var columnResult = standardizer.GetColumnNumberOfFieldThatStartsWith(myHeaders, "Favorite Weapon");

            // Assert: a missing column is reported as 0.
            Assert.Equal(0, columnResult);
        }
Esempio n. 13
0
        /// <summary>
        /// Attaches an R-group fragment to a core molecule: bonds the fragment's
        /// radical atom next to the R-group placeholder, removes the placeholder
        /// nodes, standardizes the result and 2D-cleans it.
        /// </summary>
        /// <param name="_rgMol">R-group fragment to attach.</param>
        /// <param name="_coreMol">Core molecule containing the R-group placeholder.</param>
        /// <param name="_rgrpNum">Number of the R-group position to substitute.</param>
        /// <returns>The combined, standardized molecule, or null if any step threw.</returns>
        public static Molecule AddRGrpMolToCoreMolecule(RgMolecule _rgMol, Molecule _coreMol, int _rgrpNum)
        {
            try
            {
                //Get RGroup position
                int rgrpPos = GetRGroupPosition(_coreMol, _rgrpNum);

                //Get Radical position
                int radPos = GetRadicalPosition(_rgMol);

                //Add Rgroup Molecule to Core Molecule
                AddRGrpMolToCoreMol(ref _coreMol, _rgMol, radPos);

                // Bond the neighbor of the R-group placeholder to the fragment's radical atom.
                _coreMol.add(new MolBond(_coreMol.getAtom(rgrpPos).getBond(0).getAtom1(), _rgMol.getAtom(radPos)));

                // Remove the placeholder node and re-clean 2D coordinates.
                _coreMol.removeNode(_coreMol.getNode(rgrpPos), CGraph.RMCLEANUP_ALL);
                _coreMol.clean(2, null, null);

                // Positions shift after removal, so look the R-group up again
                // before removing the second node — order matters here.
                rgrpPos = GetRGroupPosition(_coreMol, _rgrpNum);

                _coreMol.removeNode(_coreMol.getNode(rgrpPos), CGraph.RMCLEANUP_ALL);
                _coreMol.clean(2, null, null);

                //chemaxon.reaction.Standardizer stnd = new chemaxon.reaction.Standardizer("removeexplicitH:radical");
                //Molecule molCore = stnd.standardize(_coreMol);

                //Standardize the molecule
                // NOTE(review): hard-coded user-desktop path to stand.xml — this
                // should come from configuration; verify the file exists at runtime.
                Standardizer objStnd = new Standardizer(new File(@"C:\Documents and Settings\sairam.punyamantula\Desktop\stand.xml"));
                Molecule     molCore = objStnd.standardize(_coreMol);

                molCore.clean(2, null, null);

                return(molCore);
            }
            catch (Exception ex)
            {
                // Best-effort: log and return null so callers can detect failure.
                PepsiLiteErrorHandling.WriteErrorLog(ex.ToString());
            }
            return(null);
        }
Esempio n. 14
0
        public void GetMembers_MissingFields_ReturnsCorrectMembers()
        {
            // Arrange: worksheet 1 contains rows with some fields left blank.
            var spreadsheetFile = new FileInfo(@"C:\Users\Prime Time Pauly G\Documents\ProgHackNight TestAddresses.xlsx");
            var standardizer = new Standardizer();

            // Act
            List<Member> members;
            using (var package = new ExcelPackage(spreadsheetFile))
            {
                members = standardizer.GetMembers(package, 1);
            }

            // Assert: row 2 has a name but no zip code; the row count is unaffected.
            Assert.Equal("Tony", members[2].FirstName);
            Assert.Equal("Stark", members[2].LastName);
            Assert.Null(members[2].ZipCode);
            Assert.Equal(4, members.Count);
        }
Esempio n. 15
0
        [DataRow("32(97-8+84)((", "32(97-8+84)")] // Multiple trailing left brackets
        public void FixBrackets_StripsEndLeftBrackets(string equation, string expected)
            // Dangling '(' at the end of the equation must be dropped.
            => Assert.AreEqual(expected, Standardizer.FixBrackets(equation));
Esempio n. 16
0
        [DataRow("365(37+94(32", "365(37+94(32))")] // Multiple missing right brackets
        public void FixBrackets_AppendsRightBrackets(string equation, string expected)
            // Unclosed '(' must be balanced with trailing ')'.
            => Assert.AreEqual(expected, Standardizer.FixBrackets(equation));
Esempio n. 17
0
        public void Standardize_CombinationsStandardizeCorrectly(string equation, string expected)
            // Parameterized end-to-end check of the full Standardize pipeline.
            => Assert.AreEqual(expected, Standardizer.Standardize(equation));
Esempio n. 18
0
        public void AddMultSigns_PiAfterNumberAndBracketInsertsAsterix(string equation, string expected)
            // Parameterized check that implicit products with '#' (pi) gain a '*'.
            => Assert.AreEqual(expected, Standardizer.AddMultSigns(equation));
Esempio n. 19
0
        /// <summary>
        /// Trains a regression neural network on UNBC pain-face pixel CSVs to
        /// predict the PSPI score, then reports per-target predictions, Pearson
        /// correlation and MSE on a verification CSV, appending results to
        /// "trained.txt".
        /// </summary>
        static void Main(string[] args)
        {
            Console.WriteLine("\nBegin neural network Pain data regression demo\n");
            Console.WriteLine("Goal is to predict the PSPI\n");

            // Interactive hyper-parameter entry.
            Console.Write("Hidden node:"); numHidden = Convert.ToInt32(Console.ReadLine());
            Console.Write("Iterations:"); maxEpochs  = Convert.ToInt32(Console.ReadLine());
            Console.Write("Learn Rate:"); learnRate  = Convert.ToDouble(Console.ReadLine());

            // artificial; in realistic scenarios you'd read from a text file
            int numItems = 46641 * 2;

            Console.WriteLine("\nProgrammatically reading " + numItems + " training data items");

            double[][] trainData = new double[numItems][];

            // Read all feature data and standardize it.
            StreamReader[] arrSR = new StreamReader[] {
                new StreamReader(@"C:\Users\deo\Google 雲端硬碟\碩士論文\實驗用資料庫\PoolingTestSet\UNBC_PainFaceMaxPoolingPixel_64x64_FlipH_EquHist.csv"),
                //new StreamReader(@"C:\Users\deo\Google 雲端硬碟\碩士論文\實驗用資料庫\PoolingTestSet\UNBC_PainFaceMaxPoolingPixel_64x64_EquHist.csv"),
                //new StreamReader(@"C:\Users\deo\Google 雲端硬碟\碩士論文\實驗用資料庫\PoolingTestSet\UNBC_PainFaceMaxPoolingPixel_64x64_FlipH.csv"),
                new StreamReader(@"C:\Users\deo\Google 雲端硬碟\碩士論文\實驗用資料庫\PoolingTestSet\UNBC_PainFaceMaxPoolingPixel_64x64.csv")
            };
            try
            {
                string[][] RawTrainData = new string[trainData.Length][];
                for (int k = 0; k < arrSR.Length; k++)
                {
                    string Line;
                    for (int i = 0; (Line = arrSR[k].ReadLine()) != null; i++)
                    {
                        // CSV column 1 is the PSPI label; pixel features start at column 2.
                        string[] ReadLine_Array      = Line.Split(',');
                        string   _PSPI               = ReadLine_Array[1];
                        string[] PixelValue4096_PSPI = new string[numInput + 1];

                        for (int j = 0; j < numInput; j++)
                        {
                            PixelValue4096_PSPI[j] = ReadLine_Array[j + 2];
                        }

                        // Features first, label last; each file contributes 46641 rows.
                        PixelValue4096_PSPI[numInput] = _PSPI;
                        RawTrainData[k * 46641 + i]   = PixelValue4096_PSPI;
                    }
                }

                string[] colTypes = new string[numInput + numOutput];
                for (int i = 0; i < colTypes.Length; i++)
                {
                    colTypes[i] = "numeric";
                }
                Standardizer stder = new Standardizer(RawTrainData, colTypes);
                trainData = stder.StandardizeAll(RawTrainData);
            }
            finally
            {
                // FIX: the readers were previously never disposed (file-handle leak).
                foreach (StreamReader sr in arrSR)
                {
                    sr.Dispose();
                }
            }

            Console.WriteLine("\nCreating a " + numInput + "-" + numHidden + "-" + numOutput + " regression neural network");
            Console.WriteLine("Using tanh hidden layer activation");
            NeuralNetwork nn = new NeuralNetwork(numInput, numHidden, numOutput, rndSeed);

            Console.WriteLine("\nSetting maxEpochs = " + maxEpochs);
            Console.WriteLine("Setting learnRate = " + learnRate.ToString("F4"));
            Console.WriteLine("Setting momentum  = " + momentum.ToString("F4"));

            Console.WriteLine("\nStarting training (using stochastic back-propagation)");
            double[] weights = nn.Train(trainData, maxEpochs, learnRate, momentum);
            Console.WriteLine("Finished training");

            // Read all verification data and standardize it.
            double[][] inputVector = new double[numItems][];
            using (StreamReader SR = new StreamReader(@"C:\Users\deo\Google 雲端硬碟\碩士論文\實驗用資料庫\PoolingTestSet\UNBC_PainFaceMaxPoolingPixel_64x64_FlipH.csv"))
            {
                string     Line;
                string[][] RawVerificationData = new string[trainData.Length][];

                for (int i = 0; (Line = SR.ReadLine()) != null; i++)
                {
                    string[] ReadLine_Array = Line.Split(',');
                    string   _PSPI          = ReadLine_Array[1];
                    string[] uniLBPfeature118Vector_PSPI = new string[numInput + 1];

                    for (int j = 0; j < numInput; j++)
                    {
                        uniLBPfeature118Vector_PSPI[j] = ReadLine_Array[j + 2];
                    }

                    uniLBPfeature118Vector_PSPI[numInput] = _PSPI;
                    RawVerificationData[i] = uniLBPfeature118Vector_PSPI;
                }

                string[] colTypes = new string[numInput + numOutput];
                for (int i = 0; i < colTypes.Length; i++)
                {
                    colTypes[i] = "numeric";
                }
                Standardizer stder = new Standardizer(RawVerificationData, colTypes);
                inputVector = stder.StandardizeAll(RawVerificationData);
            }

            // Print one prediction for each PSPI target from 15 down to 0.
            for (int target = 15, i = 0; i < inputVector.Length && -1 != target; i++)
            {
                if (target == inputVector[i][inputVector[i].Length - 1]) //inputVector[i][last] = PSPI
                {
                    double Predicted = nn.ComputeOutputs(inputVector[i])[0];
                    String str       = String.Format("Actual PSPI = {0}   Predicted = {1}\n", target, Predicted);
                    Console.Write(str);
                    textStream += str;
                    target--;
                }
            }

            // Pearson correlation between actual and predicted PSPI (Set B).
            double CORR = 0;
            double ActualPSPIAvg = 0, PredictedPSPIAvg = 0;
            double COVxy = 0, Sx = 0, Sy = 0;

            for (int i = 0; i < inputVector.Length; i++)
            {// means of the actual and predicted values
                PredictedPSPIAvg += nn.ComputeOutputs(inputVector[i])[0];
                ActualPSPIAvg    += inputVector[i][inputVector[i].Length - 1];
            }
            PredictedPSPIAvg /= inputVector.Length;
            ActualPSPIAvg    /= inputVector.Length;

            for (int i = 0; i < inputVector.Length; i++)
            {// deviations from the means
                double Xerr = 0, Yerr = 0;
                Xerr   = inputVector[i][inputVector[i].Length - 1] - ActualPSPIAvg;
                Yerr   = nn.ComputeOutputs(inputVector[i])[0] - PredictedPSPIAvg;
                COVxy += Xerr * Yerr;
                Sx    += Math.Pow(Xerr, 2);
                Sy    += Math.Pow(Yerr, 2);
            }
            CORR = COVxy / Math.Pow(Sx * Sy, 0.5);
            String strCORR = String.Format("CORR = {0}\n", CORR);

            Console.Write(strCORR);
            textStream += strCORR;

            // MSE on Set B.
            double MSE = 0;

            for (int i = 0; i < inputVector.Length; i++)
            {
                // ComputeOutputs reads only numInput values (per the constructor),
                // so the trailing y-value in inputVector[i] is ignored automatically.
                double Predicted = nn.ComputeOutputs(inputVector[i])[0];
                double Actual    = inputVector[i][inputVector[i].Length - 1];
                MSE += Math.Pow(Actual - Predicted, 2);
            }
            MSE = MSE / inputVector.Length;
            String strMSE = String.Format("MSE = {0}\n", MSE);

            Console.Write(strMSE);
            textStream += strMSE;

            // Persist the run configuration and final weights.
            textStream += String.Format("\n" + numInput + "-" + numHidden + "-" + numOutput + " regression neural network\n");
            textStream += String.Format("maxEpochs = " + maxEpochs + "\n");
            textStream += String.Format("learnRate = " + learnRate.ToString("F4") + "\n");
            textStream += String.Format("momentum  = " + momentum.ToString("F4") + "\n");

            textStream += String.Format("\nFinal neural network model weights:\n");
            double[] weightsOfNN = nn.GetWeights();
            foreach (double weight in weightsOfNN)
            {
                textStream += String.Format("{0},", weight);
            }

            textStream += String.Format("\n-----------------------------------------------------------------------\n");

            File.AppendAllText("trained.txt", textStream);
            Console.WriteLine("\nEnd demo\n");
            Console.ReadLine();
        } // Main
        /// <summary>
        /// Reads a CSV data set into rows of numInput feature values followed by
        /// the PSPI label, skipping the header line. When <paramref name="standardize"/>
        /// is true the rows are standardized via <see cref="Standardizer"/>;
        /// otherwise raw values are parsed directly to doubles.
        /// </summary>
        /// <param name="dataSet">CSV file to read.</param>
        /// <param name="standardize">Whether to standardize the parsed rows.</param>
        /// <returns>Jagged array of numInput features plus trailing label per row.</returns>
        static double[][] ReadDataSet(FileInfo dataSet, bool standardize)
        {
            int numItems = GetFileRows(dataSet.FullName) - 1; // total rows minus the header line

            double[][] trainData = new double[numItems][];

            // FIX: the StreamReader was previously never disposed (file-handle leak).
            using (StreamReader SR = new StreamReader(dataSet.FullName))
            {
                string Line;
                if (standardize)
                {
                    string[][] RawTrainData = new string[trainData.Length][];

                    // i starts at -1 so the header row maps to index -1 and is skipped.
                    for (int i = -1; (Line = SR.ReadLine()) != null; i++)
                    {
                        if (-1 == i)
                        {
                            continue;         // skip the header row
                        }
                        // Column 1 is the PSPI label; features start at column 2.
                        string[] ReadLine_Array         = Line.Split(',');
                        string   _PSPI                  = ReadLine_Array[1];
                        string[] painFeatureVector_PSPI = new string[numInput + 1];

                        for (int j = 0; j < numInput; j++)
                        {
                            painFeatureVector_PSPI[j] = ReadLine_Array[j + 2];
                        }

                        painFeatureVector_PSPI[numInput] = _PSPI;
                        RawTrainData[i] = painFeatureVector_PSPI;
                    }

                    string[] colTypes = new string[numInput + numOutput];
                    for (int i = 0; i < colTypes.Length; i++)
                    {
                        colTypes[i] = "numeric";
                    }
                    Standardizer stder = new Standardizer(RawTrainData, colTypes);
                    trainData = stder.StandardizeAll(RawTrainData);
                }
                else
                {// no standardization: parse values directly to double
                    for (int i = -1; (Line = SR.ReadLine()) != null; i++)
                    {
                        if (-1 == i)
                        {
                            continue;         // skip the header row
                        }
                        string[] ReadLine_Array         = Line.Split(',');
                        double   _PSPI                  = Convert.ToDouble(ReadLine_Array[1]);
                        double[] painFeatureVector_PSPI = new double[numInput + 1];

                        for (int j = 0; j < numInput; j++)
                        {
                            painFeatureVector_PSPI[j] = Convert.ToDouble(ReadLine_Array[j + 2]);
                        }

                        painFeatureVector_PSPI[numInput] = _PSPI;
                        trainData[i] = painFeatureVector_PSPI;
                    }
                }
            }
            return trainData;
        }
        //  1 + 4 = 5
        //  2 + 5 = 12
        //  3 + 6 = 21
        //  8 + 11 = ?

        /// <summary>
        /// Toy regression demo: trains a small neural network on three
        /// (a, b) -> c puzzle rows (1+4=5, 2+5=12, 3+6=21) and prints
        /// predictions for test inputs, including the 8 + 11 = ? case.
        /// </summary>
        static void Main(string[] args)
        {
            Console.WriteLine("\nBegin neural network regression demo\n");
            Console.WriteLine("Goal is to predict the sin(x)");

            // Training data (string form feeds the Standardizer).
            string[][] strTrainData = new string[][] { new string[] { "1", "4", "5" },
                                                       new string[] { "2", "5", "12" },
                                                       new string[] { "3", "6", "21" } };
            Standardizer s = new Standardizer(strTrainData, new string[] { "numeric", "numeric", "numeric" });

            // The previous hand-written double[][] literals were dead stores —
            // both arrays were immediately overwritten by StandardizeAll.
            double[][] trainData = s.StandardizeAll(strTrainData);

            // Test data; the trailing "0" labels are placeholders ignored at prediction time.
            string[][] strTestData = new string[][] { new string[] { "0", "3", "0" },
                                                      new string[] { "2.5", "5.5", "0" },
                                                      new string[] { "8", "11", "0" } };
            double[][] testData = s.StandardizeAll(strTestData);

            // Neural network configuration.
            int numInput  = 2; // usually more
            int numHidden = 100;
            int numOutput = 1; // usual for regression
            int rndSeed   = 0;

            Console.WriteLine("\nTraining data:\n");
            Show.ShowMatrix(trainData, 3, 4, true);

            CvInvoke.WaitKey(1000);

            Console.WriteLine("\nCreating a " + numInput + "-" +
                              numHidden + "-" + numOutput + " regression neural network");
            Console.WriteLine("Using tanh hidden layer activation");

            NeuralNetwork nn = new NeuralNetwork(numInput, numHidden, numOutput, rndSeed);

            int    maxEpochs = 3000;
            double learnRate = 0.008;
            double momentum  = 0.001;

            Console.WriteLine("\nSetting maxEpochs = " + maxEpochs);
            Console.WriteLine("Setting learnRate = " + learnRate.ToString("F4"));
            Console.WriteLine("Setting momentum  = " + momentum.ToString("F4"));

            Console.WriteLine("\nStarting training (using stochastic back-propagation)");
            double[] weights = nn.Train(trainData, maxEpochs, learnRate, momentum);
            Console.WriteLine("Finished training");
            Console.WriteLine("\nFinal neural network model weights:\n");
            Show.ShowVector(weights, 4, 8, true);

            // (Removed unused locals `rnd` and `y` — neither affected output.)
            foreach (double[] input in testData)
            {
                Console.WriteLine("\n {0} + {1} = {2} ", input[0], input[1], nn.ComputeOutputs(input)[0].ToString("F6"));
            }

            Console.WriteLine("\nEnd demo\n");
            Console.ReadLine();
        } // Main
Esempio n. 22
0
        /// <summary>
        /// Trains a Gaussian-kernel SVM on the supplied data: fits a numeric
        /// standardizer, holds out a test slice when enough rows exist,
        /// grid-searches complexity/gamma, calibrates probabilities with
        /// Platt scaling, and records confusion-matrix performance.
        /// </summary>
        /// <param name="data">Package holding feature rows X and integer labels Y.</param>
        /// <param name="token">Token used to cancel the grid search.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="data"/> is null.</exception>
        public void Train(DataPackage data, CancellationToken token)
        {
            if (data is null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            log.Debug("Training with {0} records", data.Y.Length);

            // Standardizer is fitted on the FULL X, before the train/test split.
            standardizer = Standardizer.GetNumericStandardizer(data.X);
            var xTraining = data.X;
            var yTraining = data.Y;

            // By default test on the training data itself...
            var xTesting = xTraining;
            var yTesting = yTraining;

            int testSize = 100;

            // ...but when there is enough data, hold out the last 100 rows.
            if (xTraining.Length > testSize * 4)
            {
                var training = xTraining.Length - testSize;
                xTesting  = xTraining.Skip(training).ToArray();
                yTesting  = yTraining.Skip(training).ToArray();
                xTraining = xTraining.Take(training).ToArray();
                yTraining = yTraining.Take(training).ToArray();
            }

            // NOTE(review): only xTraining is standardized here; xTesting is fed
            // to Classify below — presumably Classify standardizes internally.
            // Confirm against Classify/ClassifyInternal.
            xTraining = standardizer.StandardizeAll(xTraining);
            // Instantiate a new Grid Search algorithm for Kernel Support Vector Machines
            var gridsearch = new GridSearch <SupportVectorMachine <Gaussian>, double[], int>()
            {
                // Here we can specify the range of the parameters to be included in the search
                ParameterRanges = new GridSearchRangeCollection
                {
                    new GridSearchRange("complexity", new [] { 0.001, 0.01, 0.1, 1, 10 }),
                    new GridSearchRange("gamma", new [] { 0.001, 0.01, 0.1, 1 })
                },

                // Indicate how learning algorithms for the models should be created
                Learner = p => new SequentialMinimalOptimization <Gaussian>
                {
                    Complexity = p["complexity"],
                    Kernel     = new Gaussian
                    {
                        Gamma = p["gamma"]
                    }
                },

                // Define how the performance of the models should be measured
                Loss = (actual, expected, m) => new ZeroOneLoss(expected).Loss(actual)
            };

            gridsearch.Token = token;

            // Shuffle features and labels together so row pairing is preserved.
            var randomized = new Random().Shuffle(xTraining, yTraining).ToArray();

            yTraining = randomized[1].Cast <int>().ToArray();
            xTraining = randomized[0].Cast <double[]>().ToArray();

            var result = gridsearch.Learn(xTraining, yTraining);

            // Get the best SVM found during the parameter search
            SupportVectorMachine <Gaussian> svm = result.BestModel;

            // Instantiate the probabilistic calibration (using Platt's scaling)
            var calibration = new ProbabilisticOutputCalibration <Gaussian>(svm);

            // Run the calibration algorithm
            calibration.Learn(xTraining, yTraining); // returns the same machine
            model = calibration.Model;
            var predicted       = ClassifyInternal(xTraining);
            var confusionMatrix = new GeneralConfusionMatrix(classes: 2, expected: yTraining, predicted: predicted);

            log.Debug("Performance on training dataset . F1(0):{0} F1(1):{1}", confusionMatrix.PerClassMatrices[0].FScore, confusionMatrix.PerClassMatrices[1].FScore);

            // Evaluate on the held-out slice and publish the result.
            predicted          = Classify(xTesting);
            confusionMatrix    = new GeneralConfusionMatrix(classes: 2, expected: yTesting, predicted: predicted);
            TestSetPerformance = confusionMatrix;
            log.Debug("Performance on testing dataset . F1(0):{0} F1(1):{1}", confusionMatrix.PerClassMatrices[0].FScore, confusionMatrix.PerClassMatrices[1].FScore);
        }
Esempio n. 23
0
 public void FixBrackets_ExceptionOnTooManyRightBracks()
 {
     // Unbalanced trailing ')' — FixBrackets is expected to throw here.
     Standardizer.FixBrackets("(32*7)+4(3)+17)");
 }
Esempio n. 24
0
        [DataRow("(35+7)RoOt(9)", "(35+7)@(9)")] // Mixedcase word root
        public void ReplaceSpecChars_ConvertsWordRootToAtSign(string equation, string expected)
            // The word "root" (any casing) should be normalized to '@'.
            => Assert.AreEqual(expected, Standardizer.ReplaceSpecChars(equation));
        /// <summary>
        /// Splits a document's sentences into normal and anomalous blocks:
        /// scores cluster vectors with a one-class SVM, aggregates scores per
        /// sentence, and cuts at the 20th percentile of summed sentence weight.
        /// </summary>
        /// <param name="document">Clustered document to filter.</param>
        /// <returns>Detection results with normal and (when detected) anomalous blocks.</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when sentence/weight counts disagree.</exception>
        public DetectionResults Filter(DocumentClusters document)
        {
            // Too few clusters to model — return everything as-is.
            if (document.Clusters.Length < 3)
            {
                logger.Info("Not enought text clusters for clustering");
                return(new DetectionResults(document.Clusters));
            }

            double[][] observations = vectorSource.GetVectors(document.Clusters, NormalizationType.None);
            var        standardizer = Standardizer.GetNumericStandardizer(observations);

            observations = standardizer.StandardizeAll(observations);
            // ToArray copies only the outer array; data shares the inner rows,
            // so the NaN fix-up below is visible through data as well.
            var data = observations.ToArray();

            // Standardizing a constant column yields NaN — zero those out.
            for (int i = 0; i < observations.Length; i++)
            {
                for (int j = 0; j < observations[i].Length; j++)
                {
                    if (double.IsNaN(observations[i][j]))
                    {
                        observations[i][j] = 0;
                    }
                }
            }

            // One-class SVM: learns the "normal" region of the cluster vectors.
            var teacher = new OneclassSupportVectorLearning <Gaussian>
            {
                Kernel    = Gaussian.FromGamma(1.0 / data.Length),
                Nu        = 0.5,
                Shrinking = true,
                Tolerance = 0.001
            };

            var svm = teacher.Learn(data);

            double[] prediction = svm.Score(data);

            // Collect every cluster score that touches each sentence index.
            Dictionary <int, List <double> > weights = new Dictionary <int, List <double> >();

            for (int i = 0; i < prediction.Length; i++)
            {
                foreach (var sentenceItem in document.Clusters[i].Sentences)
                {
                    if (!weights.TryGetValue(sentenceItem.Index, out var classType))
                    {
                        classType = new List <double>();
                        weights[sentenceItem.Index] = classType;
                    }

                    classType.Add(prediction[i]);
                }
            }

            List <ProcessingTextBlock> anomaly    = new List <ProcessingTextBlock>();
            List <ProcessingTextBlock> resultData = new List <ProcessingTextBlock>();
            List <SentenceItem>        sentences  = new List <SentenceItem>();
            ProcessingTextBlock        cluster;
            bool?lastResult   = null;
            // Cutoff = 20th percentile of the summed per-sentence scores.
            var  cutoffIndex  = (int)(weights.Count * 0.2);
            var  cutoff       = weights.Select(item => item.Value.Sum()).OrderBy(item => item).Skip(cutoffIndex).First();
            var  allSentences = document.Clusters.SelectMany(item => item.Sentences)
                                .Distinct()
                                .OrderBy(item => item.Index)
                                .ToArray();

            if (allSentences.Length != weights.Count)
            {
                throw new ArgumentOutOfRangeException(nameof(document), "Sentence length mismatch");
            }

            // Walk sentences in order; each normal/anomalous flip closes a block.
            foreach (var sentence in allSentences)
            {
                var current = weights[sentence.Index].Sum();
                var result  = current > cutoff;
                if (lastResult != null &&
                    result != lastResult)
                {
                    cluster = new ProcessingTextBlock(sentences.ToArray());
                    sentences.Clear();
                    if (lastResult.Value)
                    {
                        resultData.Add(cluster);
                    }
                    else
                    {
                        anomaly.Add(cluster);
                    }
                }

                sentences.Add(sentence);
                lastResult = result;
            }

            // Flush the trailing block.
            // NOTE(review): lastResult.Value throws if allSentences was empty —
            // presumably guaranteed non-empty by the cluster check above; confirm.
            cluster = new ProcessingTextBlock(sentences.ToArray());
            sentences.Clear();
            if (lastResult.Value)
            {
                resultData.Add(cluster);
            }
            else
            {
                anomaly.Add(cluster);
            }

            // NOTE(review): builder is filled but never used or returned —
            // appears to be leftover debugging code.
            StringBuilder builder = new StringBuilder();

            foreach (var textCluster in anomaly)
            {
                foreach (var sentenceItem in textCluster.Sentences)
                {
                    builder.AppendLine(sentenceItem.Text);
                }
            }

            return(new DetectionResults(resultData.ToArray(), anomaly.ToArray()));
        }