Example #1
0
        /**
         * Builds the multinomial logistic-regression classifier from the
         * labeled training data in "traindata.txt" (tab-separated values,
         * one instance per row; column m_ClassIndex holds the 1-based
         * class label).
         *
         * @throws Exception if the classifier could not be built successfully
         */
        public void buildClassifier()
        {
            Console.ForegroundColor = ConsoleColor.White;
            Console.WriteLine("Extracting Data");
            // Purely cosmetic "..." animation; three print/erase cycles.
            for (int cycle = 0; cycle < 3; cycle++)
            {
                for (int dot = 0; dot < 3; dot++)
                {
                    Console.Write(".");
                    Thread.Sleep(330);
                }
                Console.Write("\b\b\b   \b\b\b");
                Thread.Sleep(330);
            }

            /* Load the labeled data. The using statement guarantees the file
               handle is released even if parsing below throws; previously the
               reader was only closed on the success path. */
            string allData;
            using (StreamReader reader = new StreamReader("traindata.txt"))
            {
                allData = reader.ReadToEnd();
            }

            /* Split off each row at the Carriage Return/Line Feed.
               This will work for Excel, Access, etc. default exports. */
            char[] delimiters = new char[] { '\r', '\n' };
            string[] rows = allData.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);

            /* Parse the tab-separated matrix: 20 columns per instance
               (19 attributes plus the class column). */
            double[,] Data = new double[rows.Length, 20];
            int row = 0;
            foreach (string r in rows)
            {
                string[] items = r.Split('\t');
                int col = 0;
                foreach (string item in items)
                {
                    Data[row, col] = Convert.ToDouble(item);
                    col++;
                }
                row++;
            }

            int nK = m_NumClasses - 1;              // Only K-1 class labels needed
            int nR = Data.GetLength(1) - 1;         // Number of attributes (parameters)
            int nC = Data.GetLength(0);             // Number of training instances

            double[,] m_Data = new double[nC, nR + 1];      // Data with leading intercept column
            int[] Y = new int[nC];                          // Zero-based class labels
            double[] xMean = new double[nR + 1];            // Attribute means
            double[] xSD = new double[nR + 1];              // Attribute stddev's
            double[] sY = new double[nK + 1];               // Instance count per class
            double[,] m_Par = new double[nR + 1, nK];       // Optimized parameter values

            for (int i = 0; i < nC; i++)
            {
                // Labels are 1-based in the file; shift to 0-based.
                // (Was the magic constant 19 — m_ClassIndex is the class column,
                //  matching the k != m_ClassIndex test below.)
                Y[i] = (int)(Data[i, m_ClassIndex] - 1);
                m_Data[i, 0] = 1;                           // Intercept term
                int j = 1;
                for (int k = 0; k <= nR; k++)
                {
                    if (k != m_ClassIndex)
                    {
                        double z = Data[i, k];
                        m_Data[i, j] = z;
                        xMean[j] += z;                      // Accumulate sum ...
                        xSD[j] += z * z;                    // ... and sum of squares
                        j++;
                    }
                }
                // Class count
                sY[Y[i]]++;
            }

            // Column 0 is the intercept and is never normalized.
            xMean[0] = 0; xSD[0] = 1;
            for (int j = 1; j <= nR; j++)
            {
                xMean[j] = xMean[j] / nC;
                // Sample standard deviation via the sum-of-squares identity;
                // Math.Abs guards against tiny negative values from rounding.
                xSD[j] = Math.Sqrt(Math.Abs(xSD[j] - nC * xMean[j] * xMean[j]) / (nC - 1));
            }

            if (m_Debug)
            {
                // Output stats about input data
                Console.WriteLine("Descriptives...");
                for (int m = 0; m <= nK; m++)
                    Console.WriteLine("{0} cases have class {1}", sY[m], m);
                Console.WriteLine("\n Variable            Avg                   SD    ");
                for (int j = 1; j <= nR; j++)
                    Console.WriteLine("   {0}        {1}       {2}", j, xMean[j], xSD[j]);
            }

            // Normalise input data (z-scores) so the optimizer is well-conditioned.
            for (int i = 0; i < nC; i++)
            {
                for (int j = 0; j <= nR; j++)
                {
                    if (xSD[j] != 0)
                        m_Data[i, j] = (m_Data[i, j] - xMean[j]) / xSD[j];
                }
            }

            Console.WriteLine("\nBuilding Model(This may take a while)");

            double[] x = new double[(nR + 1) * nK];     // Flattened coefficient vector
            double[,] b = new double[2, x.Length];      // Boundary constraints, N/A here

            // Initialize: intercepts from the null model, all bounds open (NaN).
            for (int p = 0; p < nK; p++)
            {
                int offset = p * (nR + 1);
                x[offset] = Math.Log(sY[p] + 1.0) - Math.Log(sY[nK] + 1.0); // Null model
                b[0, offset] = Double.NaN;
                b[1, offset] = Double.NaN;
                for (int q = 1; q <= nR; q++)
                {
                    x[offset + q] = 0.0;
                    b[0, offset + q] = Double.NaN;
                    b[1, offset + q] = Double.NaN;
                }
            }

            setVar(m_Data, m_Par, xMean, xSD);
            LogicOpt opt = new LogicOpt();
            opt.setClassLabels(Y);

            if (m_MaxIts == -1) // Search until convergence
            {
                x = opt.findArgMin(x, b);
                // null result => iteration budget exhausted; resume from the
                // optimizer's current point until it converges.
                while (x == null)
                {
                    x = opt.getVarbValues();
                    Console.WriteLine("200 iterations finished, not enough!");
                    x = opt.findArgMin(x, b);
                }
                Console.WriteLine(" -------------<Converged>--------------");
            }
            else
            {
                opt.setMaxIteration(m_MaxIts);
                x = opt.findArgMin(x, b);
                if (x == null) // Not enough, but use the current value
                    x = opt.getVarbValues();
            }

            m_LL = -opt.getMinFunction(); // Log-likelihood

            // Convert coefficients back to non-normalized attribute units.
            for (int i = 0; i < nK; i++)
            {
                m_Par[0, i] = x[i * (nR + 1)];
                for (int j = 1; j <= nR; j++)
                {
                    m_Par[j, i] = x[i * (nR + 1) + j];
                    if (xSD[j] != 0)
                    {
                        m_Par[j, i] /= xSD[j];
                        m_Par[0, i] -= m_Par[j, i] * xMean[j];
                    }
                }
            }
            // Copy for global Parameters
            setVar(m_Data, m_Par, xMean, xSD);
            // Don't need data matrix anymore
            m_Data = null;

        }
Example #2
0
 /// <summary>
 /// Creates a segment node that joins two WHERE sub-expressions
 /// with the given logical operator.
 /// </summary>
 /// <param name="left">Left-hand operand of the expression.</param>
 /// <param name="opt">Logical operator combining the operands.</param>
 /// <param name="right">Right-hand operand of the expression.</param>
 public WhereSegmentNode(WhereNode left, LogicOpt opt, WhereNode right)
 {
     this.right = right;
     this.opt = opt;
     this.left = left;
 }