示例#1
0
        //////////////////////////////////////////////////////
        // Mutator:
        //	CDTNode::Train
        //
        // Purpose:
        //	Calculates the information gain for the remaining
        //	attributes, the entropy of the remaining examples
        //	and creates splits/sub-nodes
        //////////////////////////////////////////////////////
        /// <summary>
        /// Trains this node from the examples currently assigned to it: computes the
        /// node's entropy and the information gain of every remaining attribute, picks
        /// the attribute with the highest gain, creates one child node per value of
        /// that attribute, hands every example to the matching child and then trains
        /// the children recursively.
        /// </summary>
        /// <remarks>
        /// The node becomes a leaf when it has no examples (it keeps the class that was
        /// assigned to it beforehand), when its entropy is zero (it takes the class of
        /// its first example), or when no attributes remain to split on (it takes the
        /// most common class among its examples).
        /// </remarks>
        public void Train()
        {
            // No examples to train on: this node has to be a leaf. m_iClass is left
            // untouched, so the node keeps whatever class it was given before this call
            // (a parent assigns its own most common class to each child, see below).
            if (m_rwExampleIDs.Count() == 0)
            {
                m_bIsLeaf = true;

                return;
            }

            m_bIsLeaf  = false;
            m_fEntropy = GetEntropy();

            // Zero entropy: there is nothing left to separate, so become a leaf that
            // carries the class of the first example.
            if (m_fEntropy == 0.0f)
            {
                m_bIsLeaf = true;
                m_iClass  = m_pDecisionTree.GetExample(m_rwExampleIDs[0]).GetClassIdentifier();

                return;
            }

            // The examples can still have mixed classes after every attribute has been
            // used up. Indexing the empty attribute list would throw, so only look for a
            // best attribute when at least one is left; otherwise fall through to the
            // majority-class leaf handling further down.
            bool bHasAttributes = m_rwRemainingAttributeIDs.Any();

            int   i                = 0;
            int   iBestClassCount  = 0;                                                           // The number of times the most common class appears
            int   iBestAttributeID = bHasAttributes ? m_rwRemainingAttributeIDs[0] : -1;          // Treat the first attribute initially as the best
            float fBestGain        = bHasAttributes ? GetInformationGain(iBestAttributeID) : 0.0f; // Information gain of the first attribute

            // One gain slot per attribute known to the tree, indexed by attribute ID.
            m_rwInformationGain = new List <float>();

            for (i = 0; i < m_pDecisionTree.GetNumAttributes(); ++i)
            {
                m_rwInformationGain.Add(0.0f);
            }

            if (bHasAttributes)
            {
                // Put the gain we already calculated into the proper place
                m_rwInformationGain[iBestAttributeID] = fBestGain;

                // Find the information gain for each other attribute and store it
                // while keeping track of the best gain and the attribute it belongs
                // to. The comparison is strict, so the earliest attribute wins ties.
                int iRemainingAttributeCount = m_rwRemainingAttributeIDs.Count();

                for (i = 1; i < iRemainingAttributeCount; ++i)
                {
                    int iAttributeID = m_rwRemainingAttributeIDs[i];
                    m_rwInformationGain[iAttributeID] = GetInformationGain(iAttributeID);

                    if (m_rwInformationGain[iAttributeID] > fBestGain)
                    {
                        fBestGain        = m_rwInformationGain[iAttributeID];
                        iBestAttributeID = iAttributeID;
                    }
                }

                // Store the attribute this node splits on
                m_iAttributeID = iBestAttributeID;
            }

            // Set the class counts to 0
            m_rwClassCount = new List <int>();

            int iTreeClassCount = m_pDecisionTree.GetNumClasses();

            for (i = 0; i < iTreeClassCount; ++i)
            {
                m_rwClassCount.Add(0);
            }

            // Without any attribute left to split on this node has to be a leaf; the
            // tally below then gives it the most common class of its examples.
            m_bIsLeaf = !bHasAttributes;

            // If we are not a leaf, then generate the children nodes
            // and give them the list of attributes they can still split on
            if (!m_bIsLeaf)
            {
                // Children may split on everything except the attribute used here.
                List <int> rwNewAttributeList = m_rwRemainingAttributeIDs
                    .Where(attributeId => attributeId != m_iAttributeID)
                    .ToList();

                // Create one sub-node per value of the chosen attribute
                CAttribute attributeToSplitOn   = m_pDecisionTree.GetAttribute(m_iAttributeID);
                int        iAttributeValueCount = attributeToSplitOn.GetNumValues();

                for (i = 0; i < iAttributeValueCount; ++i)
                {
                    CDTNode pNewNode = new CDTNode(m_pDecisionTree, rwNewAttributeList);
                    m_rwChildren.Add(pNewNode);
                }
            }

            // Send the examples to their appropriate child nodes
            // while finding the most common classification
            // for this node's examples
            int iExampleCount = m_rwExampleIDs.Count();

            for (i = 0; i < iExampleCount; ++i)
            {
                int      iExampleID      = m_rwExampleIDs[i];
                CExample curExample      = m_pDecisionTree.GetExample(iExampleID);
                int      iExampleClassID = curExample.GetClassIdentifier();

                // Save a tally of the occurrence of each classification
                // and which class is the most common
                ++m_rwClassCount[iExampleClassID];

                if (iBestClassCount < m_rwClassCount[iExampleClassID])
                {
                    iBestClassCount = m_rwClassCount[iExampleClassID];
                    m_iClass        = iExampleClassID;
                }

                // Match the example with the sub-tree for its value of the split attribute
                if (!m_bIsLeaf)
                {
                    int iValueID = curExample.GetValueIdentifier(m_iAttributeID);

                    m_rwChildren[iValueID].GetExampleIdentifierList().Add(iExampleID);
                }
            }

            // If we are a leaf, then we don't have any children
            // nodes to calculate so just return back
            if (m_bIsLeaf)
            {
                return;
            }

            // The examples now live in the children; none stay associated with this node.
            m_rwExampleIDs.Clear();

            // Calculate all the subtrees for this node
            int iNumValues = m_pDecisionTree.GetAttribute(m_iAttributeID).GetNumValues();

            for (i = 0; i < iNumValues; ++i)
            {
                // Seed each child with this node's most common class so that a child
                // that received no examples still has a classification.
                m_rwChildren[i].m_iClass = m_iClass;
                m_rwChildren[i].Train();
            }
        }
示例#2
0
        //////////////////////////////////////////////////////
        // Procedure:
        //  Stream
        //
        // Purpose:
        //  Outputs a human readable text stream to the given
        //  stream offset, indented to reflect the given
        //  depth of the node.
        //////////////////////////////////////////////////////
        /// <summary>
        /// Writes a human readable description of this node, and recursively of its
        /// children, to <paramref name="inOutputStream"/>.
        /// </summary>
        /// <param name="inOutputStream">Writer that receives the text.</param>
        /// <param name="iInDepth">
        /// Depth handed to PrintTabs before each line of this node; children are
        /// written with this depth plus one.
        /// </param>
        public void Stream(
            StreamWriter inOutputStream,
            int iInDepth = 0)
        {
            bool treeHasExamples = m_pDecisionTree.GetNumExamples() > 0;

            PrintTabs(inOutputStream, iInDepth);

            // Entropy and gains are only written when the tree holds examples.
            if (treeHasExamples)
            {
                inOutputStream.WriteLine($"Data set had an entropy of {m_fEntropy}");

                if (m_rwRemainingAttributeIDs.Any() && m_rwInformationGain.Any())
                {
                    PrintTabs(inOutputStream, iInDepth);

                    inOutputStream.Write($"Node had {m_rwRemainingAttributeIDs.Count()} attributes to choose from with gains of");

                    // Gains are stored by attribute ID, so look each one up by its ID.
                    foreach (int candidateId in m_rwRemainingAttributeIDs)
                    {
                        string candidateName = m_pDecisionTree.GetAttribute(candidateId).GetName();

                        inOutputStream.Write($" {candidateName}:{m_rwInformationGain[candidateId]}");
                    }

                    inOutputStream.WriteLine();
                }
            }

            PrintTabs(inOutputStream, iInDepth);

            if (m_bIsLeaf)
            {
                // A leaf only reports its classification.
                inOutputStream.WriteLine($"Class = {m_pDecisionTree.GetClass(m_iClass)}");
            }
            else
            {
                // An inner node reports the attribute it splits on, then each value
                // followed by the sub-tree stored at the same index.
                CAttribute splitAttribute = m_pDecisionTree.GetAttribute(m_iAttributeID);

                inOutputStream.WriteLine();
                PrintTabs(inOutputStream, iInDepth);
                inOutputStream.WriteLine($"Split on attribute {splitAttribute.GetName()}");

                int valueCount = splitAttribute.GetNumValues();

                for (int valueIndex = 0; valueIndex < valueCount; ++valueIndex)
                {
                    PrintTabs(inOutputStream, iInDepth);
                    inOutputStream.WriteLine($"Value {splitAttribute.GetValue(valueIndex)}");

                    m_rwChildren[valueIndex].Stream(inOutputStream, iInDepth + 1);
                }
            }

            // List the examples still attached to this node, if any.
            if (m_rwExampleIDs.Any())
            {
                PrintTabs(inOutputStream, iInDepth);
                inOutputStream.WriteLine("Examples:");
            }

            int exampleCount   = m_rwExampleIDs.Count();
            int attributeCount = m_pDecisionTree.GetNumAttributes();

            for (int exampleIndex = 0; exampleIndex < exampleCount; exampleIndex++)
            {
                CExample example = m_pDecisionTree.GetExample(m_rwExampleIDs[exampleIndex]);

                PrintTabs(inOutputStream, iInDepth);
                inOutputStream.Write($"Class: {m_pDecisionTree.GetClass(example.GetClassIdentifier())}");

                // One " name:value" pair per attribute known to the tree.
                for (int attributeIndex = 0; attributeIndex < attributeCount; attributeIndex++)
                {
                    CAttribute attribute = m_pDecisionTree.GetAttribute(attributeIndex);

                    inOutputStream.Write($" {attribute.GetName()}:{attribute.GetValue(example.GetValueIdentifier(attributeIndex))}");
                }

                inOutputStream.WriteLine();
            }
        }
示例#3
0
    /// <summary>
    /// Self test: loads training data from <paramref name="szInFilename"/>, trains a
    /// decision tree on it, classifies every training example again and reports each
    /// result on the console, then saves the tree to "<paramref name="szInFilename"/>.dts"
    /// and tries to load it back.
    /// </summary>
    /// <param name="szInFilename">File containing the training examples.</param>
    /// <returns>
    /// <c>true</c> when the training data could be loaded, otherwise <c>false</c>.
    /// Classification mismatches and the outcome of the save/load round trip are only
    /// reported on the console; they do not influence the return value.
    /// </returns>
    private static bool TestComplex(string szInFilename)
    {
        Console.WriteLine("Starting self test.");

        var testTree = new CDecisionTree();

        bool bOutIsTestSuccessful = testTree.LoadTrainingData(szInFilename);

        // Nothing can be tested without training data.
        if (!bOutIsTestSuccessful)
        {
            Console.Error.WriteLine($"ERROR - Unable to open test file '{szInFilename}'");

            return bOutIsTestSuccessful;
        }

        Console.WriteLine($"Starting self test using example file `{szInFilename}`");
        Console.WriteLine("\nTraining");
        testTree.Train();
        Console.WriteLine("Resulting tree:");
        Console.WriteLine(testTree);

        // Classify every training example and compare against its recorded class.
        int iSuccessCount = 0;
        int iFailedCount  = 0;
        int iExampleCount = testTree.GetNumExamples();

        for (int iExampleIndex = 0; iExampleIndex < iExampleCount; ++iExampleIndex)
        {
            CExample example            = testTree.GetExample(iExampleIndex);
            string   szCorrectAnswer    = testTree.GetClass(example.GetClassIdentifier());
            string   szCalculatedAnswer = testTree.Classify(example);

            bool bIsExampleAMatchWithClassification = szCorrectAnswer == szCalculatedAnswer;

            if (bIsExampleAMatchWithClassification)
            {
                ++iSuccessCount;
            }
            else
            {
                ++iFailedCount;
            }

            Console.WriteLine("----");
            Console.WriteLine($"Example has outcome of '{szCorrectAnswer}'");
            Console.WriteLine($"Classified example '{iExampleIndex}' as class `{szCalculatedAnswer}`");
            Console.WriteLine($"SELF TEST {(bIsExampleAMatchWithClassification ? "SUCCEEDED" : "FAILED")}!");
        }

        Console.WriteLine($"\nDone. {iSuccessCount} examples matched their classification, {iFailedCount} did not.");

        // Round trip: save the trained tree next to the input file and load it back.
        string szOutputFileName = $"{szInFilename}.dts";

        Console.WriteLine($"Saving to {szOutputFileName}");
        testTree.SavePrebuiltTree(szOutputFileName);
        Console.WriteLine("Finished saving. Now attempting load!");

        bool bIsLoadSuccess = testTree.LoadPrebuiltTree(szOutputFileName);

        // This value is the result of the load, so label it as such.
        Console.WriteLine($"Load = {bIsLoadSuccess}");

        if (bIsLoadSuccess)
        {
            Console.WriteLine("Loaded tree");
            Console.WriteLine(testTree);
        }
        else
        {
            // Do not claim a tree was loaded when the load failed.
            Console.Error.WriteLine($"ERROR - Unable to load prebuilt tree '{szOutputFileName}'");
        }

        return bOutIsTestSuccessful;
    }