Esempio n. 1
0
        /// <summary>
        /// Trains a decision tree on the labeled tomograms obtained from
        /// <c>LabeledTomogramsFromPaintedFiles</c> and serializes the resulting root node
        /// to <c>serialized.dat</c> in the current working directory.
        /// </summary>
        static void Train()
        {
            List<LabeledTomogram> trainingSet = LabeledTomogramsFromPaintedFiles();

            // Training configuration (values still marked as provisional in the original code).
            var trainingOptions = new DecisionTreeOptions();
            trainingOptions.MaximumNumberOfRecursionLevels = 25;
            trainingOptions.NumberOfFeatures = 300;
            trainingOptions.NumberOfThresholds = 35;
            trainingOptions.OffsetXMax = 40;
            trainingOptions.OffsetXMin = -40;
            trainingOptions.OffsetYMax = 40;
            trainingOptions.OffsetYMin = -40;
            trainingOptions.OutOfRangeValue = 1000000;
            trainingOptions.SplittingThresholdMax = 0.2f;
            trainingOptions.SufficientGainLevel = 0;
            trainingOptions.PercentageOfPixelsToUse = 0.9f;

            // A fixed seed is passed so the random source is the same on every run.
            var seededRandom = new Random(1234);
            DecisionTreeNode root = DecisionTreeBuilder.Train(trainingSet, seededRandom, trainingOptions);

            var formatter = new BinaryFormatter();
            using (FileStream output = File.Create("serialized.dat"))
            {
                formatter.Serialize(output, root);
            }
        }
        /// <summary>
        /// Builds a decision tree for <c>intMap</c> and hands it to <c>strategy</c>, which plans
        /// and then generates the code.
        /// </summary>
        /// <param name="emit">Emitter stored in the <c>emit</c> field before code generation starts.</param>
        /// <param name="ldvalue">Stored in the <c>ldvalue</c> field; presumably emits the load of the switched value — confirm against <c>strategy</c>.</param>
        /// <param name="action">Stored in the <c>action</c> field; presumably invoked per generated branch — confirm against <c>strategy</c>.</param>
        protected override void DoBuild(
            EmitSyntax emit,
            Pipe <EmitSyntax> ldvalue,
            SwitchGeneratorAction action)
        {
            this.action = action;

            // The binary-tree builder below is compiled out; the active path is the #else branch.
#if false
            var decisionTree = new BinaryDecisionTreeBuilder(intMap.DefaultValue, platformInfo);
            var node         = decisionTree.Build(intMap.Enumerate().ToArray());
#else
            this.builder = new DecisionTreeBuilder(platformInfo);
            var node = builder.Build(
                intMap,
                possibleBounds,
                frequency);
#endif
            // State is assigned before the strategy runs.
            // NOTE(review): presumably the strategy reads these fields — confirm.
            this.emit    = emit;
            this.ldvalue = ldvalue;
            this.labels  = new List <Ref <Labels> >();

            // Two-phase generation: plan over the tree first, then emit.
            strategy.PlanCode(node);
            strategy.GenerateCode();

            // Debug.Write(node);
        }
Esempio n. 3
0
        /// <summary>
        /// Compares every record in <paramref name="records"/> against the rows of the final
        /// data set file and collects, per input record, the data-set records that the forest
        /// classifies as a match.
        /// </summary>
        /// <param name="records">Records to find matches for; each becomes a key of the result.</param>
        /// <returns>A dictionary mapping each input record to the list of matching records found.</returns>
        public static Dictionary<Record, List<Record>> GetMatches(List<Record> records)
        {
            var matchesByRecord = new Dictionary<Record, List<Record>>();

            string[] datasetRows = File.ReadAllLines("c:/users/brush/desktop/finaldataset.csv");
            DecisionTree[] forest = DataLoader.LoadForestFromDirectory("C:/users/brush/desktop/forest");

            // Every input record gets an (initially empty) result list.
            foreach (Record record in records)
            {
                matchesByRecord.Add(record, new List<Record>());
            }

            Console.WriteLine("Searching for matches...");

            // Row 0 is skipped; the remaining rows are processed in parallel.
            Parallel.For(1, datasetRows.Length, rowIndex =>
            {
                // Progress output every 10000th row index.
                if (rowIndex % 10000 == 0)
                {
                    Console.WriteLine($"{(rowIndex / (datasetRows.Length * 1.0) * 100)}%");
                }

                string[] fields = datasetRows[rowIndex].Split(',');

                // Rows without an enterprise id are ignored.
                if (fields[0] == "")
                {
                    return;
                }

                int enterpriseId = int.Parse(fields[0]);

                // Only rows with an id above this threshold are compared.
                if (enterpriseId <= 15374761)
                {
                    return;
                }

                Record candidate = DataCleaner.CleanRecord(Record.FromFinalDatasetString(fields));

                foreach (Record toMatch in records)
                {
                    // A record is never paired with a record equal to itself.
                    if (toMatch.Equals(candidate))
                    {
                        continue;
                    }

                    var pair = new RecordPair();
                    pair.Record1 = toMatch;
                    pair.Record2 = candidate;

                    if (DecisionTreeBuilder.IsMatch(pair, forest, null))
                    {
                        // The result lists are shared between worker threads.
                        lock (matchesByRecord)
                        {
                            matchesByRecord[toMatch].Add(candidate);
                        }
                    }
                }
            });

            return matchesByRecord;
        }
        /// <summary>
        /// Initializes an <see cref="ActionSelectionDecisionTree"/> from the given action descriptors.
        /// </summary>
        /// <param name="actions">
        /// The <see cref="ActionDescriptorCollection"/> whose version is recorded and whose items
        /// are used to generate the tree.
        /// </param>
        public ActionSelectionDecisionTree(ActionDescriptorCollection actions)
        {
            Version = actions.Version;

            var descriptors = actions.Items;
            var classifier = new ActionDescriptorClassifier();

            _root = DecisionTreeBuilder<ActionDescriptor>.GenerateTree(descriptors, classifier);
        }
        /// <summary>
        /// Initializes an <see cref="ActionSelectionDecisionTree"/> from the actions that have no
        /// attribute route template.
        /// </summary>
        /// <param name="actions">
        /// The <see cref="ActionDescriptorCollection"/> whose version is recorded and whose items
        /// are filtered before the tree is generated.
        /// </param>
        public ActionSelectionDecisionTree(ActionDescriptorCollection actions)
        {
            Version = actions.Version;

            // Only actions whose AttributeRouteInfo is missing or has a null template go into the tree.
            var withoutTemplate = actions.Items
                .Where(descriptor => descriptor.AttributeRouteInfo?.Template == null)
                .ToArray();
            var classifier = new ActionDescriptorClassifier();

            _root = DecisionTreeBuilder<ActionDescriptor>.GenerateTree(withoutTemplate, classifier);
        }
Esempio n. 6
0
        /// <summary>
        /// Interactive check: reads two record lines from the console, runs them through the
        /// forest found in the current directory with a <see cref="TreeLogger"/> attached, and
        /// optionally appends to <c>gothere.txt</c> every training pair for which
        /// <c>DecisionTreeBuilder.ReplayDecision</c> returns true for the logged questions.
        /// </summary>
        public static void Test()
        {
            Console.WriteLine("Line 1:");
            string line1 = Console.ReadLine();

            Console.WriteLine("Line 2:");
            string line2 = Console.ReadLine();

            RecordPair pair = new RecordPair();

            pair.Record1 = DataCleaner.CleanRecord(Record.FromString(line1));
            pair.Record2 = DataCleaner.CleanRecord(Record.FromString(line2));

            DecisionTree[] forest = DataLoader.LoadForestFromDirectory(".");

            TreeLogger logger = new TreeLogger();

            // The boolean result is not needed here; the call is kept because the logger is passed
            // in and its SplittingQuestionsToTheBottom is read below.
            // NOTE(review): presumably IsMatch fills the logger — confirm.
            DecisionTreeBuilder.IsMatch(pair, forest, logger);

            Console.WriteLine("Search for those in training data who make it there?");
            string response = Console.ReadLine();

            // Console.ReadLine returns null at end of input; string.Equals handles that and the
            // ordinal comparison avoids culture-dependent lowercasing.
            if (!string.Equals(response, "y", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            using (StreamWriter sw = File.AppendText("c:/users/brush/desktop/gothere.txt"))
            {
                List<RecordPair> pairs = new List<RecordPair>();
                Console.Write("Loading training data for this iteration...");
                pairs.AddRange(DataLoader.GetPositivesFromMRNData("mrns.csv"));
                pairs.AddRange(DataLoader.GetHandPassedSets("more.csv"));
                pairs.AddRange(DataLoader.GetRejectedRecordPairs("rejected.txt"));
                Console.WriteLine("...done");

                // Private gate object so that writes from the parallel workers do not interleave.
                object writeGate = new object();

                Parallel.ForEach(pairs, p =>
                {
                    if (DecisionTreeBuilder.ReplayDecision(p, logger.SplittingQuestionsToTheBottom))
                    {
                        lock (writeGate)
                        {
                            sw.WriteLine(p);
                        }
                    }
                });
            }
        }
    public void BuildTree_Empty()
    {
        // Arrange: an empty item list.
        var noItems = new List<Item>();
        var classifier = new ItemClassifier();

        // Act
        var root = DecisionTreeBuilder<Item>.GenerateTree(noItems, classifier);

        // Assert: the root has neither criteria nor matches.
        Assert.Empty(root.Criteria);
        Assert.Empty(root.Matches);
    }
    public void BuildTree_WithMultipleItems()
    {
        // Arrange: two items sharing the controller but differing in action.
        var buyItem = new Item();
        buyItem.Criteria.Add("controller", new DecisionCriterionValue(value: "Store"));
        buyItem.Criteria.Add("action", new DecisionCriterionValue(value: "Buy"));

        var checkoutItem = new Item();
        checkoutItem.Criteria.Add("controller", new DecisionCriterionValue(value: "Store"));
        checkoutItem.Criteria.Add("action", new DecisionCriterionValue(value: "Checkout"));

        var items = new List<Item> { buyItem, checkoutItem };

        // Act
        var root = DecisionTreeBuilder<Item>.GenerateTree(items, new ItemClassifier());

        // Assert: the root splits on "action" only.
        Assert.Empty(root.Matches);

        var actionCriterion = Assert.Single(root.Criteria);
        Assert.Equal("action", actionCriterion.Key);

        // "Buy" branch: action -> controller "Store" -> first item.
        var buyNode = actionCriterion.Branches["Buy"];
        Assert.Empty(buyNode.Matches);

        var buyControllerCriterion = Assert.Single(buyNode.Criteria);
        Assert.Equal("controller", buyControllerCriterion.Key);

        var buyStoreBranch = Assert.Single(buyControllerCriterion.Branches);
        Assert.Equal("Store", buyStoreBranch.Key);
        Assert.Empty(buyStoreBranch.Value.Criteria);
        Assert.Same(buyItem, Assert.Single(buyStoreBranch.Value.Matches));

        // "Checkout" branch: action -> controller "Store" -> second item.
        var checkoutNode = actionCriterion.Branches["Checkout"];
        Assert.Empty(checkoutNode.Matches);

        var checkoutControllerCriterion = Assert.Single(checkoutNode.Criteria);
        Assert.Equal("controller", checkoutControllerCriterion.Key);

        var checkoutStoreBranch = Assert.Single(checkoutControllerCriterion.Branches);
        Assert.Equal("Store", checkoutStoreBranch.Key);
        Assert.Empty(checkoutStoreBranch.Value.Criteria);
        Assert.Same(checkoutItem, Assert.Single(checkoutStoreBranch.Value.Matches));
    }
    public void BuildTree_TrivialMatch()
    {
        // Arrange: a single item without any criteria.
        var criterionlessItem = new Item();
        var items = new List<Item> { criterionlessItem };

        // Act
        var root = DecisionTreeBuilder<Item>.GenerateTree(items, new ItemClassifier());

        // Assert: the item is a match directly at the root.
        Assert.Empty(root.Criteria);

        var onlyMatch = Assert.Single(root.Matches);
        Assert.Same(criterionlessItem, onlyMatch);
    }
Esempio n. 10
0
    public void BuildTree_WithMultipleCriteria()
    {
        // Arrange: one item carrying three criteria.
        var addUserItem = new Item();
        addUserItem.Criteria.Add("area", new DecisionCriterionValue(value: "Admin"));
        addUserItem.Criteria.Add("controller", new DecisionCriterionValue(value: "Users"));
        addUserItem.Criteria.Add("action", new DecisionCriterionValue(value: "AddUser"));

        var items = new List<Item> { addUserItem };

        // Act
        var root = DecisionTreeBuilder<Item>.GenerateTree(items, new ItemClassifier());

        // Assert: level 1 - area "Admin".
        Assert.Empty(root.Matches);

        var areaCriterion = Assert.Single(root.Criteria);
        Assert.Equal("area", areaCriterion.Key);

        var adminBranch = Assert.Single(areaCriterion.Branches);
        Assert.Equal("Admin", adminBranch.Key);
        Assert.Empty(adminBranch.Value.Matches);

        // Level 2 - controller "Users".
        var controllerCriterion = Assert.Single(adminBranch.Value.Criteria);
        Assert.Equal("controller", controllerCriterion.Key);

        var usersBranch = Assert.Single(controllerCriterion.Branches);
        Assert.Equal("Users", usersBranch.Key);
        Assert.Empty(usersBranch.Value.Matches);

        // Level 3 - action "AddUser", where the item finally matches.
        var actionCriterion = Assert.Single(usersBranch.Value.Criteria);
        Assert.Equal("action", actionCriterion.Key);

        var addUserBranch = Assert.Single(actionCriterion.Branches);
        Assert.Equal("AddUser", addUserBranch.Key);
        Assert.Empty(addUserBranch.Value.Criteria);
        Assert.Same(addUserItem, Assert.Single(addUserBranch.Value.Matches));
    }
Esempio n. 11
0
        public void TestElementaryChecks()
        {
            // Map with a default action of -100 and two single-element arrows.
            const int FallbackAction = -100;
            var actionMap = new MutableIntMap<int> { DefaultValue = FallbackAction };

            var bounds = new IntInterval(0, 9);
            var frequency = new UniformIntFrequency(bounds);

            actionMap.Set(new IntArrow<int>(2, 100));
            actionMap.Set(new IntArrow<int>(5, 200));

            // Build the tree and print the resulting program with the default-action decision.
            var treeBuilder = new DecisionTreeBuilder(platformInfo);
            var root = treeBuilder.Build(actionMap, bounds, frequency);

            PrintProgram(root, treeBuilder.DefaultActionDecision);
        }
Esempio n. 12
0
        public void TestSameActionUnification()
        {
            var frequency = new UniformIntFrequency(new IntInterval(-100, 100));

            // Several disjoint arrows all mapping to the same action (1); default is -100.
            const int FallbackAction = -100;
            var actionMap = new MutableIntMap<int> { DefaultValue = FallbackAction };

            actionMap.Set(new IntArrow<int>(1, 1));
            actionMap.Set(new IntArrow<int>(3, 49, 1));
            actionMap.Set(new IntArrow<int>(51, 1));
            actionMap.Set(new IntArrow<int>(54, 100, 1));

            var treeBuilder = new DecisionTreeBuilder(platformInfo);
            var bounds = new IntInterval(0, 1000);
            var root = treeBuilder.Build(actionMap, bounds, frequency);

            PrintProgram(root);
        }
Esempio n. 13
0
        /// <summary>
        /// Reads one record line from the console and prints every row of the final data set
        /// (with an enterprise id above 15374761) that the forest classifies as a match for it.
        /// </summary>
        public static void List()
        {
            Console.WriteLine("Line to match:");
            string line1   = Console.ReadLine();
            Record toMatch = Record.FromString(line1);

            string[] finalDataSetList = File.ReadAllLines("c:/users/brush/desktop/finaldataset.csv");

            // The forest does not change between rows, so it is loaded from disk once (on first
            // use) instead of once per candidate row.
            DecisionTree[] forest = null;

            Console.WriteLine("Searching for matches...");

            // Index 0 is skipped (presumably a header row — confirm against the file format).
            for (int c = 1; c < finalDataSetList.Length; c++)
            {
                string[] bits = finalDataSetList[c].Split(',');

                // Rows without an enterprise id cannot be compared.
                if (bits[0] == "")
                {
                    continue;
                }

                int enterpriseId = int.Parse(bits[0]);

                if (enterpriseId <= 15374761)
                {
                    continue;
                }

                // NOTE(review): unlike GetMatches, the record is not passed through
                // DataCleaner.CleanRecord here — confirm whether that is intended.
                Record comparisonRecord = Record.FromFinalDatasetString(bits);

                RecordPair pair = new RecordPair
                {
                    Record1 = toMatch,
                    Record2 = comparisonRecord,
                };

                if (forest == null)
                {
                    forest = DataLoader.LoadForestFromDirectory("C:/users/brush/desktop/forest");
                }

                if (DecisionTreeBuilder.IsMatch(pair, forest, null))
                {
                    Console.WriteLine(comparisonRecord);
                    Console.WriteLine();
                }
            }
        }
        /// <summary>
        /// Splits <paramref name="entries"/> into entries that go into the decision tree and
        /// conventional entries that are kept in a flat list, then generates the tree.
        /// </summary>
        /// <param name="entries">The outbound matches to partition.</param>
        public LinkGenerationDecisionTree(IReadOnlyList<OutboundMatch> entries)
        {
            // Entries with exact required values end up here and feed the tree.
            var treeEntries = new List<OutboundMatch>();

            _conventionalEntries = new List<OutboundMatch>();

            // An entry whose required link values contain RoutePattern.RequiredValueAny is treated
            // as a conventional route: that value acts as a wildcard, whereas an attribute route
            // entry is denormalized to an exact set of required values.
            //
            // Conventional routes are only expected to show up here for endpoint routing.
            for (var index = 0; index < entries.Count; index++)
            {
                var candidate = entries[index];

                var hasWildcardValue = false;
                foreach (var requiredValue in candidate.Entry.RequiredLinkValues)
                {
                    if (RoutePattern.IsRequiredValueAny(requiredValue.Value))
                    {
                        hasWildcardValue = true;
                        break;
                    }
                }

                if (hasWildcardValue)
                {
                    _conventionalEntries.Add(candidate);
                }
                else
                {
                    treeEntries.Add(candidate);
                }
            }

            var classifier = new OutboundMatchClassifier();
            _root = DecisionTreeBuilder<OutboundMatch>.GenerateTree(treeEntries, classifier);
        }
Esempio n. 15
0
        public void BuildTreeFromDocument_HostileMocking_TreeIsProperlyBuilt()
        {
            // Arrange
            var treeBuilder = new DecisionTreeBuilder();

            var morty = new Person("Morty", Gender.Male, Sex.Male, Orientation.Undefined, Guid.NewGuid());

            var character = new global::RNPC.Core.Character(morty, Archetype.TheInnocent)
            {
                FileController = new DecisionTreeFileController(),
                DecisionTreeBuilder = new DecisionTreeBuilder()
            };

            // A hostile, mocking verbal interaction aimed at the character.
            var hostileMockery = new Action
            {
                Tone = Tone.Mocking,
                EventType = EventType.Interaction,
                ActionType = ActionType.Verbal,
                Intent = Intent.Hostile,
                Message = "Did your mother dress you up this morning?",
                Target = character.MyName,
                EventName = "Mocking",
                Source = "The Bully"
            };

            // Act
            var fileController = new DecisionTreeFileController();
            var builtTree = treeBuilder.BuildTreeFromDocument(fileController, hostileMockery, "Morty");
            var rootNode = builtTree as AbstractDecisionNode;

            // Assert
            Assert.IsNotNull(rootNode);

            // The expected pass/fail value is adjusted per event.
            Assert.IsFalse(string.IsNullOrEmpty(rootNode.DefaultTreeReaction));
            Assert.AreEqual(60, rootNode.ConfiguredPassFailValue);
            Assert.IsTrue(rootNode.LeftNode != null);
            Assert.IsTrue(rootNode.RightNode != null);
        }
Esempio n. 16
0
        /// <summary>
        /// Generates the decision tree from <paramref name="entries"/> and records, per required
        /// link value key (compared case-insensitively), the set of values seen across all entries.
        /// </summary>
        /// <param name="entries">The outbound matches to build the tree and the known-value sets from.</param>
        public LinkGenerationDecisionTree(IReadOnlyList<OutboundMatch> entries)
        {
            var classifier = new OutboundMatchClassifier();
            _root = DecisionTreeBuilder<OutboundMatch>.GenerateTree(entries, classifier);

            _knownValues = new Dictionary<string, HashSet<object>>(StringComparer.OrdinalIgnoreCase);

            for (var index = 0; index < entries.Count; index++)
            {
                foreach (var requiredValue in entries[index].Entry.RequiredLinkValues)
                {
                    // Create the value set the first time a key is seen.
                    HashSet<object> valuesForKey;
                    if (!_knownValues.TryGetValue(requiredValue.Key, out valuesForKey))
                    {
                        valuesForKey = new HashSet<object>(RouteValueEqualityComparer.Default);
                        _knownValues.Add(requiredValue.Key, valuesForKey);
                    }

                    // A null value is stored as the empty string.
                    valuesForKey.Add(requiredValue.Value ?? string.Empty);
                }
            }
        }
Esempio n. 17
0
        public void BuildTreeFromDocument_NeutralSalute_TreeIsProperlyBuilt()
        {
            // Arrange
            var treeBuilder = new DecisionTreeBuilder();

            var morty = new Person("Morty", Gender.Male, Sex.Male, Orientation.Undefined, Guid.NewGuid());

            var character = new global::RNPC.Core.Character(morty, Archetype.TheInnocent)
            {
                FileController = new DecisionTreeFileController(),
                DecisionTreeBuilder = new DecisionTreeBuilder()
            };

            // A neutral, non-verbal salute aimed at the character.
            var neutralSalute = new Action
            {
                EventType = EventType.Interaction,
                ActionType = ActionType.NonVerbal,
                Intent = Intent.Neutral,
                Message = "",
                Target = character.MyName,
                EventName = "Salute",
                Source = "The Ambassador"
            };

            // Act
            var fileController = new DecisionTreeFileController();
            var builtTree = treeBuilder.BuildTreeFromDocument(fileController, neutralSalute, "Morty");
            var rootNode = builtTree as AbstractDecisionNode;

            // Assert
            Assert.IsNotNull(rootNode);

            // The expected pass/fail value is adjusted per event.
            Assert.IsFalse(string.IsNullOrEmpty(rootNode.DefaultTreeReaction));
            Assert.AreEqual(0, rootNode.ConfiguredPassFailValue);
            Assert.IsTrue(rootNode.LeftNode != null);
            Assert.IsTrue(rootNode.RightNode != null);
        }
Esempio n. 18
0
        public void TestBalanced()
        {
            // Non-uniform frequencies: elements 1 and 50 carry far more weight than the ranges.
            var frequency = new MutableIntFrequency { DefaultValue = 0.0000001 };
            frequency.Set(new IntArrow<double>(1, 520.0));
            frequency.Set(new IntArrow<double>(2, 49, 3.0));
            frequency.Set(new IntArrow<double>(50, 236.0));
            frequency.Set(new IntArrow<double>(51, 100, 2.0));

            // Four distinct actions over [1, 100]; everything else falls back to -1.
            const int FallbackAction = -1;
            var actionMap = new MutableIntMap<int> { DefaultValue = FallbackAction };
            actionMap.Set(new IntArrow<int>(1, 1));
            actionMap.Set(new IntArrow<int>(2, 49, 2));
            actionMap.Set(new IntArrow<int>(50, 3));
            actionMap.Set(new IntArrow<int>(51, 100, 4));

            var treeBuilder = new DecisionTreeBuilder(platformInfo);
            var bounds = new IntInterval(int.MinValue, int.MaxValue);
            var root = treeBuilder.Build(actionMap, bounds, frequency);

            PrintProgram(root, treeBuilder.DefaultActionDecision);

            // Below the mapped range.
            Assert.AreEqual(-1, root.Decide(int.MinValue));
            Assert.AreEqual(-1, root.Decide(0));

            // Inside the mapped range, including the range edges.
            Assert.AreEqual(1, root.Decide(1));
            Assert.AreEqual(2, root.Decide(2));
            Assert.AreEqual(2, root.Decide(49));
            Assert.AreEqual(3, root.Decide(50));
            Assert.AreEqual(4, root.Decide(51));
            Assert.AreEqual(4, root.Decide(100));

            // Above the mapped range.
            Assert.AreEqual(-1, root.Decide(200));
            Assert.AreEqual(-1, root.Decide(bounds.Last));
        }
Esempio n. 19
0
    public void BuildTree_WithInteriorMatch()
    {
        // Arrange: two fully specified items plus one that only specifies the action.
        var storeBuy = new Item();
        storeBuy.Criteria.Add("controller", new DecisionCriterionValue(value: "Store"));
        storeBuy.Criteria.Add("action", new DecisionCriterionValue(value: "Buy"));

        var storeCheckout = new Item();
        storeCheckout.Criteria.Add("controller", new DecisionCriterionValue(value: "Store"));
        storeCheckout.Criteria.Add("action", new DecisionCriterionValue(value: "Checkout"));

        var buyOnly = new Item();
        buyOnly.Criteria.Add("action", new DecisionCriterionValue(value: "Buy"));

        var items = new List<Item> { storeBuy, storeCheckout, buyOnly };

        // Act
        var root = DecisionTreeBuilder<Item>.GenerateTree(items, new ItemClassifier());

        // Assert: the action-only item matches at the interior "Buy" node.
        Assert.Empty(root.Matches);

        var actionCriterion = Assert.Single(root.Criteria);
        Assert.Equal("action", actionCriterion.Key);

        var buyNode = actionCriterion.Branches["Buy"];
        var interiorMatch = Assert.Single(buyNode.Matches);
        Assert.Same(buyOnly, interiorMatch);
    }
Esempio n. 20
0
    public void BuildTree_WithDivergentCriteria()
    {
        // Arrange: two controller/action items and one item with an unrelated criterion.
        var storeBuy = new Item();
        storeBuy.Criteria.Add("controller", new DecisionCriterionValue(value: "Store"));
        storeBuy.Criteria.Add("action", new DecisionCriterionValue(value: "Buy"));

        var storeCheckout = new Item();
        storeCheckout.Criteria.Add("controller", new DecisionCriterionValue(value: "Store"));
        storeCheckout.Criteria.Add("action", new DecisionCriterionValue(value: "Checkout"));

        var unrelated = new Item();
        unrelated.Criteria.Add("stub", new DecisionCriterionValue(value: "Bleh"));

        var items = new List<Item> { storeBuy, storeCheckout, unrelated };

        // Act
        var root = DecisionTreeBuilder<Item>.GenerateTree(items, new ItemClassifier());

        // Assert: the root carries both criteria, "action" first and "stub" second.
        Assert.Empty(root.Matches);

        var firstCriterion = root.Criteria[0];
        Assert.Equal("action", firstCriterion.Key);

        var secondCriterion = root.Criteria[1];
        Assert.Equal("stub", secondCriterion.Key);
    }
Esempio n. 21
0
        /// <summary>
        /// Iteratively trains a decision tree: trains on the current training data, tests the tree
        /// against the negatives produced from the answer key, appends the misclassified pairs
        /// ("misfits") to the misfits file and to the training data, and repeats until a pass
        /// finds no misfits.
        /// </summary>
        /// <param name="misfitsFilePath">File that accumulates misfit id pairs; created if missing.</param>
        /// <param name="answerKeyPath">Path handed to <c>DataLoader.LoadAllPositivesFromAnswerKey</c>.</param>
        /// <param name="finalDataSetPath">Path handed to <c>DataLoader.LoadFinalDataSet</c>.</param>
        public static void DoIt(string misfitsFilePath, string answerKeyPath, string finalDataSetPath)
        {
            if (!File.Exists(misfitsFilePath))
            {
                File.Create(misfitsFilePath).Close();
            }

            Console.Write("Loading final data set...");
            Dictionary<int, Record> finalDataSet = DataLoader.LoadFinalDataSet(finalDataSetPath);

            Console.WriteLine("...done");

            List<RecordPair> trainingData = new List<RecordPair>();

            Console.Write("Getting positive training data...");
            List<RecordPair> allPositives = DataLoader.LoadAllPositivesFromAnswerKey(answerKeyPath, finalDataSet);

            trainingData.AddRange(allPositives);
            Console.WriteLine("...done");
            Console.Write("Getting negative training data (misfits)...");
            trainingData.AddRange(DataLoader.GetPairsFromMisfitsFile(misfitsFilePath, finalDataSet));
            Console.WriteLine("...done");


            Console.Write("Generating splitting questions for this iteration...");
            int maximumEditDistance = 3;

            SplittingQuestion[] splittingQuestions = DecisionTreeBuilder.GenerateSplittingQuestions(maximumEditDistance);
            Console.WriteLine("...done");

            // Train / test / grow loop; exits once a test pass finds no misfits.
            for (; ; )
            {
                DecisionTreeBuilder builder = new DecisionTreeBuilder();

                Console.Write("Training...");
                int positiveCount = trainingData.Count(n => n.IsMatch);
                int negativeCount = trainingData.Count(n => !n.IsMatch);
                Console.WriteLine($"\tThere are {positiveCount} positive instances and {negativeCount} negative instances in the data...");
                DecisionTree trainedTree = builder.Train(trainingData, splittingQuestions, 1, 0, null);
                Console.WriteLine("...done.");

                Console.Write("Serializing to disk the latest tree...");
                // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data; it is
                // kept here so the on-disk format of tree.dat does not change.
                BinaryFormatter bf = new BinaryFormatter();
                using (FileStream fout = File.Create("tree.dat"))
                {
                    bf.Serialize(fout, trainedTree);
                }
                Console.WriteLine("...done");

                Console.WriteLine("Now let's test the tree and find the misfits...");
                List<RecordPair> misfits = new List<RecordPair>();
                bool allDoneTraining = true;
                long runCounter = 0;
                // Mirrors misfits.Count so workers can check the cap without touching the list
                // while another thread may be adding to it.
                int misfitCount = 0;
                int left = Console.CursorLeft;
                int top = Console.CursorTop;

                // The single-tree forest is the same for every pair; build it once.
                DecisionTree[] singleTreeForest = new DecisionTree[] { trainedTree };

                Parallel.ForEach(DataLoader.LoadNegativesFromAnswerKey(allPositives), (pair, state) =>
                {
                    // Use the value returned by the atomic increment; re-reading the shared
                    // counter could observe another thread's update and skip or repeat a report.
                    long examined = Interlocked.Increment(ref runCounter);

                    if (examined % 1000000 == 0)
                    {
                        lock (misfits)
                        {
                            Console.SetCursorPosition(left, top);
                            Console.WriteLine($"\tExamined {examined:N0} entries thus far. {misfits.Count:N0} misfits found.");
                        }
                    }

                    if (Volatile.Read(ref misfitCount) < 100000)
                    {
                        if (DecisionTreeBuilder.IsMatch(pair, singleTreeForest, null) != pair.IsMatch)
                        {
                            lock (misfits)
                            {
                                misfits.Add(pair);
                                allDoneTraining = false;
                                Interlocked.Increment(ref misfitCount);
                            }
                        }
                    }
                    else
                    {
                        // Enough misfits for this round; stop scheduling further pairs.
                        state.Break();
                    }
                });
                Console.WriteLine($"..done. {misfits.Count} misfits found.");

                if (allDoneTraining)
                {
                    break;
                }

                Console.Write("Writing misfits to disk...");
                using (StreamWriter sw = File.AppendText(misfitsFilePath))
                {
                    foreach (RecordPair misfit in misfits)
                    {
                        sw.WriteLine($"{misfit.Record1.EnterpriseId},{misfit.Record2.EnterpriseId}");
                    }
                }

                Console.WriteLine("...done");

                Console.Write("Adding misfits to training data...");
                int trainingDataOriginalSize = trainingData.Count;
                trainingData.AddRange(misfits);
                int trainingDataNowSize = trainingData.Count;
                Console.WriteLine($"...done. Training data increased by {trainingDataNowSize - trainingDataOriginalSize} records.");

                Console.WriteLine("Starting over!");
            }
        }
Esempio n. 22
0
        /// <summary>
        /// Loads the tree stored in <c>serialized.dat</c>, runs prediction on frame 145 of a real
        /// tomogram and on a simulated tomogram, and saves both results as PNG images.
        /// </summary>
        static void Test()
        {
            // The stream is only needed while deserializing, so it is closed right afterwards.
            // NOTE(review): BinaryFormatter must only be fed trusted files; it is unsafe for
            // untrusted input.
            DecisionTreeNode node;
            BinaryFormatter bf = new BinaryFormatter();

            using (FileStream fs = File.OpenRead("serialized.dat"))
            {
                // A direct cast fails immediately on an unexpected payload type instead of
                // yielding a null node that only fails later during prediction.
                node = (DecisionTreeNode)bf.Deserialize(fs);
            }

            DecisionTreeOptions options = new DecisionTreeOptions
            {
                // TODO: Fill in
                MaximumNumberOfRecursionLevels = 25,
                NumberOfFeatures        = 300,
                NumberOfThresholds      = 35,
                OffsetXMax              = 40,
                OffsetXMin              = -40,
                OffsetYMax              = 40,
                OffsetYMin              = -40,
                OutOfRangeValue         = 1000000,
                SplittingThresholdMax   = .2f,
                SufficientGainLevel     = 0,
                PercentageOfPixelsToUse = .9f,
            };

            MRCFile file = MRCParser.Parse("/home/brush/tomography2_fullsirtcliptrim.mrc");

            MRCFrame frame = file.Frames[145];

            // Copy the frame into a tomogram of the same dimensions.
            LabeledTomogram tom = new LabeledTomogram();
            tom.Width  = frame.Width;
            tom.Height = frame.Height;
            tom.Data   = new float[frame.Width * frame.Height];

            for (int i = 0; i < frame.Data.Length; i++)
            {
                tom.Data[i] = frame.Data[i];
            }

            // Real tomogram: predict and save. The bitmap is disposed once it has been written.
            float[] labels = DecisionTreeBuilder.Predict(tom, node, options);
            using (Bitmap bmp = Drawing.TomogramDrawing.PaintClassifiedPixelsOnTomogram(tom, labels))
            {
                bmp.Save("/var/www/html/static/labeled_real.png", System.Drawing.Imaging.ImageFormat.Png);
            }

            // Simulated tomogram: same procedure.
            LabeledTomogram tom2 = DataReader.ReadDatFile("/home/brush/tom4/0.dat");

            labels = DecisionTreeBuilder.Predict(tom2, node, options);
            using (Bitmap bmp = Drawing.TomogramDrawing.PaintClassifiedPixelsOnTomogram(tom2, labels))
            {
                bmp.Save("/var/www/html/static/labeled_simulated.png", System.Drawing.Imaging.ImageFormat.Png);
            }
        }
Esempio n. 23
0
 /// <summary>
 /// Creates the link-generation decision tree for a set of tree-route entries.
 /// </summary>
 /// <param name="entries">Entries handed to the tree generator.</param>
 public LinkGenerationDecisionTree(IReadOnlyList<TreeRouteLinkGenerationEntry> entries)
 {
     // Classifier supplied to GenerateTree together with the entries.
     var classifier = new AttributeRouteLinkGenerationEntryClassifier();

     _root = DecisionTreeBuilder<TreeRouteLinkGenerationEntry>.GenerateTree(entries, classifier);
 }
 /// <summary>
 /// Creates the link-generation decision tree for a set of outbound matches.
 /// </summary>
 /// <param name="entries">Outbound matches handed to the tree generator.</param>
 public LinkGenerationDecisionTree(IReadOnlyList<OutboundMatch> entries)
 {
     // Classifier supplied to GenerateTree together with the entries.
     var classifier = new OutboundMatchClassifier();

     _root = DecisionTreeBuilder<OutboundMatch>.GenerateTree(entries, classifier);
 }
Esempio n. 25
0
        /// <summary>
        /// Interactively loads record-pair training data, splits it into contiguous
        /// per-tree slices, trains one decision tree per slice and serializes each tree to
        /// <c>tree{index}.dat</c> inside <paramref name="outputDirectory"/>.
        /// </summary>
        /// <remarks>
        /// Every tree receives <c>Count / numberOfTrees</c> records; the last tree also
        /// receives whatever is left over, so no record is dropped. When there are fewer
        /// records than trees, the earlier trees receive empty slices and the last tree
        /// receives all records.
        /// </remarks>
        /// <param name="numberOfTrees">Number of trees to train; must be positive.</param>
        /// <param name="outputDirectory">Directory that receives the serialized trees.</param>
        /// <param name="subsamplingPercentage">Forwarded unchanged to <c>DecisionTreeBuilder.Train</c>.</param>
        /// <param name="minGain">Forwarded unchanged to <c>DecisionTreeBuilder.Train</c>.</param>
        /// <param name="maximumEditDistance">Forwarded to <c>DecisionTreeBuilder.GenerateSplittingQuestions</c>.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="numberOfTrees"/> is zero or negative.
        /// </exception>
        static void Train(int numberOfTrees, string outputDirectory, double subsamplingPercentage,
                          double minGain, int maximumEditDistance)
        {
            // Fail before prompting or loading data instead of hitting a division by zero later.
            if (numberOfTrees <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(numberOfTrees), numberOfTrees,
                                                      "At least one tree must be requested.");
            }

            Console.WriteLine("Train options:");
            Console.WriteLine("\t1. Start full training.");
            Console.WriteLine("\t2. Start debug training from nohomo file.");
            Console.Write("Choice:");

            // TryParse also covers end-of-input (null) and non-numeric text.
            int option;
            if (!int.TryParse(Console.ReadLine(), out option) || (option != 1 && option != 2))
            {
                Console.WriteLine("Unrecognized choice; expected 1 or 2. Training aborted.");
                return;
            }

            Stopwatch sw = new Stopwatch();

            sw.Start();

            List<RecordPair> trainingData;

            if (option == 1)
            {
                trainingData = DataLoader.BuildTrainingData("mrns.csv", "more.csv", "rejected.txt");

                // Diagnostic only: reports pairs with a missing side, training still proceeds.
                if (trainingData.Any(n => n.Record1 == null || n.Record2 == null))
                {
                    Console.WriteLine("YUP, ITS HERE");
                }
            }
            else
            {
                Console.Write("Nohomo file path:");
                // Strip the quotes that shells add when a path is pasted or dragged in.
                string filePath = (Console.ReadLine() ?? string.Empty).Replace("\"", "");
                trainingData = DataLoader.LoadTrainingDataFromNoHomoFile(filePath);
            }

            int numberPerTree = trainingData.Count / numberOfTrees;

            for (int c = 0; c < numberOfTrees; c++)
            {
                int startIndex = c * numberPerTree;

                // The last tree takes everything that remains. Computing the remainder as
                // Count - startIndex avoids a modulo by numberPerTree, which is zero when
                // there are fewer records than trees and otherwise under-counts the leftover.
                int length = (c == numberOfTrees - 1)
                    ? trainingData.Count - startIndex
                    : numberPerTree;

                List<RecordPair> trainingDataSubset = trainingData.GetRange(startIndex, length);

                SplittingQuestion[] splittingQuestions = DecisionTreeBuilder.GenerateSplittingQuestions(maximumEditDistance);

                DecisionTreeBuilder treeBuilder = new DecisionTreeBuilder();

                // NOTE(review): the final argument is null; presumably it is an optional
                // list of pre-computed questions - confirm against DecisionTreeBuilder.Train.
                DecisionTree tree = treeBuilder.Train(trainingDataSubset, splittingQuestions,
                                                      subsamplingPercentage, minGain, null);

                // NOTE(review): BinaryFormatter is obsolete and removed from .NET 9. It is kept
                // here so the on-disk format stays unchanged; migrate writer and readers together.
                BinaryFormatter bf = new BinaryFormatter();
                using (FileStream fout = File.Create(Path.Combine(outputDirectory, $"tree{c}.dat")))
                {
                    bf.Serialize(fout, tree);
                }
            }

            sw.Stop();

            Console.WriteLine($"Whole operation took {sw.ElapsedMilliseconds / 1000.0 / 60.0} minutes");
        }