/// <summary>
/// Trains a decision tree from the painted tomogram files and serializes the
/// resulting root node to "serialized.dat" in the current directory.
/// </summary>
static void Train()
{
    List<LabeledTomogram> trainingSet = LabeledTomogramsFromPaintedFiles();

    DecisionTreeOptions trainingOptions = new DecisionTreeOptions
    {
        // TODO: Fill in
        MaximumNumberOfRecursionLevels = 25,
        NumberOfFeatures = 300,
        NumberOfThresholds = 35,
        OffsetXMax = 40,
        OffsetXMin = -40,
        OffsetYMax = 40,
        OffsetYMin = -40,
        OutOfRangeValue = 1000000,
        SplittingThresholdMax = .2f,
        SufficientGainLevel = 0,
        PercentageOfPixelsToUse = .9f,
        //DistanceThreshold = .1f,
    };

    // The random source handed to the trainer is seeded with a fixed value (1234).
    Random seededRandom = new Random(1234);
    DecisionTreeNode root = DecisionTreeBuilder.Train(trainingSet, seededRandom, trainingOptions);

    // Persist the trained tree so that it can be read back from disk later.
    BinaryFormatter formatter = new BinaryFormatter();
    using (FileStream output = File.Create("serialized.dat"))
    {
        formatter.Serialize(output, root);
    }
}
/// <summary>
/// Builds a decision tree over <c>intMap</c> and asks <c>strategy</c> to plan and
/// generate code for it.
/// </summary>
/// <param name="emit">Emitter stored on this instance before code is planned.</param>
/// <param name="ldvalue">Pipe stored on this instance before code is planned.</param>
/// <param name="action">Switch action stored on this instance.</param>
protected override void DoBuild(
    EmitSyntax emit,
    Pipe<EmitSyntax> ldvalue,
    SwitchGeneratorAction action)
{
    this.action = action;

#if false
    var decisionTree = new BinaryDecisionTreeBuilder(intMap.DefaultValue, platformInfo);
    var rootNode = decisionTree.Build(intMap.Enumerate().ToArray());
#else
    // Active branch: the tree is built from the map, its possible bounds and the frequency.
    this.builder = new DecisionTreeBuilder(platformInfo);
    var rootNode = this.builder.Build(intMap, possibleBounds, frequency);
#endif

    // Generation state is (re)initialised before the strategy runs.
    this.emit = emit;
    this.ldvalue = ldvalue;
    this.labels = new List<Ref<Labels>>();

    strategy.PlanCode(rootNode);
    strategy.GenerateCode();

    // Debug.Write(rootNode);
}
/// <summary>
/// For every record in <paramref name="records"/>, collects the rows of the final
/// data set file that the loaded forest classifies as a match.
/// </summary>
/// <param name="records">Records to find matches for; each one becomes a key of the result.</param>
/// <returns>
/// A dictionary mapping each input record to the cleaned data-set records that matched it
/// (an empty list when nothing matched).
/// </returns>
public static Dictionary<Record, List<Record>> GetMatches(List<Record> records)
{
    Dictionary<Record, List<Record>> matchesByRecord = new Dictionary<Record, List<Record>>();

    string[] dataSetRows = File.ReadAllLines("c:/users/brush/desktop/finaldataset.csv");
    DecisionTree[] forest = DataLoader.LoadForestFromDirectory("C:/users/brush/desktop/forest");

    foreach (Record candidate in records)
    {
        matchesByRecord.Add(candidate, new List<Record>());
    }

    Console.WriteLine("Searching for matches...");

    // Iteration starts at index 1, so the first line of the file is never examined.
    Parallel.For(1, dataSetRows.Length, rowIndex =>
    {
        // Progress report every 10000 rows.
        if (rowIndex % 10000 == 0)
        {
            Console.WriteLine($"{(rowIndex / (dataSetRows.Length * 1.0) * 100)}%");
        }

        string[] fields = dataSetRows[rowIndex].Split(',');

        // Rows without an id in the first column are skipped.
        if (fields[0] == "")
        {
            return;
        }

        // Only rows whose id is above this threshold are compared.
        if (int.Parse(fields[0]) <= 15374761)
        {
            return;
        }

        Record comparisonRecord = DataCleaner.CleanRecord(Record.FromFinalDatasetString(fields));

        foreach (Record toMatch in records)
        {
            if (toMatch.Equals(comparisonRecord))
            {
                continue;
            }

            RecordPair pair = new RecordPair
            {
                Record1 = toMatch,
                Record2 = comparisonRecord,
            };

            if (DecisionTreeBuilder.IsMatch(pair, forest, null))
            {
                // The result lists are shared between worker threads.
                lock (matchesByRecord)
                {
                    matchesByRecord[toMatch].Add(comparisonRecord);
                }
            }
        }
    });

    return matchesByRecord;
}
/// <summary>
/// Creates a new <see cref="ActionSelectionDecisionTree"/>.
/// </summary>
/// <param name="actions">
/// The <see cref="ActionDescriptorCollection"/> whose version is recorded and whose
/// items are used to generate the tree.
/// </param>
public ActionSelectionDecisionTree(ActionDescriptorCollection actions)
{
    Version = actions.Version;

    var descriptors = actions.Items;
    var classifier = new ActionDescriptorClassifier();
    _root = DecisionTreeBuilder<ActionDescriptor>.GenerateTree(descriptors, classifier);
}
/// <summary>
/// Creates a new <see cref="ActionSelectionDecisionTree"/>.
/// </summary>
/// <param name="actions">
/// The <see cref="ActionDescriptorCollection"/>. Only items whose attribute route
/// template is null (or that have no attribute route info) go into the tree.
/// </param>
public ActionSelectionDecisionTree(ActionDescriptorCollection actions)
{
    Version = actions.Version;

    var conventionalRoutedActions =
        (from descriptor in actions.Items
         where descriptor.AttributeRouteInfo?.Template == null
         select descriptor).ToArray();

    var classifier = new ActionDescriptorClassifier();
    _root = DecisionTreeBuilder<ActionDescriptor>.GenerateTree(conventionalRoutedActions, classifier);
}
/// <summary>
/// Interactive check: reads two record lines from the console, runs them through the
/// forest found in the current directory, and optionally replays the logged splitting
/// questions against the training data, appending every pair for which the replay
/// returns true to "gothere.txt".
/// </summary>
public static void Test()
{
    Console.WriteLine("Line 1:");
    string line1 = Console.ReadLine();
    Console.WriteLine("Line 2:");
    string line2 = Console.ReadLine();

    RecordPair pair = new RecordPair();
    pair.Record1 = DataCleaner.CleanRecord(Record.FromString(line1));
    pair.Record2 = DataCleaner.CleanRecord(Record.FromString(line2));

    DecisionTree[] forest = DataLoader.LoadForestFromDirectory(".");
    TreeLogger logger = new TreeLogger();

    // The result itself is not used; the call is made with the logger attached, whose
    // SplittingQuestionsToTheBottom is replayed below (presumably filled by this call).
    DecisionTreeBuilder.IsMatch(pair, forest, logger);

    Console.WriteLine("Search for those in training data who make it there?");
    string response = Console.ReadLine();

    // Console.ReadLine returns null at end of input; treat that like any answer other
    // than "y". The comparison is ordinal and case-insensitive rather than culture-aware.
    if (!string.Equals(response, "y", StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    using (StreamWriter sw = File.AppendText("c:/users/brush/desktop/gothere.txt"))
    {
        List<RecordPair> pairs = new List<RecordPair>();

        Console.Write("Loading training data for this iteration...");
        pairs.AddRange(DataLoader.GetPositivesFromMRNData("mrns.csv"));
        pairs.AddRange(DataLoader.GetHandPassedSets("more.csv"));
        pairs.AddRange(DataLoader.GetRejectedRecordPairs("rejected.txt"));
        Console.WriteLine("...done");

        Parallel.ForEach(pairs, p =>
        {
            if (DecisionTreeBuilder.ReplayDecision(p, logger.SplittingQuestionsToTheBottom))
            {
                // The writer is shared by all worker threads.
                lock (sw)
                {
                    sw.WriteLine(p);
                }
            }
        });
    }
}
/// <summary>
/// An empty item list produces a tree with no criteria and no matches.
/// </summary>
public void BuildTree_Empty()
{
    // Arrange
    var noItems = new List<Item>();
    var classifier = new ItemClassifier();

    // Act
    var root = DecisionTreeBuilder<Item>.GenerateTree(noItems, classifier);

    // Assert
    Assert.Empty(root.Criteria);
    Assert.Empty(root.Matches);
}
/// <summary>
/// Two items that share a controller but differ in action: the root splits on
/// "action", and each action branch splits on "controller" with a single "Store" leaf.
/// </summary>
public void BuildTree_WithMultipleItems()
{
    // Arrange
    var buyItem = new Item();
    buyItem.Criteria.Add("controller", new DecisionCriterionValue(value: "Store"));
    buyItem.Criteria.Add("action", new DecisionCriterionValue(value: "Buy"));

    var checkoutItem = new Item();
    checkoutItem.Criteria.Add("controller", new DecisionCriterionValue(value: "Store"));
    checkoutItem.Criteria.Add("action", new DecisionCriterionValue(value: "Checkout"));

    var items = new List<Item> { buyItem, checkoutItem };

    // Act
    var root = DecisionTreeBuilder<Item>.GenerateTree(items, new ItemClassifier());

    // Assert
    Assert.Empty(root.Matches);

    var actionCriterion = Assert.Single(root.Criteria);
    Assert.Equal("action", actionCriterion.Key);

    // "Buy" branch
    var buyBranch = actionCriterion.Branches["Buy"];
    Assert.Empty(buyBranch.Matches);

    var buyController = Assert.Single(buyBranch.Criteria);
    Assert.Equal("controller", buyController.Key);

    var buyStore = Assert.Single(buyController.Branches);
    Assert.Equal("Store", buyStore.Key);
    Assert.Empty(buyStore.Value.Criteria);
    Assert.Same(buyItem, Assert.Single(buyStore.Value.Matches));

    // "Checkout" branch
    var checkoutBranch = actionCriterion.Branches["Checkout"];
    Assert.Empty(checkoutBranch.Matches);

    var checkoutController = Assert.Single(checkoutBranch.Criteria);
    Assert.Equal("controller", checkoutController.Key);

    var checkoutStore = Assert.Single(checkoutController.Branches);
    Assert.Equal("Store", checkoutStore.Key);
    Assert.Empty(checkoutStore.Value.Criteria);
    Assert.Same(checkoutItem, Assert.Single(checkoutStore.Value.Matches));
}
/// <summary>
/// A single item without criteria ends up as the only match on the root node.
/// </summary>
public void BuildTree_TrivialMatch()
{
    // Arrange
    var onlyItem = new Item();
    var items = new List<Item> { onlyItem };

    // Act
    var root = DecisionTreeBuilder<Item>.GenerateTree(items, new ItemClassifier());

    // Assert
    Assert.Empty(root.Criteria);
    Assert.Same(onlyItem, Assert.Single(root.Matches));
}
/// <summary>
/// A single item with three criteria yields a chain area -> controller -> action,
/// with the item as the only match on the innermost branch.
/// </summary>
public void BuildTree_WithMultipleCriteria()
{
    // Arrange
    var addUserItem = new Item();
    addUserItem.Criteria.Add("area", new DecisionCriterionValue(value: "Admin"));
    addUserItem.Criteria.Add("controller", new DecisionCriterionValue(value: "Users"));
    addUserItem.Criteria.Add("action", new DecisionCriterionValue(value: "AddUser"));

    var items = new List<Item> { addUserItem };

    // Act
    var root = DecisionTreeBuilder<Item>.GenerateTree(items, new ItemClassifier());

    // Assert
    Assert.Empty(root.Matches);

    // Level 1: area = Admin
    var areaCriterion = Assert.Single(root.Criteria);
    Assert.Equal("area", areaCriterion.Key);

    var adminBranch = Assert.Single(areaCriterion.Branches);
    Assert.Equal("Admin", adminBranch.Key);
    Assert.Empty(adminBranch.Value.Matches);

    // Level 2: controller = Users
    var controllerCriterion = Assert.Single(adminBranch.Value.Criteria);
    Assert.Equal("controller", controllerCriterion.Key);

    var usersBranch = Assert.Single(controllerCriterion.Branches);
    Assert.Equal("Users", usersBranch.Key);
    Assert.Empty(usersBranch.Value.Matches);

    // Level 3: action = AddUser
    var actionCriterion = Assert.Single(usersBranch.Value.Criteria);
    Assert.Equal("action", actionCriterion.Key);

    var addUserBranch = Assert.Single(actionCriterion.Branches);
    Assert.Equal("AddUser", addUserBranch.Key);
    Assert.Empty(addUserBranch.Value.Criteria);
    Assert.Same(addUserItem, Assert.Single(addUserBranch.Value.Matches));
}
/// <summary>
/// Builds a tree for two single-value arrows (2 -> 100, 5 -> 200) over the bounds 0..9
/// with a uniform frequency, and prints the resulting program. No assertions are made.
/// </summary>
public void TestElementaryChecks()
{
    const int DefaultValue = -100;

    var actionMap = new MutableIntMap<int>();
    actionMap.DefaultValue = DefaultValue;

    var possibleBounds = new IntInterval(0, 9);
    var uniformFrequency = new UniformIntFrequency(possibleBounds);

    actionMap.Set(new IntArrow<int>(2, 100));
    actionMap.Set(new IntArrow<int>(5, 200));

    var treeBuilder = new DecisionTreeBuilder(platformInfo);
    var root = treeBuilder.Build(actionMap, possibleBounds, uniformFrequency);

    PrintProgram(root, treeBuilder.DefaultActionDecision);
}
/// <summary>
/// Builds a tree in which several arrows map to the same action (1) and prints the
/// resulting program. No assertions are made.
/// </summary>
public void TestSameActionUnification()
{
    // The frequency covers -100..100 while the build bounds below are 0..1000.
    var uniformFrequency = new UniformIntFrequency(new IntInterval(-100, 100));

    const int DefaultValue = -100;

    var actionMap = new MutableIntMap<int>();
    actionMap.DefaultValue = DefaultValue;

    actionMap.Set(new IntArrow<int>(1, 1));
    actionMap.Set(new IntArrow<int>(3, 49, 1));
    actionMap.Set(new IntArrow<int>(51, 1));
    actionMap.Set(new IntArrow<int>(54, 100, 1));

    var treeBuilder = new DecisionTreeBuilder(platformInfo);
    var possibleBounds = new IntInterval(0, 1000);
    var root = treeBuilder.Build(actionMap, possibleBounds, uniformFrequency);

    PrintProgram(root);
}
/// <summary>
/// Reads one record line from the console and prints every row of the final data set
/// file that the forest classifies as a match for it.
/// </summary>
public static void List()
{
    Console.WriteLine("Line to match:");
    string line1 = Console.ReadLine();
    Record toMatch = Record.FromString(line1);

    string[] finalDataSetList = File.ReadAllLines("c:/users/brush/desktop/finaldataset.csv");

    Console.WriteLine("Searching for matches...");

    // Loaded once, on the first qualifying row, instead of re-reading the forest
    // directory from disk for every row that is compared.
    DecisionTree[] forest = null;

    // Iteration starts at index 1, so the first line of the file is never examined.
    for (int c = 1; c < finalDataSetList.Length; c++)
    {
        string[] bits = finalDataSetList[c].Split(',');

        // Rows without an id in the first column are skipped.
        if (bits[0] == "")
        {
            continue;
        }

        // Only rows whose id is above this threshold are compared.
        int enterpriseId = int.Parse(bits[0]);
        if (enterpriseId <= 15374761)
        {
            continue;
        }

        Record comparisonRecord = Record.FromFinalDatasetString(bits);
        RecordPair pair = new RecordPair
        {
            Record1 = toMatch,
            Record2 = comparisonRecord,
        };

        if (forest == null)
        {
            forest = DataLoader.LoadForestFromDirectory("C:/users/brush/desktop/forest");
        }

        if (DecisionTreeBuilder.IsMatch(pair, forest, null))
        {
            Console.WriteLine(comparisonRecord);
            Console.WriteLine();
        }
    }
}
/// <summary>
/// Creates the link generation tree from <paramref name="entries"/>, keeping
/// conventional-route entries in a separate list.
/// </summary>
/// <param name="entries">The outbound matches to partition and index.</param>
public LinkGenerationDecisionTree(IReadOnlyList<OutboundMatch> entries)
{
    // The entries are divided into two groups:
    //   1. attribute routes    - placed in the decision tree
    //   2. conventional routes - kept as a plain list
    //
    // An entry is conventional when any of its required link values is
    // RoutePattern.RequiredValueAny. That value acts as a wildcard, whereas an attribute
    // route entry is denormalized to contain an exact set of required values.
    //
    // We will only see conventional routes show up here for endpoint routing.
    var treeEntries = new List<OutboundMatch>();
    _conventionalEntries = new List<OutboundMatch>();

    foreach (var candidate in entries)
    {
        var hasWildcard = false;
        foreach (var requiredValue in candidate.Entry.RequiredLinkValues)
        {
            if (RoutePattern.IsRequiredValueAny(requiredValue.Value))
            {
                hasWildcard = true;
                break;
            }
        }

        if (hasWildcard)
        {
            _conventionalEntries.Add(candidate);
        }
        else
        {
            treeEntries.Add(candidate);
        }
    }

    _root = DecisionTreeBuilder<OutboundMatch>.GenerateTree(treeEntries, new OutboundMatchClassifier());
}
/// <summary>
/// Builds the tree for a hostile, mocking verbal interaction aimed at "Morty" and checks
/// the root node: default reaction set, pass/fail value 60, both child nodes present.
/// </summary>
public void BuildTreeFromDocument_HostileMocking_TreeIsProperlyBuilt()
{
    //ARRANGE
    var treeBuilder = new DecisionTreeBuilder();

    var morty = new Person("Morty", Gender.Male, Sex.Male, Orientation.Undefined, Guid.NewGuid());
    var character = new global::RNPC.Core.Character(morty, Archetype.TheInnocent)
    {
        FileController = new DecisionTreeFileController(),
        DecisionTreeBuilder = new DecisionTreeBuilder()
    };

    // Hostile mockery
    var mockery = new Action
    {
        Tone = Tone.Mocking,
        EventType = EventType.Interaction,
        ActionType = ActionType.Verbal,
        Intent = Intent.Hostile,
        Message = "Did your mother dress you up this morning?",
        Target = character.MyName,
        EventName = "Mocking",
        Source = "The Bully"
    };

    //ACT
    var builtTree = treeBuilder.BuildTreeFromDocument(new DecisionTreeFileController(), mockery, "Morty");
    var rootNode = builtTree as AbstractDecisionNode;

    //ASSERT
    Assert.IsNotNull(rootNode);

    // Adjusted per event
    Assert.IsFalse(string.IsNullOrEmpty(rootNode.DefaultTreeReaction));
    Assert.AreEqual(60, rootNode.ConfiguredPassFailValue);
    Assert.IsTrue(rootNode.LeftNode != null);
    Assert.IsTrue(rootNode.RightNode != null);
}
/// <summary>
/// Creates the link generation tree from <paramref name="entries"/> and records, per
/// required-link-value key, the set of values seen across all entries.
/// </summary>
/// <param name="entries">The outbound matches to index.</param>
public LinkGenerationDecisionTree(IReadOnlyList<OutboundMatch> entries)
{
    _root = DecisionTreeBuilder<OutboundMatch>.GenerateTree(entries, new OutboundMatchClassifier());

    // Keys are compared case-insensitively (ordinal).
    _knownValues = new Dictionary<string, HashSet<object>>(StringComparer.OrdinalIgnoreCase);

    foreach (var match in entries)
    {
        foreach (var requiredValue in match.Entry.RequiredLinkValues)
        {
            if (!_knownValues.TryGetValue(requiredValue.Key, out var valuesForKey))
            {
                // First time this key is seen: start a new value set for it.
                valuesForKey = new HashSet<object>(RouteValueEqualityComparer.Default);
                _knownValues.Add(requiredValue.Key, valuesForKey);
            }

            // A null value is recorded as the empty string.
            valuesForKey.Add(requiredValue.Value ?? string.Empty);
        }
    }
}
/// <summary>
/// Builds the tree for a neutral, non-verbal salute aimed at "Morty" and checks the
/// root node: default reaction set, pass/fail value 0, both child nodes present.
/// </summary>
public void BuildTreeFromDocument_NeutralSalute_TreeIsProperlyBuilt()
{
    //ARRANGE
    var treeBuilder = new DecisionTreeBuilder();

    var morty = new Person("Morty", Gender.Male, Sex.Male, Orientation.Undefined, Guid.NewGuid());
    var character = new global::RNPC.Core.Character(morty, Archetype.TheInnocent)
    {
        FileController = new DecisionTreeFileController(),
        DecisionTreeBuilder = new DecisionTreeBuilder()
    };

    // Neutral salute
    var salute = new Action
    {
        EventType = EventType.Interaction,
        ActionType = ActionType.NonVerbal,
        Intent = Intent.Neutral,
        Message = "",
        Target = character.MyName,
        EventName = "Salute",
        Source = "The Ambassador"
    };

    //ACT
    var builtTree = treeBuilder.BuildTreeFromDocument(new DecisionTreeFileController(), salute, "Morty");
    var rootNode = builtTree as AbstractDecisionNode;

    //ASSERT
    Assert.IsNotNull(rootNode);

    // Adjusted per event
    Assert.IsFalse(string.IsNullOrEmpty(rootNode.DefaultTreeReaction));
    Assert.AreEqual(0, rootNode.ConfiguredPassFailValue);
    Assert.IsTrue(rootNode.LeftNode != null);
    Assert.IsTrue(rootNode.RightNode != null);
}
/// <summary>
/// Builds a tree over the full int range with a non-uniform frequency and verifies
/// that <c>Decide</c> returns the mapped action inside 1..100 and the default (-1)
/// everywhere else.
/// </summary>
public void TestBalanced()
{
    // Frequencies: values 1 and 50 carry by far the largest weights.
    var weights = new MutableIntFrequency();
    weights.DefaultValue = 0.0000001;
    weights.Set(new IntArrow<double>(1, 520.0));
    weights.Set(new IntArrow<double>(2, 49, 3.0));
    weights.Set(new IntArrow<double>(50, 236.0));
    weights.Set(new IntArrow<double>(51, 100, 2.0));

    const int DefaultValue = -1;

    // Actions: 1 -> 1, 2..49 -> 2, 50 -> 3, 51..100 -> 4.
    var actionMap = new MutableIntMap<int>();
    actionMap.DefaultValue = DefaultValue;
    actionMap.Set(new IntArrow<int>(1, 1));
    actionMap.Set(new IntArrow<int>(2, 49, 2));
    actionMap.Set(new IntArrow<int>(50, 3));
    actionMap.Set(new IntArrow<int>(51, 100, 4));

    var treeBuilder = new DecisionTreeBuilder(platformInfo);
    var fullRange = new IntInterval(int.MinValue, int.MaxValue);
    var root = treeBuilder.Build(actionMap, fullRange, weights);

    PrintProgram(root, treeBuilder.DefaultActionDecision);

    // Below the mapped range
    Assert.AreEqual(-1, root.Decide(int.MinValue));
    Assert.AreEqual(-1, root.Decide(0));

    // Inside the mapped range, including interval edges
    Assert.AreEqual(1, root.Decide(1));
    Assert.AreEqual(2, root.Decide(2));
    Assert.AreEqual(2, root.Decide(49));
    Assert.AreEqual(3, root.Decide(50));
    Assert.AreEqual(4, root.Decide(51));
    Assert.AreEqual(4, root.Decide(100));

    // Above the mapped range
    Assert.AreEqual(-1, root.Decide(200));
    Assert.AreEqual(-1, root.Decide(fullRange.Last));
}
/// <summary>
/// An item that specifies only "action" is matched on the interior "Buy" branch,
/// alongside deeper items that also specify a controller.
/// </summary>
public void BuildTree_WithInteriorMatch()
{
    // Arrange
    var storeBuy = new Item();
    storeBuy.Criteria.Add("controller", new DecisionCriterionValue(value: "Store"));
    storeBuy.Criteria.Add("action", new DecisionCriterionValue(value: "Buy"));

    var storeCheckout = new Item();
    storeCheckout.Criteria.Add("controller", new DecisionCriterionValue(value: "Store"));
    storeCheckout.Criteria.Add("action", new DecisionCriterionValue(value: "Checkout"));

    var anyControllerBuy = new Item();
    anyControllerBuy.Criteria.Add("action", new DecisionCriterionValue(value: "Buy"));

    var items = new List<Item> { storeBuy, storeCheckout, anyControllerBuy };

    // Act
    var root = DecisionTreeBuilder<Item>.GenerateTree(items, new ItemClassifier());

    // Assert
    Assert.Empty(root.Matches);

    var actionCriterion = Assert.Single(root.Criteria);
    Assert.Equal("action", actionCriterion.Key);

    var buyBranch = actionCriterion.Branches["Buy"];
    Assert.Same(anyControllerBuy, Assert.Single(buyBranch.Matches));
}
/// <summary>
/// Items with unrelated criteria keys produce a root whose first two criteria are
/// "action" and "stub", in that order.
/// </summary>
public void BuildTree_WithDivergentCriteria()
{
    // Arrange
    var storeBuy = new Item();
    storeBuy.Criteria.Add("controller", new DecisionCriterionValue(value: "Store"));
    storeBuy.Criteria.Add("action", new DecisionCriterionValue(value: "Buy"));

    var storeCheckout = new Item();
    storeCheckout.Criteria.Add("controller", new DecisionCriterionValue(value: "Store"));
    storeCheckout.Criteria.Add("action", new DecisionCriterionValue(value: "Checkout"));

    var stubOnly = new Item();
    stubOnly.Criteria.Add("stub", new DecisionCriterionValue(value: "Bleh"));

    var items = new List<Item> { storeBuy, storeCheckout, stubOnly };

    // Act
    var root = DecisionTreeBuilder<Item>.GenerateTree(items, new ItemClassifier());

    // Assert
    Assert.Empty(root.Matches);

    var firstCriterion = root.Criteria[0];
    Assert.Equal("action", firstCriterion.Key);

    var secondCriterion = root.Criteria[1];
    Assert.Equal("stub", secondCriterion.Key);
}
/// <summary>
/// Iteratively trains a single decision tree: train on the current data, test the tree
/// against the negatives derived from the answer key, append every misclassified pair
/// ("misfit") to the misfits file and to the training data, and repeat until a round
/// produces no misfits.
/// </summary>
/// <param name="misfitsFilePath">Misfits file; created empty if missing, appended to each round.</param>
/// <param name="answerKeyPath">Path of the answer key the positive pairs are loaded from.</param>
/// <param name="finalDataSetPath">Path of the final data set used to resolve records.</param>
public static void DoIt(string misfitsFilePath, string answerKeyPath, string finalDataSetPath)
{
    if (!File.Exists(misfitsFilePath))
    {
        File.Create(misfitsFilePath).Close();
    }

    Console.Write("Loading final data set...");
    Dictionary<int, Record> finalDataSet = DataLoader.LoadFinalDataSet(finalDataSetPath);
    Console.WriteLine("...done");

    List<RecordPair> trainingData = new List<RecordPair>();

    Console.Write("Getting positive training data...");
    List<RecordPair> allPositives = DataLoader.LoadAllPositivesFromAnswerKey(answerKeyPath, finalDataSet);
    trainingData.AddRange(allPositives);
    Console.WriteLine("...done");

    Console.Write("Getting negative training data (misfits)...");
    trainingData.AddRange(DataLoader.GetPairsFromMisfitsFile(misfitsFilePath, finalDataSet));
    Console.WriteLine("...done");

    Console.Write("Generating splitting questions for this iteration...");
    int maximumEditDistance = 3;
    SplittingQuestion[] splittingQuestions = DecisionTreeBuilder.GenerateSplittingQuestions(maximumEditDistance);
    Console.WriteLine("...done");

    // 1. get a random assort of training data.
    for (; ; )
    {
        DecisionTreeBuilder builder = new DecisionTreeBuilder();

        Console.Write("Training...");
        int positiveCount = trainingData.Count(n => n.IsMatch);
        int negativeCount = trainingData.Count(n => !n.IsMatch);
        Console.WriteLine($"\tThere are {positiveCount} positive instances and {negativeCount} negative instances in the data...");

        DecisionTree trainedTree = builder.Train(trainingData, splittingQuestions, 1, 0, null);
        Console.WriteLine("...done.");

        Console.Write("Serializing to disk the latest tree...");
        BinaryFormatter bf = new BinaryFormatter();
        using (FileStream fout = File.Create("tree.dat"))
        {
            bf.Serialize(fout, trainedTree);
        }
        Console.WriteLine("...done");

        Console.WriteLine("Now let's test the tree and find the misfits...");
        List<RecordPair> misfits = new List<RecordPair>();

        // One-element forest built once per round instead of once per tested pair.
        DecisionTree[] singleTreeForest = new DecisionTree[] { trainedTree };

        long runCounter = 0;
        int left = Console.CursorLeft;
        int top = Console.CursorTop;

        Parallel.ForEach(DataLoader.LoadNegativesFromAnswerKey(allPositives), (pair, state) =>
        {
            // Use the value returned by the atomic increment. Re-reading the shared
            // counter afterwards can observe another thread's increment, which lets a
            // progress milestone be skipped or reported more than once.
            long examined = Interlocked.Increment(ref runCounter);
            if (examined % 1000000 == 0)
            {
                lock (misfits)
                {
                    Console.SetCursorPosition(left, top);
                    Console.WriteLine($"\tExamined {examined.ToString("N0")} entries thus far. {misfits.Count.ToString("N0")} misfits found.");
                }
            }

            // Unsynchronized read, as before: the 100000 cap is approximate.
            if (misfits.Count < 100000)
            {
                if (DecisionTreeBuilder.IsMatch(pair, singleTreeForest, null) != pair.IsMatch)
                {
                    lock (misfits)
                    {
                        misfits.Add(pair);
                    }
                }
            }
            else
            {
                state.Break();
            }
        });

        Console.WriteLine($"..done. {misfits.Count} misfits found.");

        // Training is finished exactly when this round produced no misfits.
        if (misfits.Count == 0)
        {
            break;
        }

        Console.Write("Writing misfits to disk...");
        using (StreamWriter sw = File.AppendText(misfitsFilePath))
        {
            foreach (RecordPair misfit in misfits)
            {
                string toWrite = $"{misfit.Record1.EnterpriseId},{misfit.Record2.EnterpriseId}";
                sw.WriteLine(toWrite);
            }
        }
        Console.WriteLine("...done");

        Console.Write("Adding misfits to training data...");
        int trainingDataOriginalSize = trainingData.Count;
        trainingData.AddRange(misfits);
        int trainingDataNowSize = trainingData.Count;
        Console.WriteLine($"...done. Training data increased by {trainingDataNowSize - trainingDataOriginalSize} records.");

        Console.WriteLine("Starting over!");
    }
}
/// <summary>
/// Loads the tree serialized in "serialized.dat", classifies one frame of a real
/// tomogram and one simulated tomogram with it, and saves both labelled images as PNGs.
/// </summary>
static void Test()
{
    // NOTE(review): BinaryFormatter must only be used on trusted files; it is unsafe
    // for data that could come from an untrusted source.
    BinaryFormatter bf = new BinaryFormatter();
    using (FileStream fs = File.OpenRead("serialized.dat"))
    {
        // A direct cast fails here if the file holds something else, rather than
        // leaving a null node to fail somewhere inside Predict.
        DecisionTreeNode node = (DecisionTreeNode)bf.Deserialize(fs);

        DecisionTreeOptions options = new DecisionTreeOptions
        {
            // TODO: Fill in
            MaximumNumberOfRecursionLevels = 25,
            NumberOfFeatures = 300,
            NumberOfThresholds = 35,
            OffsetXMax = 40,
            OffsetXMin = -40,
            OffsetYMax = 40,
            OffsetYMin = -40,
            OutOfRangeValue = 1000000,
            SplittingThresholdMax = .2f,
            SufficientGainLevel = 0,
            PercentageOfPixelsToUse = .9f,
            //DistanceThreshold = .1f,
        };

        // Real data: frame 145 of the MRC file, copied into a tomogram of the same size.
        MRCFile file = MRCParser.Parse("/home/brush/tomography2_fullsirtcliptrim.mrc");
        MRCFrame frame = file.Frames[145];

        LabeledTomogram tom = new LabeledTomogram();
        tom.Width = frame.Width;
        tom.Height = frame.Height;
        tom.Data = new float[frame.Width * frame.Height];
        for (int i = 0; i < frame.Data.Length; i++)
        {
            tom.Data[i] = frame.Data[i];
        }

        float[] labels = DecisionTreeBuilder.Predict(tom, node, options);

        // Bitmaps are disposed as soon as they have been written out.
        using (Bitmap realBmp = Drawing.TomogramDrawing.PaintClassifiedPixelsOnTomogram(tom, labels))
        {
            realBmp.Save("/var/www/html/static/labeled_real.png", System.Drawing.Imaging.ImageFormat.Png);
        }

        // Simulated data read from a .dat file.
        LabeledTomogram tom2 = DataReader.ReadDatFile("/home/brush/tom4/0.dat");
        labels = DecisionTreeBuilder.Predict(tom2, node, options);

        using (Bitmap simulatedBmp = Drawing.TomogramDrawing.PaintClassifiedPixelsOnTomogram(tom2, labels))
        {
            simulatedBmp.Save("/var/www/html/static/labeled_simulated.png", System.Drawing.Imaging.ImageFormat.Png);
        }
    }
}
/// <summary>
/// Creates the link generation tree by classifying <paramref name="entries"/> with an
/// <see cref="AttributeRouteLinkGenerationEntryClassifier"/>.
/// </summary>
/// <param name="entries">The link generation entries to place in the tree.</param>
public LinkGenerationDecisionTree(IReadOnlyList<TreeRouteLinkGenerationEntry> entries)
{
    var classifier = new AttributeRouteLinkGenerationEntryClassifier();
    _root = DecisionTreeBuilder<TreeRouteLinkGenerationEntry>.GenerateTree(entries, classifier);
}
/// <summary>
/// Creates the link generation tree by classifying <paramref name="entries"/> with an
/// <see cref="OutboundMatchClassifier"/>.
/// </summary>
/// <param name="entries">The outbound matches to place in the tree.</param>
public LinkGenerationDecisionTree(IReadOnlyList<OutboundMatch> entries)
{
    var classifier = new OutboundMatchClassifier();
    _root = DecisionTreeBuilder<OutboundMatch>.GenerateTree(entries, classifier);
}
/// <summary>
/// Interactively loads training data, splits it into <paramref name="numberOfTrees"/>
/// contiguous slices, trains one tree per slice and serializes each tree to
/// "tree{index}.dat" inside <paramref name="outputDirectory"/>.
/// </summary>
/// <param name="numberOfTrees">Number of trees to train (and of slices to cut the data into).</param>
/// <param name="outputDirectory">Directory the serialized trees are written to.</param>
/// <param name="subsamplingPercentage">Passed through to the tree builder's Train call.</param>
/// <param name="minGain">Passed through to the tree builder's Train call.</param>
/// <param name="maximumEditDistance">Used to generate the splitting questions.</param>
static void Train(int numberOfTrees, string outputDirectory, double subsamplingPercentage, double minGain, int maximumEditDistance)
{
    Console.WriteLine("Train options:");
    Console.WriteLine("\t1. Start full training.");
    Console.WriteLine("\t2. Start debug training from nohomo file.");
    Console.Write("Choice:");

    // Non-numeric input leaves option at 0, which is reported as unrecognized below.
    int option;
    int.TryParse(Console.ReadLine(), out option);

    Stopwatch sw = new Stopwatch();
    sw.Start();

    List<RecordPair> trainingData = null;
    if (option == 1)
    {
        trainingData = DataLoader.BuildTrainingData("mrns.csv", "more.csv", "rejected.txt");
        if (trainingData.Any(n => n.Record1 == null || n.Record2 == null))
        {
            Console.WriteLine("YUP, ITS HERE");
        }
    }
    else if (option == 2)
    {
        Console.Write("Nohomo file path:");
        string filePath = Console.ReadLine().Replace("\"", "");
        trainingData = DataLoader.LoadTrainingDataFromNoHomoFile(filePath);
    }

    // Any other choice loads nothing; stop here instead of dereferencing null below.
    if (trainingData == null)
    {
        Console.WriteLine("Unrecognized choice; no training data was loaded.");
        return;
    }

    int numberPerTree = trainingData.Count / numberOfTrees;

    for (int c = 0; c < numberOfTrees; c++)
    {
        int startIndex = c * numberPerTree;

        // The last tree takes everything that is left. The leftover is
        // Count - numberPerTree * numberOfTrees, which is not Count % numberPerTree:
        // with 11 items and 4 trees the old formula dropped 2 items, and it divided
        // by zero whenever there were fewer items than trees.
        int length = (c == numberOfTrees - 1)
            ? trainingData.Count - startIndex
            : numberPerTree;

        List<RecordPair> trainingDataSubset = trainingData.GetRange(startIndex, length);

        SplittingQuestion[] splittingQuestions = DecisionTreeBuilder.GenerateSplittingQuestions(maximumEditDistance);
        DecisionTreeBuilder treeBuilder = new DecisionTreeBuilder();

        // As before, no pre-computed questions are supplied (the last argument is null).
        DecisionTree tree = treeBuilder.Train(trainingDataSubset, splittingQuestions, subsamplingPercentage, minGain, null);

        BinaryFormatter bf = new BinaryFormatter();
        using (FileStream fout = File.Create(Path.Combine(outputDirectory, $"tree{c}.dat")))
        {
            bf.Serialize(fout, tree);
        }
    }

    sw.Stop();
    Console.WriteLine($"Whole operation took {sw.ElapsedMilliseconds / 1000.0 / 60.0} minutes");
}