/// <summary>
/// Returns the k nearest training examples to <paramref name="x"/>,
/// ordered by distance (the "proof" / neighborhood used for classification).
/// </summary>
/// <param name="x">Query vector.</param>
/// <returns>An ExampleSet containing the m_k closest training examples.</returns>
public ExampleSet GetProof(SparseVector x)
{
    ExampleSet res = new ExampleSet();

    // Pair every training example with its distance to the query point.
    List<ExampleDistancePair> list = new List<ExampleDistancePair>();
    foreach (Example e in m_t_set.Examples)
    {
        list.Add(new ExampleDistancePair(e, SparseVector.Distance(x, e.X)));
    }

    // Sort ascending — presumably ExampleDistancePair compares by distance; verify its CompareTo.
    list.Sort();

    // Fixed: the original allocated and zeroed an int[m_catnum] vote tally here
    // that was never read — dead code removed (voting happens elsewhere, if at all).

    // Guard against m_k exceeding the training-set size.
    int take = Math.Min(this.m_k, list.Count);
    for (int i = 0; i < take; i++)
    {
        res.AddExample(list[i].Example);
    }
    return res;
}
/// <summary>
/// Initializes the linear learning machine from a classification problem.
/// NOTE(review): the class name is misspelled ("Leraning"); renaming would
/// break callers, so it is left as-is.
/// </summary>
/// <param name="problem">Problem supplying the training set.</param>
public LinearLeraningMachine(ClassificationProblem problem)
{
    this.m_problem = problem;
    this.m_t_set = problem.TrainingSet;
    //this.m_problem.RetrieveVocabulary(out this.m_voc);
    // Number of training examples.
    this.m_l = m_t_set.Examples.Count;
    //this.m_weight = new SparseVector(m_voc.Count);
}
/// <summary>
/// Creates an AdaBoost booster over the given training set.
/// </summary>
/// <param name="t_set">Training examples.</param>
/// <param name="t">Number of boosting rounds.</param>
/// <param name="n">Per-round parameter — semantics defined by the trainer; confirm against usage.</param>
public AdaBoost(ExampleSet t_set, int t, int n)
{
    // Round count and the per-round parameter.
    m_t = t;
    m_n = n;

    // Training data and its size.
    m_t_set = t_set;
    m_m = m_t_set.Examples.Count;

    // Weight distribution over training examples, one entry per example.
    m_dist = new double[m_m];

    // One hypothesis weight and one weak learner per boosting round.
    m_a = new double[m_t];
    m_h = new WeakLearn[m_t];
}
/// <summary>
/// Empirically estimates, for dimensions d = 10, 20, 30, 40, the fraction of
/// random l-point sets in [0,1)^d that are linearly separable, running k
/// trials per dimension and printing the ratio for each d.
/// </summary>
public void Test()
{
    const int l = 30;   // examples per trial
    const int k = 10;   // trials per dimension

    ExampleSet set = new ExampleSet();

    for (int d = 10; d < 50; d += 10)
    {
        int numSeparable = 0;

        for (int n = 0; n < k; n++)
        {
            // Rebuild a fresh random sample in the unit hypercube, reusing the set.
            set.Examples.Clear();
            for (int i = 0; i < l; i++)
            {
                SparseVector x = new SparseVector(d);
                for (int j = 0; j < d; j++)
                {
                    x[j] = m_rand.NextDouble();
                }
                Example e = new Example(GetRandCategory());
                e.X = x;
                set.AddExample(e);
            }

            SimpleLLM llm = new SimpleLLM(set, d);
            if (llm.IsLinearSeparable())
            {
                numSeparable++;
            }
        }

        // 1.0 * forces floating-point division.
        double ratioSeparable = 1.0 * numSeparable / k;
        // Console.WriteLine has a format overload — no need for string.Format.
        System.Console.WriteLine("d: {0}, l: {1}, Separable ratio: {2}", d, l, ratioSeparable);
    }
}
private SparseVector m_weight; // weight vector

#endregion Fields

#region Constructors

/// <summary>
/// Builds an SMO (Sequential Minimal Optimization) trainer for a binary SVM
/// over the problem's training set, with a linear kernel and default
/// hyperparameters taken from Constants.
/// </summary>
/// <param name="problem">Problem supplying the training set and input dimension.</param>
public Binary_SVM_SMO(ClassificationProblem problem)
{
    this.m_problem = problem;
    this.m_t_set = this.m_problem.TrainingSet;
    // this.m_problem.RetrieveVocabulary(out this.m_voc);
    // Number of training examples.
    this.m_l = m_t_set.Examples.Count;
    // One Lagrange multiplier and one cached error value per training example.
    this.m_alpha = new double[m_l];
    this.m_error = new double[m_l];
    this.m_kernel = new LinearKernel();
    // Indices of non-bound (0 < alpha < C) examples — TODO confirm against the SMO loop.
    this.m_NonBound = new List<int>();
    this.m_rand = new Random();
    this.m_weight = new SparseVector(problem.Dimension);
    // foamliu, 2009/01/12, default values
    this.m_c = Constants.SVM_C;
    this.m_eta = Constants.SVM_Eta;
    this.m_tolerance = Constants.SVM_Tolerance;
    this.m_epsilon = Constants.SVM_Epsilon;
}
/// <summary>
/// foamliu, 2009/12/21, please make sure you've uncompressed "2_newsgroups.7z" in the "data" folder.
/// Builds a binary text-classification problem from the two newsgroup
/// subfolders: the first TrainingSetRatio of each category's files go to the
/// training set, the remainder to the validation set.
/// </summary>
/// <returns>The populated problem (dimension 2, categories +1 / -1).</returns>
private static ClassificationProblem CreateText()
{
    const string DataFolder = @"..\data\2_newsgroups";

    ClassificationProblem problem = new ClassificationProblem();
    ExampleSet t_set = new ExampleSet();
    ExampleSet v_set = new ExampleSet();

    CategoryCollection collect = new CategoryCollection();
    collect.Add(new Category(+1, "+1"));
    collect.Add(new Category(-1, "-1"));
    problem.Dimension = 2;
    problem.CategoryCollection = collect;

    DirectoryInfo dataFolder = new DirectoryInfo(DataFolder);
    // .Length instead of the LINQ Count() extension — the value is an array.
    DirectoryInfo[] subfolders = dataFolder.GetDirectories();

    for (int i = 0; i < subfolders.Length; i++)
    {
        DirectoryInfo categoryFolder = subfolders[i];
        // Maps folder index 0 -> -1 and 1 -> +1.
        // NOTE(review): this id was computed but never attached to the examples
        // in the original code — examples are created unlabeled; verify intent.
        int cat = i * 2 - 1;

        // all the text files in this category
        FileInfo[] files = categoryFolder.GetFiles();
        int trainSetCount = Convert.ToInt32(Constants.TrainingSetRatio * files.Length);

        int count = 0;
        for (int j = 0; j < files.Length; j++)
        {
            Example e = new Example();
            // Fixed off-by-one: the original "++count < trainSetCount" put only
            // trainSetCount - 1 files into the training set.
            if (++count <= trainSetCount)
            {
                t_set.AddExample(e);
            }
            else
            {
                v_set.AddExample(e);
            }
        }
    }

    problem.TrainingSet = t_set;
    problem.ValidationSet = v_set;
    return problem;
}
/// <summary>
/// foamliu, 2009/04/15, generates samples: draws ExampleNumber uniformly
/// random points over a Rows x Columns grid of fixed-size cells and labels
/// each point with the category of the cell it falls in (via GetCat).
/// </summary>
/// <param name="collect">Category collection used to resolve category ids to labels.</param>
/// <returns>The generated example set.</returns>
private static ExampleSet GetExamples(CategoryCollection collect)
{
    const int Rows = 4;
    const int Columns = 4;
    const int CellWidth = 100;
    const int CellHeight = 100;
    const int ExampleNumber = 640;

    // Fixed: dropped the redundant set.Examples.Clear() — the set is freshly constructed.
    ExampleSet set = new ExampleSet();
    Random rand = new Random();

    for (int i = 0; i < ExampleNumber; i++)
    {
        // Uniform point inside the full grid area.
        int x = (int)(rand.NextDouble() * Columns * CellWidth);
        int y = (int)(rand.NextDouble() * Rows * CellHeight);

        Example e = new Example();
        e.X = new SparseVector(2);
        e.X[0] = x;
        e.X[1] = y;
        e.Label = collect.GetCategoryById(GetCat(x, y, CellWidth, CellHeight));
        set.AddExample(e);
    }

    return set;
}
/// <summary>
/// Creates a weak learner over the given training set.
/// </summary>
/// <param name="index">Index identifying this learner — presumably a feature index; confirm against the trainer.</param>
/// <param name="t_set">Training examples.</param>
public WeakLearn(int index, ExampleSet t_set)
{
    this.m_t_set = t_set;
    this.m_index = index;
    // NOTE(review): sibling constructors read t_set.Examples.Count; this one
    // uses ExampleSet.Count directly — presumably equivalent, verify.
    this.m_m = t_set.Count;
}
/// <summary>
/// Builds a simple linear learning machine over a d-dimensional training set.
/// </summary>
/// <param name="t_set">Training examples.</param>
/// <param name="d">Input dimension; sizes the weight vector.</param>
public SimpleLLM(ExampleSet t_set, int d)
{
    m_weight = new SparseVector(d);
    m_t_set = t_set;
    // Number of training examples.
    m_l = t_set.Examples.Count;
}
/// <summary>
/// No bias: evaluates f(x) = sum_i alpha_i * y_i * K(x_i, x) over the
/// training set, without a bias term.
/// </summary>
/// <param name="t_Set">Training examples, paired index-for-index with m_Alpha.</param>
/// <param name="x">Input vector to evaluate.</param>
/// <returns>The bias-free decision value.</returns>
private double Calculate_F(ExampleSet t_Set, SparseVector x)
{
    double sum = 0.0;
    for (int i = 0; i < m_Alpha.Length; i++)
    {
        Example ex = t_Set.Examples[i];
        sum += m_Alpha[i] * ex.Label.Id * m_kernel.Compute(ex.X, x);
    }
    return sum;
}
/// <summary>
/// Accumulates the primal weight vector w += sum_i y_i * alpha_i * x_i into
/// m_weight.
/// NOTE(review): this adds into the existing m_weight rather than resetting it
/// first — callers must ensure m_weight starts at zero.
/// </summary>
/// <param name="t_Set">Training examples, paired index-for-index with m_Alpha.</param>
private void CalculateWeight(ExampleSet t_Set)
{
    // Consistency fix: iterate over m_Alpha.Length like the sibling
    // Calculate_F / CalculateSVM_W methods, instead of the bare field "l".
    for (int i = 0; i < m_Alpha.Length; i++)
    {
        Example ex = t_Set.Examples[i];
        m_weight.Add((ex.Label.Id * m_Alpha[i]) * ex.X);
    }
}
/// <summary>
/// Computes the SVM dual objective:
/// W(alpha) = sum_i alpha_i - (1/2) sum_i sum_j y_i y_j alpha_i alpha_j K(x_i, x_j).
/// </summary>
/// <param name="t_Set">Training examples, paired index-for-index with m_Alpha.</param>
/// <returns>The dual objective value.</returns>
private double CalculateSVM_W(ExampleSet t_Set)
{
    // Linear term: sum of the multipliers.
    double alphaSum = 0.0;
    for (int i = 0; i < m_Alpha.Length; i++)
    {
        alphaSum += m_Alpha[i];
    }

    // Quadratic term: label-weighted kernel Gram sum.
    double quad = 0.0;
    for (int i = 0; i < m_Alpha.Length; i++)
    {
        Example ei = t_Set.Examples[i];
        for (int j = 0; j < m_Alpha.Length; j++)
        {
            Example ej = t_Set.Examples[j];
            quad += ei.Label.Id * ej.Label.Id * m_Alpha[i] * m_Alpha[j]
                * m_kernel.Compute(ei.X, ej.X);
        }
    }

    return alphaSum - quad / 2;
}
/// <summary>
/// Classifies a text example by the sign of the bias-free decision function.
/// </summary>
/// <param name="t_Set">Training examples used to evaluate the decision function.</param>
/// <param name="text">Example to classify.</param>
/// <param name="result">Receives +1 when f(x) >= 0, otherwise -1.</param>
public void PredictText(ExampleSet t_Set, Example text, ref ClassificationResult result)
{
    double f = Calculate_F(t_Set, text.X);
    result.ResultCategoryId = (f >= 0) ? +1 : -1;
}