/// <summary>
/// Collects the training examples that are nearest to <paramref name="x"/>.
/// Every example in <c>m_t_set</c> is paired with its distance to
/// <paramref name="x"/> (via <c>SparseVector.Distance</c>), the pairs are sorted,
/// and the first <c>m_k</c> of them are returned.
/// </summary>
/// <param name="x">The query vector whose neighbours are requested.</param>
/// <returns>
/// A new <c>ExampleSet</c> holding at most <c>m_k</c> examples; fewer when the
/// training set itself contains fewer than <c>m_k</c> examples.
/// </returns>
public ExampleSet GetProof(SparseVector x)
{
    ExampleSet res = new ExampleSet();

    List<ExampleDistancePair> neighbours = new List<ExampleDistancePair>();
    foreach (Example e in m_t_set.Examples)
    {
        neighbours.Add(new ExampleDistancePair(e, SparseVector.Distance(x, e.X)));
    }

    // Ordering comes from ExampleDistancePair's default comparison, which is
    // defined elsewhere; presumably ascending by distance - TODO confirm.
    neighbours.Sort();

    // Stop at the end of the list as well as at m_k, so that a training set
    // smaller than m_k no longer raises ArgumentOutOfRangeException.
    for (int i = 0; i < this.m_k && i < neighbours.Count; i++)
    {
        res.AddExample(neighbours[i].Example);
    }

    return res;
}
/// <summary>
/// foamliu, 2009/04/15, generates samples.
/// Builds 640 two-dimensional examples whose coordinates are drawn at random
/// from an area of 4 x 4 cells, each cell being 100 x 100. Every example is
/// labelled with the category whose id <c>GetCat</c> returns for its coordinates.
/// </summary>
/// <param name="collect">Category collection used to turn a category id into the example's label.</param>
/// <returns>A new example set containing the generated examples.</returns>
private static ExampleSet GetExamples(CategoryCollection collect)
{
    const int GridRows = 4;
    const int GridColumns = 4;
    const int CellW = 100;
    const int CellH = 100;
    const int SampleCount = 640;

    ExampleSet samples = new ExampleSet();
    samples.Examples.Clear();

    Random generator = new Random();
    int remaining = SampleCount;
    while (remaining-- > 0)
    {
        // The horizontal coordinate is drawn first, then the vertical one.
        int px = (int)(generator.NextDouble() * GridColumns * CellW);
        int py = (int)(generator.NextDouble() * GridRows * CellH);

        Example sample = new Example();
        sample.X = new SparseVector(2);
        sample.X[0] = px;
        sample.X[1] = py;
        sample.Label = collect.GetCategoryById(GetCat(px, py, CellW, CellH));

        samples.AddExample(sample);
    }

    return samples;
}
/// <summary>
/// foamliu, 2009/12/21, please make sure you've uncompressed "2_newsgroups.7z" in the "data" folder.
/// Builds a classification problem with the two categories "+1" and "-1".
/// Each sub-folder of the data folder is scanned, and one example is created
/// per file: the first <c>Constants.TrainingSetRatio</c> share of a folder's
/// files goes to the training set, the remainder to the validation set.
/// </summary>
/// <returns>
/// The problem with its dimension, category collection, training set and
/// validation set assigned.
/// </returns>
private static ClassificationProblem CreateText()
{
    const string DataFolder = @"..\data\2_newsgroups";

    ClassificationProblem problem = new ClassificationProblem();
    ExampleSet t_set = new ExampleSet();
    ExampleSet v_set = new ExampleSet();

    CategoryCollection collect = new CategoryCollection();
    collect.Add(new Category(+1, "+1"));
    collect.Add(new Category(-1, "-1"));

    problem.Dimension = 2;
    problem.CategoryCollection = collect;

    DirectoryInfo dataFolder = new DirectoryInfo(DataFolder);
    DirectoryInfo[] subfolders = dataFolder.GetDirectories();

    for (int i = 0; i < subfolders.Length; i++)
    {
        // for all the text files in each category
        FileInfo[] files = subfolders[i].GetFiles();
        int trainSetCount = Convert.ToInt32(Constants.TrainingSetRatio * files.Length);

        for (int j = 0; j < files.Length; j++)
        {
            // NOTE(review): the example is added empty - neither features read
            // from files[j] nor a label are assigned. The original computed an
            // unused category id (i * 2 - 1), which suggests labelling was
            // intended but never wired up - confirm before relying on these sets.
            Example e = new Example();

            // The first trainSetCount files of the folder are training data.
            // The earlier "++count < trainSetCount" test admitted one file
            // fewer than the computed count.
            if (j < trainSetCount)
            {
                t_set.AddExample(e);
            }
            else
            {
                v_set.AddExample(e);
            }
        }
    }

    problem.TrainingSet = t_set;
    problem.ValidationSet = v_set;

    return problem;
}
/// <summary>
/// For each dimension 10, 20, 30 and 40, runs 10 trials. Each trial fills the
/// example set with 30 examples whose components come from
/// <c>m_rand.NextDouble()</c> and whose category comes from
/// <c>GetRandCategory()</c>, then asks <c>SimpleLLM</c> whether the set is
/// linearly separable. The fraction of separable trials is written to the
/// console, one line per dimension.
/// </summary>
public void Test()
{
    const int ExamplesPerSet = 30;
    const int TrialsPerDimension = 10;

    // A single set instance is reused; it is emptied at the start of every trial.
    ExampleSet samples = new ExampleSet();

    int dimension = 10;
    while (dimension < 50)
    {
        int separableTrials = 0;

        for (int trial = 0; trial < TrialsPerDimension; trial++)
        {
            samples.Examples.Clear();

            for (int index = 0; index < ExamplesPerSet; index++)
            {
                SparseVector features = new SparseVector(dimension);
                for (int axis = 0; axis < dimension; axis++)
                {
                    features[axis] = m_rand.NextDouble();
                }

                // The category is drawn after the feature components.
                Example sample = new Example(GetRandCategory());
                sample.X = features;
                samples.AddExample(sample);
            }

            SimpleLLM llm = new SimpleLLM(samples, dimension);
            if (llm.IsLinearSeparable())
            {
                separableTrials++;
            }
        }

        double separableRatio = (double)separableTrials / TrialsPerDimension;
        string report = string.Format("d: {0}, l: {1}, Separable ratio: {2}", dimension, ExamplesPerSet, separableRatio);
        System.Console.WriteLine(report);

        dimension += 10;
    }
}