/// <summary>
/// For each dimension 10, 20, 30 and 40, builds 10 example sets of 30 examples
/// each, asks <c>SimpleLLM.IsLinearSeparable()</c> about every set, and prints
/// the fraction of sets reported as separable.
/// </summary>
public void Test()
{
    int examplesPerSet = 30;
    int trials = 10;
    ExampleSet set = new ExampleSet();

    for (int dimension = 10; dimension < 50; dimension += 10)
    {
        int separableCount = 0;

        for (int trial = 0; trial < trials; trial++)
        {
            // The same ExampleSet instance is reused; start every trial empty.
            set.Examples.Clear();

            for (int i = 0; i < examplesPerSet; i++)
            {
                // Every coordinate comes from m_rand.NextDouble().
                SparseVector features = new SparseVector(dimension);
                for (int j = 0; j < dimension; j++)
                {
                    features[j] = m_rand.NextDouble();
                }

                // The label is whatever GetRandCategory() hands back.
                Category label = GetRandCategory();
                Example example = new Example(label);
                example.X = features;
                set.AddExample(example);
            }

            SimpleLLM llm = new SimpleLLM(set, dimension);
            if (llm.IsLinearSeparable())
            {
                separableCount++;
            }
        }

        // Multiply by 1.0 first so the division is done in floating point.
        double separableRatio = 1.0 * separableCount / trials;
        System.Console.WriteLine(string.Format("d: {0}, l: {1}, Separable ratio: {2}", dimension, examplesPerSet, separableRatio));
    }
}
/// <summary>
/// foamliu, 2009/12/21, please make sure you've uncompressed "2_newsgroups.7z" in the "data" folder.
/// Builds a classification problem with the two categories +1 and -1 from the
/// sub-folders of <c>..\data\2_newsgroups</c>: for every file found in a
/// sub-folder one example is created, and the examples of each sub-folder are
/// split between the training set and the validation set according to
/// <c>Constants.TrainingSetRatio</c>.
/// </summary>
/// <returns>
/// The problem with its dimension, category collection, training set and
/// validation set filled in.
/// </returns>
/// <exception cref="DirectoryNotFoundException">
/// The data folder does not exist (the archive has not been uncompressed).
/// </exception>
private static ClassificationProblem CreateText()
{
    const string DataFolder = @"..\data\2_newsgroups";

    ClassificationProblem problem = new ClassificationProblem();
    ExampleSet t_set = new ExampleSet();
    ExampleSet v_set = new ExampleSet();

    CategoryCollection collect = new CategoryCollection();
    collect.Add(new Category(+1, "+1"));
    collect.Add(new Category(-1, "-1"));

    problem.Dimension = 2;
    problem.CategoryCollection = collect;

    DirectoryInfo dataFolder = new DirectoryInfo(DataFolder);
    if (!dataFolder.Exists)
    {
        // Fail with an actionable message instead of the generic one raised by GetDirectories().
        throw new DirectoryNotFoundException(
            "Data folder '" + dataFolder.FullName + "' was not found; uncompress 2_newsgroups.7z into the data folder first.");
    }

    DirectoryInfo[] subfolders = dataFolder.GetDirectories();

    for (int i = 0; i < subfolders.Length; i++)
    {
        // for all the text files in each category
        FileInfo[] files = subfolders[i].GetFiles();

        // Number of files of this category that go to the training set.
        int trainSetCount = Convert.ToInt32(Constants.TrainingSetRatio * files.Length);

        for (int j = 0; j < files.Length; j++)
        {
            // NOTE(review): the example is created empty - neither a label nor
            // features derived from files[j] are assigned. The previous code
            // computed a per-folder category id (i * 2 - 1) but never used it;
            // presumably labelling and feature extraction are still to be done.
            Example e = new Example();

            // The first trainSetCount files are training examples, the rest are
            // validation examples. (The former "++count < trainSetCount" test
            // put only trainSetCount - 1 files into the training set.)
            if (j < trainSetCount)
            {
                t_set.AddExample(e);
            }
            else
            {
                v_set.AddExample(e);
            }
        }
    }

    problem.TrainingSet = t_set;
    problem.ValidationSet = v_set;

    return problem;
}
/// <summary>
/// foamliu, 2009/04/15, generates the samples.
/// Creates 640 examples whose two coordinates are random integers inside a
/// 4 x 4 grid of 100 x 100 cells; each example is labelled with the category
/// whose id is returned by <c>GetCat</c> for its coordinates.
/// </summary>
/// <param name="collect">Collection used to look up the label category by id.</param>
/// <returns>The newly generated example set.</returns>
private static ExampleSet GetExamples(CategoryCollection collect)
{
    const int Rows = 4;
    const int Columns = 4;
    const int CellWidth = 100;
    const int CellHeight = 100;
    const int ExampleNumber = 640;

    ExampleSet samples = new ExampleSet();
    samples.Examples.Clear();

    Random generator = new Random();
    int produced = 0;

    while (produced < ExampleNumber)
    {
        // Truncating casts: px lies in [0, Columns * CellWidth), py in [0, Rows * CellHeight).
        int px = (int)(generator.NextDouble() * Columns * CellWidth);
        int py = (int)(generator.NextDouble() * Rows * CellHeight);

        Example sample = new Example();
        sample.X = new SparseVector(2);
        sample.X[0] = px;
        sample.X[1] = py;

        int categoryId = GetCat(px, py, CellWidth, CellHeight);
        sample.Label = collect.GetCategoryById(categoryId);

        samples.AddExample(sample);
        produced++;
    }

    return samples;
}
/// <summary>
/// foamliu, 2008/12/30.
/// Notes: the order of presentation of training examples should be randomized from epoch
/// to epoch.
///
/// Reorders the examples of <c>this.TrainSet</c> in place using a Fisher-Yates
/// shuffle, so that every permutation is equally likely for the random numbers
/// drawn. (The previous "swap position i with any random position" loop
/// favoured some orderings over others.)
/// </summary>
private void ShuffleTrainSet()
{
    ExampleSet t_set = this.TrainSet;
    int num = t_set.Examples.Count;

    // Work on a snapshot so the collection is only touched through Count, the
    // indexer getter, Clear() and Add().
    Example[] examples = new Example[num];
    for (int i = 0; i < num; i++)
    {
        examples[i] = t_set.Examples[i];
    }

    // NOTE(review): a new Random is created on every call; on runtimes that seed
    // from the clock, calls made in quick succession may produce the same
    // ordering - consider sharing one instance.
    Random rand = new Random();

    // Fisher-Yates: walk from the back, swapping slot i with a slot in 0..i.
    for (int i = num - 1; i > 0; i--)
    {
        int j = rand.Next(i + 1); // 0 - i (inclusive)
        Example temp = examples[i];
        examples[i] = examples[j];
        examples[j] = temp;
    }

    t_set.Examples.Clear();
    for (int i = 0; i < num; i++)
    {
        t_set.Examples.Add(examples[i]);
    }
}
/// <summary>
/// Creates a pair that stores an example together with a distance value.
/// </summary>
/// <param name="example">The example to store.</param>
/// <param name="distance">The distance value to store alongside the example.</param>
public ExampleDistancePair(Example example, double distance)
{
    m_distance = distance;
    m_example = example;
}
/// <summary>
/// Adds the given example to the underlying collection of this set.
/// </summary>
/// <param name="example">The example to add.</param>
public void AddExample(Example example)
{
    this.m_Collection.Add(example);
}
/// <summary>
/// Classifies <paramref name="text"/> by the sign of the value returned by
/// <c>Calculate_F</c> and stores the category id in <paramref name="result"/>.
/// </summary>
/// <param name="t_Set">Example set passed through to <c>Calculate_F</c>.</param>
/// <param name="text">Example whose vector <c>X</c> is evaluated.</param>
/// <param name="result">Receives +1 when the value is &gt;= 0, otherwise -1.</param>
public void PredictText(ExampleSet t_Set, Example text, ref ClassificationResult result)
{
    double score = Calculate_F(t_Set, text.X);

    // A score of exactly zero counts as the positive category.
    if (score >= 0)
    {
        result.ResultCategoryId = +1;
        return;
    }

    result.ResultCategoryId = -1;
}
/// <summary>
/// Returns the predicted category id for an example. This implementation
/// ignores its argument and always answers +1.
/// </summary>
/// <param name="example">The example to classify (not inspected).</param>
/// <returns>Always +1.</returns>
public int Predict(Example example)
{
    const int PositiveCategoryId = +1;
    return PositiveCategoryId;
}
/// <summary>
/// Classifies an example by the sign of the dot product between the weight
/// vector <c>m_weight</c> and the example's vector <c>X</c>.
/// </summary>
/// <param name="example">The example to classify.</param>
/// <returns>+1 when the dot product is &gt;= 0, otherwise -1.</returns>
private int PredictText(Example example)
{
    // The bias term (m_b) is intentionally left out of the score, as in the original code.
    double score = SparseVector.DotProduct(m_weight, example.X);

    return (score >= 0) ? +1 : -1;
}