Beispiel #1
0
 /// <summary>
 /// Writes <paramref name="dataset"/> to a tab-delimited text file with a three-line header.
 /// </summary>
 /// <remarks>
 /// Header layout (one column per feature plus a trailing label column):
 /// line 1 holds the feature names followed by "author", line 2 holds "c" for every
 /// feature followed by "d", and line 3 holds an empty cell for every feature followed
 /// by "class". NOTE(review): this looks like the Orange .tab layout - confirm.
 /// Each example is written once per non-empty, comma-separated label returned by
 /// <c>AnalysisUtils.GetLabel</c>.
 /// </remarks>
 /// <param name="featureNames">Column names written to the first header line.</param>
 /// <param name="dataset">Labeled examples to export.</param>
 /// <param name="classType">Passed to <c>AnalysisUtils.GetLabel</c> to obtain the label string.</param>
 /// <param name="fileName">Path of the file to create or overwrite (ASCII encoded).</param>
 public static void SaveTab(string[] featureNames, LabeledDataset <BlogMetaData, SparseVector <double> > dataset, ClassType classType, string fileName)
 {
     using (StreamWriter w = new StreamWriter(fileName, /*append=*/ false, Encoding.ASCII))
     {
         // Header line 1: column names.
         foreach (string featureName in featureNames)
         {
             w.Write(featureName + "\t");
         }
         w.WriteLine("author");
         // Header line 2: one type code per column.
         for (int i = 0; i < featureNames.Length; i++)
         {
             w.Write("c\t");
         }
         w.WriteLine("d");
         // Header line 3: empty cell per feature, "class" marks the label column.
         for (int i = 0; i < featureNames.Length; i++)
         {
             w.Write("\t");
         }
         w.WriteLine("class");
         foreach (LabeledExample <BlogMetaData, SparseVector <double> > lblEx in dataset)
         {
             // An example carrying several comma-separated labels yields one row per label.
             foreach (string lblStr in AnalysisUtils.GetLabel(lblEx.Label, classType).Split(','))
             {
                 if (lblStr != "")
                 {
                     // NOTE(review): this enumerates the items the vector exposes; if it only
                     // yields stored (non-zero) entries the cells will not line up with the
                     // header columns - confirm the vectors are dense.
                     foreach (IdxDat <double> item in lblEx.Example)
                     {
                         // Invariant culture keeps '.' as the decimal separator regardless of
                         // the machine's regional settings, so the file stays machine-readable.
                         w.Write(item.Dat.ToString(System.Globalization.CultureInfo.InvariantCulture) + "\t");
                     }
                     w.WriteLine(lblStr);
                 }
             }
         }
     }
 }
Beispiel #2
0
 /// <summary>
 /// Writes <paramref name="dataset"/> to a file using ARFF-style directives
 /// (<c>@RELATION</c>, <c>@ATTRIBUTE</c>, <c>@DATA</c>).
 /// </summary>
 /// <remarks>
 /// Every feature is declared as <c>NUMERIC</c>; the final attribute, <c>class</c>, is
 /// declared as a brace-enclosed list of the distinct non-empty labels found in the dataset.
 /// Each example is written once per non-empty, comma-separated label returned by
 /// <c>AnalysisUtils.GetLabel</c>.
 /// NOTE(review): feature names and labels are written unquoted - confirm they never
 /// contain spaces, commas or other characters that need quoting.
 /// </remarks>
 /// <param name="featureNames">Attribute names, one <c>@ATTRIBUTE</c> line each.</param>
 /// <param name="dataset">Labeled examples to export.</param>
 /// <param name="classType">Passed to <c>AnalysisUtils.GetLabel</c> to obtain the label string.</param>
 /// <param name="fileName">Path of the file to create or overwrite (ASCII encoded).</param>
 public static void SaveArff(string[] featureNames, LabeledDataset <BlogMetaData, SparseVector <double> > dataset, ClassType classType, string fileName)
 {
     using (StreamWriter w = new StreamWriter(fileName, /*append=*/ false, Encoding.ASCII))
     {
         // A fresh GUID gives every export a unique relation name.
         w.WriteLine("@RELATION r" + Guid.NewGuid().ToString("N"));
         w.WriteLine();
         foreach (string featureName in featureNames)
         {
             w.WriteLine("@ATTRIBUTE " + featureName + " NUMERIC");
         }
         // Collect the distinct labels in first-seen order. Empty labels are dropped so the
         // declared class values match exactly what the data section below can emit.
         string[] classLabels = ((IEnumerable <LabeledExample <BlogMetaData, SparseVector <double> > >)dataset)
             .SelectMany(x => AnalysisUtils.GetLabel(x.Label, classType).Split(','))
             .Where(lbl => lbl != "")
             .Distinct()
             .ToArray();
         // Build the value list explicitly instead of rewriting a collection's ToString()
         // output, which breaks on labels containing spaces and on an empty label set.
         w.WriteLine("@ATTRIBUTE class {" + string.Join(",", classLabels) + "}");
         w.WriteLine();
         w.WriteLine("@DATA");
         foreach (LabeledExample <BlogMetaData, SparseVector <double> > lblEx in dataset)
         {
             // An example carrying several comma-separated labels yields one row per label.
             foreach (string lblStr in AnalysisUtils.GetLabel(lblEx.Label, classType).Split(','))
             {
                 if (lblStr != "")
                 {
                     // NOTE(review): this enumerates the items the vector exposes; if it only
                     // yields stored (non-zero) entries the row will have fewer values than
                     // declared attributes - confirm the vectors are dense.
                     foreach (IdxDat <double> item in lblEx.Example)
                     {
                         // Invariant culture is essential here: a comma decimal separator
                         // would be indistinguishable from the field separator.
                         w.Write(item.Dat.ToString(System.Globalization.CultureInfo.InvariantCulture) + ",");
                     }
                     w.WriteLine(lblStr);
                 }
             }
         }
     }
 }
Beispiel #3
0
        /// <summary>
        /// Builds a new dataset in which every example consists of a single value: feature
        /// <paramref name="fIdx"/> of the source example, min-max normalized using the extremes
        /// reported by <c>GetExtremes</c> for <paramref name="srcDataset"/>.
        /// </summary>
        /// <param name="srcDataset">Dataset to read examples and labels from.</param>
        /// <param name="classType">Passed to <c>AnalysisUtils.GetLabel</c> to obtain each example's string label.</param>
        /// <param name="fIdx">Index of the feature to extract.</param>
        /// <returns>
        /// A dataset with one single-valued example per source example. When the feature has the
        /// same minimum and maximum, the normalized value is 0 rather than NaN or Infinity.
        /// </returns>
        static LabeledDataset <string, SparseVector <double> > CreateSingleFeatureDataset(LabeledDataset <BlogMetaData, SparseVector <double> > srcDataset, ClassType classType, int fIdx)
        {
            SparseVector <double> minValues, maxValues;

            GetExtremes(srcDataset, out minValues, out maxValues);
            LabeledDataset <string, SparseVector <double> > dataset = new LabeledDataset <string, SparseVector <double> >();

            foreach (LabeledExample <BlogMetaData, SparseVector <double> > lblEx in srcDataset)
            {
                // The lookups stay inside the loop so that an empty source dataset never
                // touches the extremes vectors, exactly as before.
                double value = lblEx.Example[fIdx];
                double min = minValues[fIdx];
                double range = maxValues[fIdx] - min;
                // simple normalization; a zero range (constant feature) would divide by zero,
                // so map that case to 0 instead of producing NaN/Infinity.
                double normalized = range == 0.0 ? 0.0 : (value - min) / range;
                dataset.Add(new LabeledExample <string, SparseVector <double> >(
                                AnalysisUtils.GetLabel(lblEx.Label, classType),
                                new SparseVector <double>(new double[] { normalized })));
            }
            return dataset;
        }