/// <summary>
/// Writes the dataset to a tab-delimited text file, overwriting any existing file.
/// </summary>
/// <remarks>
/// The file starts with three header rows: the feature names followed by "author",
/// a row of "c" markers followed by "d", and a row of empty cells followed by "class"
/// (this looks like the Orange tab format - TODO confirm). One data row is then written
/// for every non-empty label obtained by splitting the example's label string on commas,
/// so an example that carries several labels is repeated once per label.
/// NOTE(review): only the entries enumerated from the sparse vector are written; the columns
/// stay aligned with the header only if every vector stores a value for each feature - confirm.
/// </remarks>
/// <param name="featureNames">Column names, one per feature.</param>
/// <param name="dataset">Labeled examples to export.</param>
/// <param name="classType">Passed to AnalysisUtils.GetLabel to obtain the label string of each example.</param>
/// <param name="fileName">Path of the output file; the text is encoded as ASCII.</param>
public static void SaveTab(string[] featureNames, LabeledDataset<BlogMetaData, SparseVector<double>> dataset, ClassType classType, string fileName)
{
    // Numbers in a machine-readable file must not depend on the regional settings of the
    // machine that writes them (e.g. a decimal comma), so format them culture-invariantly.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    using (StreamWriter w = new StreamWriter(fileName, /*append=*/ false, Encoding.ASCII))
    {
        // Header row 1: attribute names; the last column holds the label.
        foreach (string featureName in featureNames)
        {
            w.Write(featureName + "\t");
        }
        w.WriteLine("author");
        // Header row 2: one "c" per feature column and "d" for the label column.
        for (int i = 0; i < featureNames.Length; i++)
        {
            w.Write("c\t");
        }
        w.WriteLine("d");
        // Header row 3: empty cells for the features, "class" for the label column.
        for (int i = 0; i < featureNames.Length; i++)
        {
            w.Write("\t");
        }
        w.WriteLine("class");
        foreach (LabeledExample<BlogMetaData, SparseVector<double>> lblEx in dataset)
        {
            foreach (string lblStr in AnalysisUtils.GetLabel(lblEx.Label, classType).Split(','))
            {
                if (lblStr == "")
                {
                    continue; // skip empty fragments of the comma-separated label string
                }
                foreach (IdxDat<double> item in lblEx.Example)
                {
                    w.Write(item.Dat.ToString(invariant) + "\t");
                }
                w.WriteLine(lblStr);
            }
        }
    }
}
/// <summary>
/// Writes the dataset to an ARFF-style text file, overwriting any existing file.
/// </summary>
/// <remarks>
/// The output consists of an "@RELATION" line with a freshly generated name, one numeric
/// "@ATTRIBUTE" line per feature, a nominal "class" attribute listing every distinct non-empty
/// label, and an "@DATA" section. One comma-separated data row is written for every non-empty
/// label obtained by splitting the example's label string on commas, so an example that carries
/// several labels is repeated once per label.
/// NOTE(review): only the entries enumerated from the sparse vector are written; the columns
/// stay aligned with the attributes only if every vector stores a value for each feature - confirm.
/// </remarks>
/// <param name="featureNames">Attribute names, one per feature.</param>
/// <param name="dataset">Labeled examples to export.</param>
/// <param name="classType">Passed to AnalysisUtils.GetLabel to obtain the label string of each example.</param>
/// <param name="fileName">Path of the output file; the text is encoded as ASCII.</param>
public static void SaveArff(string[] featureNames, LabeledDataset<BlogMetaData, SparseVector<double>> dataset, ClassType classType, string fileName)
{
    // Values are comma-separated, so a culture that uses a decimal comma would corrupt
    // every row; always format numbers culture-invariantly.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    using (StreamWriter w = new StreamWriter(fileName, /*append=*/ false, Encoding.ASCII))
    {
        w.WriteLine("@RELATION r" + Guid.NewGuid().ToString("N"));
        w.WriteLine();
        foreach (string featureName in featureNames)
        {
            w.WriteLine("@ATTRIBUTE " + featureName + " NUMERIC");
        }
        // Collect the distinct labels in order of first appearance. Empty fragments are dropped
        // here because the data section below never emits them either.
        string[] classes = ((IEnumerable<LabeledExample<BlogMetaData, SparseVector<double>>>)dataset)
            .SelectMany(x => AnalysisUtils.GetLabel(x.Label, classType).Split(','))
            .Where(lbl => lbl != "")
            .Distinct()
            .ToArray();
        // Join the labels explicitly instead of rewriting the collection's ToString() output,
        // which breaks as soon as a label contains a space.
        w.WriteLine("@ATTRIBUTE class {" + string.Join(",", classes) + "}");
        w.WriteLine();
        w.WriteLine("@DATA");
        foreach (LabeledExample<BlogMetaData, SparseVector<double>> lblEx in dataset)
        {
            foreach (string lblStr in AnalysisUtils.GetLabel(lblEx.Label, classType).Split(','))
            {
                if (lblStr == "")
                {
                    continue; // skip empty fragments of the comma-separated label string
                }
                foreach (IdxDat<double> item in lblEx.Example)
                {
                    w.Write(item.Dat.ToString(invariant) + ",");
                }
                w.WriteLine(lblStr);
            }
        }
    }
}
/// <summary>
/// Builds a new dataset in which every example is reduced to a single feature,
/// rescaled with min-max normalization.
/// </summary>
/// <remarks>
/// The minimum and maximum come from GetExtremes over the whole source dataset. When the
/// feature has the same minimum and maximum the normalized value is set to 0 instead of the
/// NaN that dividing by a zero range would produce.
/// </remarks>
/// <param name="srcDataset">Dataset to take the examples and labels from.</param>
/// <param name="classType">Passed to AnalysisUtils.GetLabel to obtain the string label of each example.</param>
/// <param name="fIdx">Index of the feature to keep.</param>
/// <returns>A dataset with one single-element vector per source example, labeled with the label string.</returns>
static LabeledDataset<string, SparseVector<double>> CreateSingleFeatureDataset(LabeledDataset<BlogMetaData, SparseVector<double>> srcDataset, ClassType classType, int fIdx)
{
    SparseVector<double> minValues, maxValues;
    GetExtremes(srcDataset, out minValues, out maxValues);
    LabeledDataset<string, SparseVector<double>> dataset = new LabeledDataset<string, SparseVector<double>>();
    foreach (LabeledExample<BlogMetaData, SparseVector<double>> lblEx in srcDataset)
    {
        // The extremes are read inside the loop so that an empty source dataset never
        // touches the extreme vectors, exactly as before.
        double min = minValues[fIdx];
        double range = maxValues[fIdx] - min;
        // simple normalization; a constant feature (zero range) maps to 0 rather than NaN
        double normalized = range == 0 ? 0 : (lblEx.Example[fIdx] - min) / range;
        dataset.Add(new LabeledExample<string, SparseVector<double>>(
            AnalysisUtils.GetLabel(lblEx.Label, classType),
            new SparseVector<double>(new double[] { normalized })));
    }
    return dataset;
}