/// <summary>
/// Reads a classified text sample from <paramref name="fpath"/> and writes it to
/// <paramref name="opath"/> as comma-separated feature vectors.
/// </summary>
/// <remarks>
/// Every non-blank input line is split on <c>SEPARATOR</c> (empty entries removed):
/// the first segment is the key looked up in <c>m_Classes</c>, the second segment is
/// the document. The output starts with a header made of the vocabulary entries
/// followed by <c>_class,_value,_training</c>; each following row holds the feature
/// vector of one document, then the class name, the class value and the literal 1.
/// Documents for which <c>Alg.ExtractFeatureVector</c> reports an empty vector are
/// not written.
/// </remarks>
/// <param name="fpath">Path of the input file; opened read-only.</param>
/// <param name="opath">
/// Path of the output file. It is opened with <see cref="FileMode.CreateNew"/>,
/// so the call fails if the file already exists.
/// </param>
/// <exception cref="InvalidDataException">
/// A non-blank input line has fewer than two segments.
/// </exception>
private void doExport(string fpath, string opath)
{
    var sample = new ClassifiedSample<string>();

    using (var srcFile = File.Open(fpath, FileMode.Open, FileAccess.Read))
    using (var srcReader = new StreamReader(srcFile))
    {
        // Consume the whole file: ReadLine returns null once the end of the stream is reached.
        string line;
        var lineNo = 0;
        while ((line = srcReader.ReadLine()) != null)
        {
            lineNo++;

            // Blank lines (e.g. a trailing newline) carry no data.
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var segs = line.Split(SEPARATOR, StringSplitOptions.RemoveEmptyEntries);
            if (segs.Length < 2)
            {
                throw new InvalidDataException(
                    string.Format("Line {0} of '{1}' must contain a class and a document", lineNo, fpath));
            }

            var cls = m_Classes[segs[0]];
            var doc = segs[1];

            // NOTE(review): behavior of Add for a repeated document depends on
            // ClassifiedSample, which is not visible here - confirm duplicates are acceptable.
            sample.Add(doc, cls);
        }
    }

    var vocabulary = Alg.ExtractVocabulary(sample);
    var dim = vocabulary.Count;
    var builder = new StringBuilder();

    // Machine-readable output: format with the invariant culture so that values are
    // never rendered with a decimal comma, which would collide with the column separator.
    var culture = System.Globalization.CultureInfo.InvariantCulture;

    using (var outFile = File.Open(opath, FileMode.CreateNew, FileAccess.Write))
    using (var outWriter = new StreamWriter(outFile))
    {
        // Header: one column per vocabulary entry, then the three fixed columns.
        for (int i = 0; i < dim; i++)
        {
            builder.AppendFormat(culture, "{0},", vocabulary[i]);
        }
        builder.Append("_class,_value,_training");
        outWriter.WriteLine(builder.ToString());

        foreach (var pData in sample)
        {
            var doc = pData.Key;
            var cls = pData.Value;

            bool isEmpty;
            var data = Alg.ExtractFeatureVector(doc, out isEmpty);
            if (isEmpty)
            {
                continue;
            }

            // The builder is reused for every row.
            builder.Clear();
            for (int i = 0; i < dim; i++)
            {
                builder.AppendFormat(culture, "{0},", data[i]);
            }
            builder.AppendFormat(culture, "{0},{1},{2}", cls.Name, cls.Value, 1);
            outWriter.WriteLine(builder.ToString());
        }
    }
}