Beispiel #1
0
        /// <summary>
        /// Reads labelled documents from <paramref name="fpath"/> and writes them to
        /// <paramref name="opath"/> as comma-separated rows of feature values.
        /// </summary>
        /// <remarks>
        /// Each non-blank input line is split on SEPARATOR (empty entries removed): the first
        /// segment is the key looked up in m_Classes, the second is the document text.
        /// The output starts with a header row (one column per vocabulary entry followed by
        /// <c>_class,_value,_training</c>) and then has one row per document whose feature
        /// vector is not reported empty by Alg.ExtractFeatureVector.
        /// </remarks>
        /// <param name="fpath">Path of the input file; it must already exist.</param>
        /// <param name="opath">
        /// Path of the output file. It is opened with <c>FileMode.CreateNew</c>, so the call
        /// fails if the file already exists.
        /// </param>
        /// <exception cref="System.IO.InvalidDataException">
        /// A non-blank input line yields fewer than two segments.
        /// </exception>
        private void doExport(string fpath, string opath)
        {
            // The output is machine-readable and comma-separated, so values are formatted with
            // the invariant culture: a culture-specific decimal comma would otherwise be
            // indistinguishable from a column separator.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var sample    = new ClassifiedSample<string>();

            using (var srcFile = File.Open(fpath, FileMode.Open, FileAccess.Read))
            using (var srcReader = new StreamReader(srcFile))
            {
                string line;
                var    lineNo = 0;

                // Consume the whole file; ReadLine returns null once the end of the stream is reached.
                while ((line = srcReader.ReadLine()) != null)
                {
                    lineNo++;

                    var segs = line.Split(SEPARATOR, StringSplitOptions.RemoveEmptyEntries);
                    if (segs.Length == 0)
                    {
                        continue; // blank line, nothing to import
                    }

                    if (segs.Length < 2)
                    {
                        throw new InvalidDataException(
                            string.Format(invariant,
                                          "Line {0} of '{1}' must contain a class and a document.",
                                          lineNo,
                                          fpath));
                    }

                    var cls = m_Classes[segs[0]];
                    var doc = segs[1];

                    // NOTE(review): if ClassifiedSample rejects duplicate keys, a repeated
                    // document text will throw here - confirm the intended handling.
                    sample.Add(doc, cls);
                }
            }

            // NOTE(review): ExtractVocabulary presumably also prepares Alg for the
            // ExtractFeatureVector calls below - confirm; the call order is kept as in the original.
            var vocabulary = Alg.ExtractVocabulary(sample);
            var dim        = vocabulary.Count;
            var builder    = new StringBuilder();

            using (var outFile = File.Open(opath, FileMode.CreateNew, FileAccess.Write))
            using (var outWriter = new StreamWriter(outFile))
            {
                // Header row: one column per vocabulary entry, then the three fixed columns.
                for (int i = 0; i < dim; i++)
                {
                    builder.AppendFormat(invariant, "{0},", vocabulary[i]);
                }
                builder.Append("_class,_value,_training");

                outWriter.WriteLine(builder.ToString());

                foreach (var pData in sample)
                {
                    var  doc = pData.Key;
                    var  cls = pData.Value;
                    bool isEmpty;
                    var  data = Alg.ExtractFeatureVector(doc, out isEmpty);
                    if (isEmpty)
                    {
                        continue; // documents flagged as empty by the extractor are not exported
                    }

                    builder.Clear();
                    for (int i = 0; i < dim; i++)
                    {
                        builder.AppendFormat(invariant, "{0},", data[i]);
                    }

                    // The last column (_training) is always written as 1.
                    builder.AppendFormat(invariant, "{0},{1},{2}", cls.Name, cls.Value, 1);

                    outWriter.WriteLine(builder.ToString());
                }
            }
        }