Beispiel #1
0
        /// <summary>
        /// Reads the CSV file named by <c>options.DataFile</c> and stores each record's
        /// word count on the matching article.
        /// </summary>
        /// <remarks>
        /// Every record supplies three integer fields, read in this order: an article id,
        /// a word id and a word count. The article id is reduced by one before it is used
        /// as an index into <paramref name="articles"/>. The largest word id encountered
        /// is kept in <c>k</c>, which is declared outside this method.
        /// </remarks>
        /// <param name="options">Execution options; only <c>DataFile</c> is read here.</param>
        /// <param name="articles">Articles whose <c>WordCounts</c> entries are filled in.</param>
        /// <exception cref="ArgumentException">The data file does not exist.</exception>
        private static void ReadData(ExecutionOption options, List <Article> articles)
        {
            // Guard clause: nothing is opened when the file is missing.
            if (!File.Exists(options.DataFile))
            {
                string message = string.Format("File not found {0}", options.DataFile);
                throw new ArgumentException(message);
            }

            using (var stream = new StreamReader(options.DataFile))
            {
                var csv = new CsvReader(stream);
                csv.Configuration.HasHeaderRecord = false;

                while (csv.Read())
                {
                    int articleNumber = csv.GetField<int>(0);
                    int wordIndex     = csv.GetField<int>(1);
                    int occurrences   = csv.GetField<int>(2);

                    // The id from the file is shifted down by one to index the list.
                    articles[articleNumber - 1].WordCounts[wordIndex] = occurrences;

                    // Remember the highest word id seen so far.
                    if (wordIndex > k)
                    {
                        k = wordIndex;
                    }
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reads the CSV file named by <c>options.GroupsFile</c> and returns the first
        /// field of every record, in file order.
        /// </summary>
        /// <param name="options">Execution options; only <c>GroupsFile</c> is read here.</param>
        /// <returns>One string per record of the groups file.</returns>
        /// <exception cref="ArgumentException">The groups file does not exist.</exception>
        private static List <string> ReadGroups(ExecutionOption options)
        {
            if (!File.Exists(options.GroupsFile))
            {
                // Report the path that was actually checked; the previous message
                // formatted options.LabelFile and so pointed at the wrong file.
                throw new ArgumentException(string.Format("File not found {0}", options.GroupsFile));
            }

            List <string> groups = new List <string>();

            using (var sr = new StreamReader(options.GroupsFile))
            {
                var reader = new CsvReader(sr);
                // Treat the first line as data, not as a header.
                reader.Configuration.HasHeaderRecord = false;

                while (reader.Read())
                {
                    groups.Add(reader.GetField <string>(0));
                }
            }

            return groups;
        }
Beispiel #3
0
        /// <summary>
        /// Builds one <c>Article</c> per record of the CSV file named by
        /// <c>options.LabelFile</c>.
        /// </summary>
        /// <remarks>
        /// The first field of each record is read as an integer label, and the article's
        /// <c>GroupId</c> is set to that label minus one.
        /// </remarks>
        /// <param name="options">Execution options; only <c>LabelFile</c> is read here.</param>
        /// <param name="groups">Not used by this method.</param>
        /// <returns>The created articles, in file order.</returns>
        /// <exception cref="ArgumentException">The label file does not exist.</exception>
        private static List <Article> ReadLabels(ExecutionOption options, List <string> groups)
        {
            // Guard clause: fail before opening anything when the file is missing.
            if (!File.Exists(options.LabelFile))
            {
                string message = string.Format("File not found {0}", options.LabelFile);
                throw new ArgumentException(message);
            }

            var result = new List<Article>();

            using (var stream = new StreamReader(options.LabelFile))
            {
                var csv = new CsvReader(stream);
                csv.Configuration.HasHeaderRecord = false;

                while (csv.Read())
                {
                    int rawLabel = csv.GetField<int>(0);

                    // The stored group id is the file's label shifted down by one.
                    result.Add(new Article { GroupId = rawLabel - 1 });
                }
            }

            return result;
        }
Beispiel #4
0
        /// <summary>
        /// Program entry point: parses the command line, loads groups, labels and word
        /// counts, runs the selected execution mode and writes its result matrix to
        /// <c>output.csv</c>.
        /// </summary>
        /// <remarks>
        /// Nothing is done when command-line parsing fails. After a mode completes, a
        /// separator line is printed to the console.
        /// </remarks>
        /// <param name="args">Command-line arguments, parsed into an <c>ExecutionOption</c>.</param>
        /// <exception cref="NotSupportedException">
        /// The execution mode is <c>RandomProjectionCosineScatter</c> or any mode not handled here.
        /// </exception>
        static void Main(string[] args)
        {
            ExecutionOption options = new ExecutionOption();

            if (!CommandLine.Parser.Default.ParseArguments(args, options))
            {
                return;
            }

            List <string>  groups   = ReadGroups(options);
            List <Article> articles = ReadLabels(options, groups);
            ReadData(options, articles);

            if (options.ExecutionMode == ExecutionMode.AllWayAverage)
            {
                double[,] sum = new double[groups.Count, groups.Count];
                int[,] count  = new int[groups.Count, groups.Count];

                for (int i = 0; i < articles.Count; i++)
                {
                    Article ai = articles[i];
                    for (int j = i + 1; j < articles.Count; j++)
                    {
                        Article aj = articles[j];

                        // Only cells with row <= column are read when the matrix is written,
                        // so always accumulate there. Indexing by [ai.GroupId, aj.GroupId]
                        // directly would drop every pair whose first article has the larger
                        // group id, which happens when articles are not ordered by group.
                        int row = Math.Min(ai.GroupId, aj.GroupId);
                        int col = Math.Max(ai.GroupId, aj.GroupId);

                        sum[row, col]   += GetDistance(ai.WordCounts, aj.WordCounts, options.SimilarityFunction);
                        count[row, col] += 1;
                    }
                }

                using (var sr = new StreamWriter("output.csv"))
                {
                    var writer = new CsvWriter(sr);
                    for (int i = 0; i < groups.Count; i++)
                    {
                        for (int j = 0; j < groups.Count; j++)
                        {
                            // The written matrix is symmetric: (i, j) and (j, i) share one cell.
                            int row       = Math.Min(i, j);
                            int col       = Math.Max(i, j);
                            int pairCount = count[row, col];

                            // Divide only when there is something to average; empty cells are 0.0.
                            double average = pairCount > 0 ? sum[row, col] / pairCount : 0.0;
                            writer.WriteField(average);
                        }

                        writer.NextRecord();
                    }
                }
            }
            else if (options.ExecutionMode == ExecutionMode.MostNearestNeighbor)
            {
                int[,] count = CountNearestForeignGroups(
                    articles,
                    groups.Count,
                    (a, b) => GetDistance(a.WordCounts, b.WordCounts, options.SimilarityFunction));

                WriteCountMatrix(count, groups.Count);
            }
            else if (options.ExecutionMode == ExecutionMode.MostNearestNeighborWithRandomProjection)
            {
                var M      = Matrix <double> .Build;
                var matrix = M.Random(options.RandomProjectionDimCount, k + 1, new Normal(0.0, 1.0));

                // Must run before scoring: the scoring below reads WordCountVector.
                DimensionReduceArticles(articles, matrix);

                int[,] count = CountNearestForeignGroups(
                    articles,
                    groups.Count,
                    (a, b) => GetDistance(a.WordCountVector, b.WordCountVector, options.SimilarityFunction));

                WriteCountMatrix(count, groups.Count);
            }
            else if (options.ExecutionMode == ExecutionMode.RandomProjectionCosineScatter)
            {
                // This mode has no implementation here; it fails exactly like an unknown mode.
                throw new NotSupportedException("Unknown execution mode");
            }
            else
            {
                throw new NotSupportedException("Unknown execution mode");
            }

            Console.WriteLine("----------------------------------------------------------------------");
        }

        /// <summary>
        /// For every article, finds the highest-scoring article belonging to a different
        /// group and tallies that group against the article's own group.
        /// </summary>
        /// <remarks>
        /// A score must be strictly greater than 0.0 to be considered, and on equal scores
        /// the earliest article in the list wins. Articles with no qualifying neighbour
        /// contribute nothing to the tally.
        /// </remarks>
        /// <param name="articles">Articles to compare pairwise.</param>
        /// <param name="groupCount">Size of each dimension of the returned matrix.</param>
        /// <param name="score">Scoring function applied to (article, candidate neighbour).</param>
        /// <returns>Matrix indexed as [own group id, best neighbour's group id].</returns>
        private static int[,] CountNearestForeignGroups(List <Article> articles, int groupCount, Func <Article, Article, double> score)
        {
            int[,] count = new int[groupCount, groupCount];

            for (int i = 0; i < articles.Count; i++)
            {
                double bestScore   = 0.0;
                int    bestGroupId = -1;

                Article ai = articles[i];
                for (int j = 0; j < articles.Count; j++)
                {
                    Article aj = articles[j];

                    // Only articles from other groups are candidates.
                    if (ai.GroupId == aj.GroupId)
                    {
                        continue;
                    }

                    double candidate = score(ai, aj);

                    if (candidate > bestScore)
                    {
                        bestScore   = candidate;
                        bestGroupId = aj.GroupId;
                    }
                }

                if (bestGroupId >= 0)
                {
                    count[ai.GroupId, bestGroupId] += 1;
                }
            }

            return count;
        }

        /// <summary>
        /// Writes the leading <paramref name="size"/> by <paramref name="size"/> cells of
        /// <paramref name="count"/> to <c>output.csv</c>, one matrix row per CSV record.
        /// </summary>
        /// <param name="count">Matrix to write.</param>
        /// <param name="size">Number of rows and columns to write.</param>
        private static void WriteCountMatrix(int[,] count, int size)
        {
            using (var sr = new StreamWriter("output.csv"))
            {
                var writer = new CsvWriter(sr);
                for (int i = 0; i < size; i++)
                {
                    for (int j = 0; j < size; j++)
                    {
                        writer.WriteField(count[i, j]);
                    }

                    writer.NextRecord();
                }
            }
        }