/// <summary>
        /// Writes the documents-over-topics distribution to "DocsOverTopicsDistrib.txt"
        /// inside <c>Program.StatisticsDirectory</c>, one tab-separated line per entry:
        /// key, the first 5 topic words for that key, and the entry value.
        /// Entries are written in descending order of their value.
        /// </summary>
        public void GetDocsOverTopicsDistrib()
        {
            const double threshold = 0.09;

            // Topic ids handed to the distribution call as exclusions.
            var topicExclusions = new[]
            {
                1, 6, 7, 11, 13, 15, 16, 17, 21, 24, 34, 36, 38, 41, 47, 49, 50, 51, 57, 60, 61, 64, 65, 76,
                77, 78, 83, 87, 89
            };

            var distrib = topicsStatistics.MaxThresholdDocsOverTopicsDistribution(threshold, topicExclusions);
            var converter = new TopicConverter(Program.TopicsWordsFileName);

            var outputPath = Program.StatisticsDirectory + "DocsOverTopicsDistrib.txt";

            // Lazily evaluated; File.WriteAllLines enumerates it once.
            var lines =
                from entry in distrib
                orderby entry.Value descending
                select entry.Key + "\t" + String.Join(", ", converter.GetTopicWords(entry.Key, 5)) + "\t" + entry.Value;

            File.WriteAllLines(outputPath, lines);
        }
        /// <summary>
        /// Writes the matrix returned by <c>userTopicsWalking.GetStochasticMatrix</c> to
        /// "TopicRelationsMatrix.txt" as a tab-separated table encoded in code page 1251.
        /// The header row and the first column hold topic labels (the first 5 topic words
        /// joined with "_"); rows and columns whose index is in <c>topicExclusions</c> are omitted.
        /// Cell values are formatted with the invariant culture.
        /// </summary>
        public void GenerateTopicRelationsMatrix()
        {
            var topicConverter = new TopicConverter(Program.TopicsWordsFileName);
            const int nTopicWords = 5;

            var relMatrix = userTopicsWalking.GetStochasticMatrix(topicExclusions);

            // A StringBuilder avoids re-copying the whole table on every appended cell,
            // which is what repeated string concatenation in nested loops would do.
            var output = new StringBuilder();

            // Header row: empty corner cell followed by one label per non-excluded topic.
            // NOTE(review): the header iterates Program.TopicsCount while the body iterates the
            // matrix dimensions; this presumably relies on both being equal - confirm.
            output.Append('\t');
            for (int i = 0; i < Program.TopicsCount; i++)
            {
                if (!topicExclusions.Contains(i))
                {
                    output.Append(String.Join("_", topicConverter.GetTopicWords(i, nTopicWords))).Append('\t');
                }
            }
            output.Append('\n');

            // Body: one line per non-excluded row, starting with the row's topic label.
            for (int i = 0; i < relMatrix.GetLength(0); i++)
            {
                if (topicExclusions.Contains(i))
                {
                    continue;
                }

                output.Append(String.Join("_", topicConverter.GetTopicWords(i, nTopicWords))).Append('\t');
                for (int j = 0; j < relMatrix.GetLength(1); j++)
                {
                    if (!topicExclusions.Contains(j))
                    {
                        output.Append(relMatrix[i, j].ToString(CultureInfo.InvariantCulture)).Append('\t');
                    }
                }
                output.Append('\n');
            }

            File.WriteAllText("TopicRelationsMatrix.txt", output.ToString(), Encoding.GetEncoding(1251));
        }
        /// <summary>
        /// Builds the user topic-walking graph and exports it in GV format (e.g. for Gephi).
        /// Also prints every entry of <c>topicMoves</c> to the console, ordered by value descending.
        /// </summary>
        /// <param name="fileToSave">Path passed to the graph exporter as the output file.</param>
        /// <param name="topicExclusions">Topic ids to leave out; a move is dropped if either of its two topics is listed.</param>
        /// <param name="scale">
        /// Only moves whose value is strictly greater than this are kept:
        /// [0 .. 5) - more detailed, [5 .. ) - less detailed graph.
        /// </param>
        public void GenerateTopicWalkingGraph(string fileToSave, HashSet<int> topicExclusions, int scale = 5)
        {
            var orderedMoves = topicMoves.OrderByDescending(move => move.Value);
            Console.WriteLine(String.Join("\n", orderedMoves));

            var topicConverter = new TopicConverter(Program.TopicsWordsFileName);

            // Keep frequent-enough moves between two non-excluded topics and turn each into an edge
            // weighted by GetGivenProbability of the move.
            var graphEdges = topicMoves
                .Where(move => move.Value > scale
                               && !topicExclusions.Contains(move.Key.Items[0])
                               && !topicExclusions.Contains(move.Key.Items[1]))
                .Select(move => new Edge
                {
                    SourceId = move.Key.Items[0],
                    DestinationId = move.Key.Items[1],
                    Weight = GetGivenProbability(move.Key)
                })
                .ToList();

            // Every topic that appears as an endpoint of at least one kept edge.
            var topics = new HashSet<int>();
            foreach (var edge in graphEdges)
            {
                topics.Add(edge.SourceId);
                topics.Add(edge.DestinationId);
            }

            // Left lazy on purpose: the sequence is enumerated by its consumer.
            var graphVertices = topics.Select(topicId => new Vertex
            {
                Id = topicId,
                Label = String.Join(", ", topicConverter.GetTopicWords(topicId, 10)),
                Weight = topicDistribution[topicId]
            });

            var graph = new GraphBuilder(graphVertices, graphEdges);
            graph.ExportToGVFormat(fileToSave, "TopikWalkingGraph", isOriented: true);
        }
Beispiel #4
0
        /// <summary>
        /// For every topic index below <c>Program.TopicsCount</c>, computes
        /// <c>Utilits.GetTimeCorrelation</c> between that topic's per-key values for
        /// <c>Region</c> and <paramref name="cloudinessDistrib"/>, then prints the
        /// <paramref name="topCount"/> topics with the largest absolute result to the console.
        /// Each printed line is: topic index, the first 4 topic words, and the correlation value,
        /// separated by tabs.
        /// </summary>
        /// <param name="cloudinessDistrib">Date-keyed series each topic is correlated against.</param>
        /// <param name="topCount">Maximum number of topics to print.</param>
        private void PrintTopCorrelations(IDictionary<DateTime, double> cloudinessDistrib, int topCount)
        {
            var allTopicProbabilities = userStatistics.GetAllWeekRegionTopicProbabilities(Region);

            var topicConverter = new TopicConverter(Program.TopicsWordsFileName);

            var correlations = new List<Tuple<int, double>>();
            for (var topic = 0; topic < Program.TopicsCount; topic++)
            {
                // Project the per-key value of this single topic out of the full table.
                var topicSeries = allTopicProbabilities.ToDictionary(p => p.Key, p => p.Value[topic]);
                var correlation = Utilits.GetTimeCorrelation(topicSeries, cloudinessDistrib);
                correlations.Add(Tuple.Create(topic, correlation));
            }

            // Rank by magnitude so strong negative correlations are reported too.
            var strongest = correlations
                .OrderByDescending(cor => Math.Abs(cor.Item2))
                .Take(topCount);

            var lines = strongest.Select(
                cor => cor.Item1 + "\t" + String.Join(", ", topicConverter.GetTopicWords(cor.Item1, 4)) + "\t" + cor.Item2);

            Console.WriteLine(String.Join("\n", lines));
        }