/// <summary>
/// Exports the topic-relations (stochastic) matrix as a tab-separated table:
/// the header row and the first column carry topic labels built from each
/// topic's top words; inner cells hold the matrix values. Excluded topics are skipped.
/// </summary>
public void GenerateTopicRelationsMatrix()
{
    var topicConverter = new TopicConverter(Program.TopicsWordsFileName);
    const int nTopicWords = 5;
    var relMatrix = userTopicsWalking.GetStochasticMatrix(topicExclusions);

    // Header row: one label per non-excluded topic.
    var outString = new StringBuilder("\t");
    for (int i = 0; i < Program.TopicsCount; i++)
    {
        if (!topicExclusions.Contains(i))
        {
            outString.Append(String.Join("_", topicConverter.GetTopicWords(i, nTopicWords))).Append("\t");
        }
    }
    outString.Append("\n");

    // Body: each row starts with its topic label, followed by the values for
    // every non-excluded column.
    for (int i = 0; i < relMatrix.GetLength(0); i++)
    {
        if (topicExclusions.Contains(i))
            continue;
        outString.Append(String.Join("_", topicConverter.GetTopicWords(i, nTopicWords))).Append("\t");
        for (int j = 0; j < relMatrix.GetLength(1); j++)
        {
            if (!topicExclusions.Contains(j))
            {
                outString.Append(relMatrix[i, j].ToString(CultureInfo.InvariantCulture)).Append("\t");
            }
        }
        outString.Append("\n");
    }

    File.WriteAllText("TopicRelationsMatrix.txt", outString.ToString(), Encoding.GetEncoding(1251));
}
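// Hypothetical helper (a minimal sketch, not part of the original code): reads
// TopicRelationsMatrix.txt back into labels and a dense matrix. The layout is
// assumed from GenerateTopicRelationsMatrix above: Windows-1251 encoding, a
// tab-separated header row of labels, and body rows led by their row label.
public static Tuple<string[], double[,]> ReadTopicRelationsMatrix(string path)
{
    var lines = File.ReadAllLines(path, Encoding.GetEncoding(1251));
    // Every field is followed by a tab, so splitting leaves empty entries at
    // the row edges; filter them out.
    var labels = lines[0].Split('\t').Where(s => s.Length > 0).ToArray();
    var matrix = new double[lines.Length - 1, labels.Length];
    for (var i = 1; i < lines.Length; i++)
    {
        var cells = lines[i].Split('\t').Where(s => s.Length > 0).ToArray();
        for (var j = 1; j < cells.Length; j++) // cells[0] is the row label
        {
            matrix[i - 1, j - 1] = double.Parse(cells[j], CultureInfo.InvariantCulture);
        }
    }
    return Tuple.Create(labels, matrix);
}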
/// <summary>
/// Writes the documents-over-topics distribution to a file, one topic per line
/// (topic id, its top five words, value), sorted by descending value.
/// </summary>
public void GetDocsOverTopicsDistrib()
{
    // Probability cutoff passed to MaxThresholdDocsOverTopicsDistribution.
    const double threshold = 0.09;
    // Manually curated list of topics to leave out of the distribution.
    var topicExclusions = new[]
    {
        1, 6, 7, 11, 13, 15, 16, 17, 21, 24, 34, 36, 38, 41, 47,
        49, 50, 51, 57, 60, 61, 64, 65, 76, 77, 78, 83, 87, 89
    };
    var distrib = topicsStatistics.MaxThresholdDocsOverTopicsDistribution(threshold, topicExclusions);
    var converter = new TopicConverter(Program.TopicsWordsFileName);
    File.WriteAllLines(
        Program.StatisticsDirectory + "DocsOverTopicsDistrib.txt",
        distrib.OrderByDescending(it => it.Value).Select(
            it => it.Key + "\t" + String.Join(", ", converter.GetTopicWords(it.Key, 5)) + "\t" + it.Value));
}
/// <summary>
/// Generates the user topic-walking graph in GV format for visualization in Gephi.
/// </summary>
/// <param name="fileToSave">Path of the graph file to write.</param>
/// <param name="topicExclusions">Topics to leave out of the graph.</param>
/// <param name="scale">[0 .. 5) - more detailed graph; [5 .. ) - less detailed graph.</param>
public void GenerateTopicWalkingGraph(string fileToSave, HashSet<int> topicExclusions, int scale = 5)
{
    // Debug dump: all topic moves, most frequent first.
    Console.WriteLine(String.Join("\n", topicMoves.OrderByDescending(it => it.Value)));
    var topicConverter = new TopicConverter(Program.TopicsWordsFileName);

    // Keep only moves frequent enough for the requested scale that do not touch
    // an excluded topic; the edge weight is the conditional move probability.
    var graphEdges = topicMoves
        .Where(it => it.Value > scale)
        .Where(it => !topicExclusions.Contains(it.Key.Items[0]) && !topicExclusions.Contains(it.Key.Items[1]))
        .Select(it => new { seq = it.Key, chance = GetGivenProbability(it.Key) })
        .Select(row => new Edge { SourceId = row.seq.Items[0], DestinationId = row.seq.Items[1], Weight = row.chance })
        .ToList();

    // Vertices are exactly the topics that appear in at least one kept edge.
    var topics = graphEdges.Aggregate(new HashSet<int>(), (acc, e) =>
    {
        acc.Add(e.SourceId);
        acc.Add(e.DestinationId);
        return acc;
    });
    var graphVertices = topics.Select(t => new Vertex
    {
        Id = t,
        Label = String.Join(", ", topicConverter.GetTopicWords(t, 10)),
        Weight = topicDistribution[t]
    });

    var graph = new GraphBuilder(graphVertices, graphEdges);
    graph.ExportToGVFormat(fileToSave, "TopicWalkingGraph", isOriented: true);
}
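// Usage sketch (hypothetical call site; the file names and exclusion set below
// are illustrative assumptions, not values from the original code):
public void ExportTopicWalkingGraphVariants()
{
    var exclusions = new HashSet<int> { 1, 6, 7 };
    // A larger scale keeps only the most frequent moves: a sparser graph.
    GenerateTopicWalkingGraph("TopicWalkingCoarse.gv", exclusions, scale: 10);
    // A smaller scale keeps rarer moves too: a denser, more detailed graph.
    GenerateTopicWalkingGraph("TopicWalkingDetailed.gv", exclusions, scale: 2);
}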
// Ad-hoc test with machine-specific paths to a trained GibbsLDA++ model.
public static void TestGraphGeneration()
{
    var converter = new TopicConverter(@"c:\Users\beloborodov\Documents\GibbsLDA\GibbsLDA++-0.2\CQA_LDA\model-final-100_topics-100_iters.twords");
    converter.SaveTopicGraph(@"c:\Users\beloborodov\Documents\GibbsLDA\GibbsLDA++-0.2\CQA_LDA\model-00700.gv", 10);
}
// Ad-hoc test with machine-specific paths to a trained GibbsLDA++ model.
public static void TestConvertion()
{
    var converter = new TopicConverter(@"c:\Users\beloborodov\Documents\GibbsLDA\GibbsLDA++-0.2\CQA_LDA\model-00500.twords");
    converter.SaveTopicWordVectors(@"c:\Users\beloborodov\Documents\GibbsLDA\GibbsLDA++-0.2\CQA_LDA\model-00500.vectors");
}
/// <summary>
/// Prints the topCount topics whose weekly probability series correlates most
/// strongly (by absolute value) with the given cloudiness time series.
/// </summary>
private void PrintTopCorrelations(IDictionary<DateTime, double> cloudinessDistrib, int topCount)
{
    var allTopicProbabilities = userStatistics.GetAllWeekRegionTopicProbabilities(Region);
    var topicConverter = new TopicConverter(Program.TopicsWordsFileName);

    // Correlate every topic's probability series with the cloudiness series.
    var corrs = new List<Tuple<int, double>>();
    for (var i = 0; i < Program.TopicsCount; i++)
    {
        corrs.Add(Tuple.Create(i, Utilits.GetTimeCorrelation(
            allTopicProbabilities.ToDictionary(p => p.Key, p => p.Value[i]),
            cloudinessDistrib)));
    }

    // Report the strongest correlations, positive or negative.
    Console.WriteLine(String.Join("\n", corrs
        .OrderByDescending(cor => Math.Abs(cor.Item2))
        .Take(topCount)
        .Select(cor => cor.Item1 + "\t" + String.Join(", ", topicConverter.GetTopicWords(cor.Item1, 4)) + "\t" + cor.Item2)));
}
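// Usage sketch (hypothetical caller; the weekly keys and the synthetic seasonal
// cloudiness values are placeholder assumptions, not real weather data):
public void PrintCloudinessCorrelationsExample()
{
    var cloudiness = new Dictionary<DateTime, double>();
    var week = new DateTime(2012, 1, 2);
    for (var i = 0; i < 52; i++)
    {
        // Placeholder seasonal curve in [0.1, 0.9]; substitute observed data.
        cloudiness[week.AddDays(7 * i)] = 0.5 + 0.4 * Math.Sin(2 * Math.PI * i / 52.0);
    }
    PrintTopCorrelations(cloudiness, topCount: 10);
}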