// Builds "last-moment" blocks (coarse, quick) or small blocks, based directly on tweets
// (i.e. not on other, smaller blocks).
public DbTimeBlock BuildTimeBlockFromTweets(bool quick, TimeSpan length)
{
    // Global analysis: every tweet in the interval [now - length, now], across all topics.
    IEnumerable<DbTweet> globalToProc = GetTweetsInTimeInterval(DateTime.Now.Add(-length), DateTime.Now, null);
    AnalysisResults globalAR = TA.AnalyzeTweetSet(globalToProc, quick);

    // Repeat the analysis restricted to each topic.
    Dictionary<DbTopic, AnalysisResults> allTopicsAR = new Dictionary<DbTopic, AnalysisResults>();
    foreach (DbTopic t in db.Query<DbTopic>())
    {
        IEnumerable<DbTweet> topicToProc = GetTweetsInTimeInterval(DateTime.Now.Add(-length), DateTime.Now, t);
        allTopicsAR.Add(t, TA.AnalyzeTweetSet(topicToProc, quick));
    }

    DbTimeBlock toRet = new DbTimeBlock(DateTime.Now.Add(-length), length, globalAR, allTopicsAR);
    db.Store(toRet);
    Console.Out.WriteLine("\n\n[ BUILT TIME BLOCK, quick: " + quick + " ]\n");
    return toRet;
}
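// A minimal usage sketch (not part of the original class): how BuildTimeBlockFromTweets
// might be driven from a periodic task. The method name RefreshLatestBlocks and the
// 5-minute span for the quick pass are assumptions for illustration only; TS_short is
// the small-block length used elsewhere in this class.
public void RefreshLatestBlocks()
{
    // Coarse "last-moment" block over the most recent few minutes, intended for
    // immediate display rather than later consolidation.
    BuildTimeBlockFromTweets(true, TimeSpan.FromMinutes(5));

    // Detailed small block that BuildTimeBlockFromBlocks can later consolidate.
    BuildTimeBlockFromTweets(false, TS_short);
}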
// Builds medium or large blocks from small or medium blocks, respectively.
public void BuildTimeBlockFromBlocks(TimeSpan length)
{
    TimeSpan childBlocksLength = TimeSpan.FromSeconds(0);
    if (length == TS_medium)
        childBlocksLength = TS_short;
    else if (length == TS_long)
        childBlocksLength = TS_medium;

    // Unconsumed child blocks of the appropriate size, newest first.
    IEnumerable<DbTimeBlock> ch = from DbTimeBlock t in db
                                  where t.Length == childBlocksLength && t.Used == false
                                  orderby t.Start descending
                                  select t;
    int cant = ch.Count();

    AnalysisResults GlobalAR = new AnalysisResults();
    Dictionary<DbTopic, AnalysisResults> TopicAR = new Dictionary<DbTopic, AnalysisResults>();
    foreach (DbTopic t in db.Query<DbTopic>())
        TopicAR.Add(t, new AnalysisResults());

    if (cant > this.TimeBlockConsolidationThreshold)
    {
        foreach (DbTimeBlock cht in ch)
        {
            GlobalAR.Popularity += cht.GlobalAR.Popularity;
            GlobalAR.PosVal += cht.GlobalAR.PosVal;
            GlobalAR.NegVal += cht.GlobalAR.NegVal;
            GlobalAR.Ambiguity += cht.GlobalAR.Ambiguity;

            // Normalize each child's relevant-term weights so the important words of one
            // sub-block can be compared with another's, since each was computed against a
            // different tf-idf baseline.
            double normalizationFactor = 0;
            foreach (KeyValuePair<string, double> kv in cht.GlobalAR.relevantList)
                normalizationFactor += kv.Value;
            foreach (KeyValuePair<string, double> kv in cht.GlobalAR.relevantList)
            {
                if (GlobalAR.RelevantTerms.ContainsKey(kv.Key))
                    GlobalAR.RelevantTerms[kv.Key] += kv.Value / normalizationFactor;
                else
                    GlobalAR.RelevantTerms[kv.Key] = kv.Value / normalizationFactor;
            }

            // Same as above, per topic.
            foreach (KeyValuePair<DbTopic, AnalysisResults> tar in cht.TopicAR)
            {
                TopicAR[tar.Key].Popularity += tar.Value.Popularity;
                TopicAR[tar.Key].PosVal += tar.Value.PosVal;
                TopicAR[tar.Key].NegVal += tar.Value.NegVal;
                TopicAR[tar.Key].Ambiguity += tar.Value.Ambiguity;

                normalizationFactor = 0;
                foreach (KeyValuePair<string, double> kv in tar.Value.relevantList)
                    normalizationFactor += kv.Value;
                foreach (KeyValuePair<string, double> kv in tar.Value.relevantList)
                {
                    if (TopicAR[tar.Key].RelevantTerms.ContainsKey(kv.Key))
                        TopicAR[tar.Key].RelevantTerms[kv.Key] += kv.Value / normalizationFactor;
                    else
                        TopicAR[tar.Key].RelevantTerms[kv.Key] = kv.Value / normalizationFactor;
                }
            }

            // Mark the child block as consumed so it is never aggregated twice.
            cht.Used = true;
            db.Store(cht);
        }

        // Ambiguity is averaged over the child blocks; the other measures stay accumulated.
        GlobalAR.Ambiguity /= cant;
        GlobalAR.DictionaryToList();
        foreach (DbTopic t in db.Query<DbTopic>())
        {
            TopicAR[t].Ambiguity /= cant;
            TopicAR[t].DictionaryToList();
        }

        DbTimeBlock toAdd = new DbTimeBlock(DateTime.Now.Add(-length), length, GlobalAR, TopicAR);
        db.Store(toAdd);
        Console.Out.WriteLine("\n\n[ BUILT TIME BLOCK (from other blocks), size: " + length + " ]\n");
    }
}
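// A minimal sketch (not part of the original class) of how the consolidation cascade
// might be triggered: small blocks are merged into medium ones, and medium into large.
// Each call is a no-op until more than TimeBlockConsolidationThreshold unused child
// blocks have accumulated, so it is safe to invoke this on every cycle. The method name
// ConsolidateTimeBlocks is an assumption for illustration only.
public void ConsolidateTimeBlocks()
{
    BuildTimeBlockFromBlocks(TS_medium); // small  -> medium
    BuildTimeBlockFromBlocks(TS_long);   // medium -> large
}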