/// <summary>
/// Builds the streaming-query token lists for a tokenized search query.
/// </summary>
/// <remarks>
/// The conversion itself is delegated to <c>QueryConverterUtils.ExtractUniqueQueryTokens</c>, which is
/// handed <paramref name="disjuncts"/>, an initially empty result list and the threshold read from the
/// <c>TwitterProfilerThreshold</c> application setting. After that call <paramref name="disjuncts"/> is
/// expected to be empty; if it is not, an error is written to the console and the process exits with code 1.
/// </remarks>
/// <param name="disjuncts">Token lists of the search query; passed to the helper and checked for emptiness afterwards.</param>
/// <param name="twitterProfiler">Not used by this method; kept so existing callers continue to compile.</param>
/// <returns>The list that was filled by <c>QueryConverterUtils.ExtractUniqueQueryTokens</c>.</returns>
private ArrayList<ArrayList<QueryToken>> ConvertSearchToStreamingQuery(ArrayList<ArrayList<QueryToken>> disjuncts, TwitterProfiler twitterProfiler)
{
    // Configuration values are machine-readable data: parse them with the invariant culture so that
    // a setting such as "0.5" is not misread on machines whose locale uses ',' as the decimal separator.
    double profilerThreshold = Convert.ToDouble(
        ConfigurationManager.AppSettings["TwitterProfilerThreshold"],
        System.Globalization.CultureInfo.InvariantCulture);

    ArrayList<ArrayList<QueryToken>> finalQueryList = new ArrayList<ArrayList<QueryToken>>();
    QueryConverterUtils.ExtractUniqueQueryTokens(disjuncts, finalQueryList, profilerThreshold);

    // Anything left in 'disjuncts' at this point is treated as a fatal error.
    if (disjuncts.Count > 0)
    {
        Console.WriteLine("\nError: Count of disjuncts should be zero, instead it is {0}!", disjuncts.Count);
        System.Environment.Exit(1);
    }

    return finalQueryList;
}
/// <summary>
/// Reads search queries from <paramref name="filePathIn"/>, derives a streaming query for every query
/// group and exports the result to <paramref name="filePathOut"/>.
/// </summary>
/// <remarks>
/// For each group, all of its queries are wrapped in parentheses and OR-ed together into one combined
/// query. That query is tokenized, converted to streaming form, shortened, and finally stored on the
/// group both as token lists (<c>tokenizedStreamingQueries</c>) and as a string (<c>streamingQuery</c>).
/// Groups without any queries are left untouched.
/// </remarks>
/// <param name="filePathIn">Path handed to <c>GetSearchQueries</c> to load the query group collections.</param>
/// <param name="filePathOut">Path handed to <c>ExportStreamingConfig</c> to write the updated collections.</param>
public void ConvertSearchToStreamingConfig(string filePathIn, string filePathOut)
{
    List<QueryGroupCollection> queryGroupCollection = GetSearchQueries(filePathIn);
    TwitterProfiler twitterProfiler = new TwitterProfiler();

    foreach (var collection in queryGroupCollection)
    {
        foreach (var queryGroup in collection.queryGroups)
        {
            // Wrap every query in parentheses so it stays one unit once the queries are OR-ed together.
            List<string> parenthesizedQueries = new List<string>();
            foreach (var query in queryGroup.queries)
            {
                parenthesizedQueries.Add("(" + query.query + ")");
            }

            // Nothing to convert for a group that has no queries.
            if (parenthesizedQueries.Count == 0)
            {
                continue;
            }

            // Join puts the separator only between entries, so no trailing " OR " has to be stripped.
            string combinedQuery = string.Join(" OR ", parenthesizedQueries.ToArray());

            ArrayList<ArrayList<QueryToken>> tokenizedSearchQueries = TokenizeQuery(combinedQuery);
            queryGroup.tokenizedStreamingQueries = ConvertSearchToStreamingQuery(tokenizedSearchQueries, twitterProfiler);
            QueryConverterUtils.ShortenStreamingQueries(queryGroup.tokenizedStreamingQueries, twitterProfiler);
            queryGroup.streamingQuery = TokenizedStreamingQueryToString(queryGroup.tokenizedStreamingQueries);
        }
    }

    ExportStreamingConfig(queryGroupCollection, filePathOut);
}
/// <summary>
/// Shortens every tokenized query in place until its exported length fits within the limit configured
/// in the <c>TwitterStreamingAPIMaxQueryLength</c> application setting.
/// </summary>
/// <remarks>
/// While a query is too long, each token is tentatively removed and the remaining tokens are profiled
/// via <c>TwitterProfiler.ProfileFrequency</c>; the token whose removal yields the lowest frequency is
/// then dropped for real. A query that is down to a single token which is still too long is emptied.
/// If profiling reports <c>TwitterProfiler.InvalidReturns.Exception</c>, an error is printed and the
/// process exits with code 1. A warning is printed for every query that lost at least one token.
/// </remarks>
/// <param name="tokenizedQueries">Queries to shorten; the inner token lists are modified in place.</param>
/// <param name="twitterProfiler">Not used by this method (profiling goes through static <c>TwitterProfiler</c> members); kept for signature compatibility.</param>
public static void ShortenStreamingQueries(ArrayList<ArrayList<QueryToken>> tokenizedQueries, TwitterProfiler twitterProfiler)
{
    // The limit does not change while this method runs, so read and parse the setting once
    // instead of on every evaluation of the loop condition.
    int maxQueryLength = Convert.ToInt32(ConfigurationManager.AppSettings["TwitterStreamingAPIMaxQueryLength"]);

    // check that the length of each query satisfies twitter's streaming API requirements
    foreach (var item in tokenizedQueries)
    {
        int queryLength = item.Sum(x => x.ToString().Length);
        // (item.Count - 1) is the number of spaces in the exported string streaming query
        // (see ConfigParser.TokenizedStreamingQueryToString())
        int spacesLength = item.Count - 1;
        int itemsRemoved = 0;

        while (queryLength + spacesLength > maxQueryLength)
        {
            // A single token that is still too long cannot be shortened any further: drop the whole
            // query. Checking this before profiling avoids profiling an empty token list when the
            // query consisted of one over-long token from the start.
            if (item.Count == 1)
            {
                item.Clear();
                itemsRemoved++;
                break;
            }

            // frequencies[i] is the profiled frequency of the query with token i left out.
            List<double> frequencies = new List<double>();
            for (int i = 0; i < item.Count; ++i)
            {
                var itemCopy = new ArrayList<QueryToken>(item);
                itemCopy.RemoveAt(i);

                double freq = TwitterProfiler.ProfileFrequency(TwitterProfiler.QueryTokensToStringConverter(itemCopy));
                if (Math.Round(freq) == Convert.ToInt32(TwitterProfiler.InvalidReturns.Exception))
                {
                    Console.WriteLine("\nShortenStreamingQueries: Error during twitter profiling: {0}!", TwitterProfiler.GetExceptionError());
                    System.Environment.Exit(1);
                }

                frequencies.Add(freq);
            }

            // Drop the token whose removal gives the lowest frequency (first one on ties).
            int minFreqIdx = frequencies.IndexOf(frequencies.Min());
            item.RemoveAt(minFreqIdx);
            itemsRemoved++;

            queryLength = item.Sum(x => x.ToString().Length);
            spacesLength = item.Count - 1;
        }

        if (itemsRemoved > 0)
        {
            // item.Count + itemsRemoved reconstructs the original token count of this query.
            Console.WriteLine("\nWARNING: {0} out of {1} tokens were removed from a query to reduce its length to the required size!\n", itemsRemoved, item.Count + itemsRemoved);
        }
    }
}