예제 #1
0
        /// <summary>
        /// Converts the tokenized disjuncts of a search query into the token lists of a streaming query.
        /// </summary>
        /// <remarks>
        /// The conversion itself is delegated to <c>QueryConverterUtils.ExtractUniqueQueryTokens</c>, which is
        /// given <paramref name="disjuncts"/>, a freshly created result list and the threshold read from the
        /// <c>TwitterProfilerThreshold</c> app setting. If <paramref name="disjuncts"/> still contains entries
        /// after that call, an error is written to the console and the process exits with code 1.
        /// </remarks>
        /// <param name="disjuncts">Tokenized disjuncts of the search query; must be empty once extraction has run.</param>
        /// <param name="twitterProfiler">Not used by this method; kept so existing callers keep compiling.</param>
        /// <returns>The result list that was handed to <c>ExtractUniqueQueryTokens</c>.</returns>
        private ArrayList<ArrayList<QueryToken>> ConvertSearchToStreamingQuery(ArrayList<ArrayList<QueryToken>> disjuncts, TwitterProfiler twitterProfiler)
        {
            var finalQueryList = new ArrayList<ArrayList<QueryToken>>();
            double threshold = Convert.ToDouble(ConfigurationManager.AppSettings["TwitterProfilerThreshold"]);
            QueryConverterUtils.ExtractUniqueQueryTokens(disjuncts, finalQueryList, threshold);

            // Read the Count property once and use it for both the check and the message,
            // instead of mixing the property with a Count() call.
            var remainingDisjuncts = disjuncts.Count;
            if (remainingDisjuncts > 0)
            {
                Console.WriteLine("\nError: Count of disjuncts should be zero, instead it is {0}!", remainingDisjuncts);
                System.Environment.Exit(1);
            }

            return finalQueryList;
        }
예제 #2
0
        /// <summary>
        /// Loads the search-query groups from <paramref name="filePathIn"/>, derives a streaming query for
        /// every group that has queries, and exports the collections to <paramref name="filePathOut"/>.
        /// </summary>
        /// <remarks>
        /// For each group the individual queries are wrapped in parentheses and joined with <c>" OR "</c>.
        /// The combined query is tokenized, converted with <c>ConvertSearchToStreamingQuery</c>, shortened
        /// with <c>QueryConverterUtils.ShortenStreamingQueries</c>, and finally rendered to a string that is
        /// stored in the group's <c>streamingQuery</c>. Groups without queries are left untouched.
        /// </remarks>
        /// <param name="filePathIn">Path passed to <c>GetSearchQueries</c> to load the search configuration.</param>
        /// <param name="filePathOut">Path passed to <c>ExportStreamingConfig</c> for the resulting configuration.</param>
        public void ConvertSearchToStreamingConfig(string filePathIn, string filePathOut)
        {
            List<QueryGroupCollection> queryGroupCollection = GetSearchQueries(filePathIn);

            TwitterProfiler twitterProfiler = new TwitterProfiler();

            foreach (var collection in queryGroupCollection)
            {
                foreach (var queryGroup in collection.queryGroups)
                {
                    // Collect "(query)" clauses and join them once, rather than appending " OR "
                    // after every clause and stripping the trailing separator with a regex.
                    var clauses = new List<string>();
                    foreach (var query in queryGroup.queries)
                    {
                        clauses.Add("(" + query.query + ")");
                    }

                    if (clauses.Count == 0)
                    {
                        continue;
                    }

                    string combinedQuery = string.Join(" OR ", clauses.ToArray());

                    ArrayList<ArrayList<QueryToken>> tokenizedSearchQueries = TokenizeQuery(combinedQuery);
                    queryGroup.tokenizedStreamingQueries = ConvertSearchToStreamingQuery(tokenizedSearchQueries, twitterProfiler);
                    QueryConverterUtils.ShortenStreamingQueries(queryGroup.tokenizedStreamingQueries, twitterProfiler);
                    queryGroup.streamingQuery = TokenizedStreamingQueryToString(queryGroup.tokenizedStreamingQueries);
                }
            }

            ExportStreamingConfig(queryGroupCollection, filePathOut);
        }
        /// <summary>
        /// Removes tokens from each tokenized query until its exported length no longer exceeds the limit
        /// configured in the <c>TwitterStreamingAPIMaxQueryLength</c> app setting.
        /// </summary>
        /// <remarks>
        /// While a query is too long, every token is tentatively left out in turn and the remaining tokens
        /// are profiled with <c>TwitterProfiler.ProfileFrequency</c>; the token at the index that produced
        /// the smallest frequency is then removed. A query that consists of a single token and is still too
        /// long is cleared entirely. If profiling reports the <c>InvalidReturns.Exception</c> value, an error
        /// is printed and the process exits with code 1. A warning is printed for every query that lost tokens.
        /// </remarks>
        /// <param name="tokenizedQueries">Queries to shorten; the inner token lists are modified in place.</param>
        /// <param name="twitterProfiler">Not used by this method; profiling goes through static <c>TwitterProfiler</c> members.</param>
        public static void ShortenStreamingQueries(ArrayList<ArrayList<QueryToken>> tokenizedQueries, TwitterProfiler twitterProfiler)
        {
            // Both values are loop-invariant, so parse them once instead of on every loop test.
            int maxQueryLength = Convert.ToInt32(ConfigurationManager.AppSettings["TwitterStreamingAPIMaxQueryLength"]);
            int profilingFailureCode = Convert.ToInt32(TwitterProfiler.InvalidReturns.Exception);

            // check that the length of each query satisfies twitter's streaming API requirements
            foreach (var item in tokenizedQueries)
            {
                int queryLength = item.Sum(x => x.ToString().Length);
                // (item.Count - 1) is the number of spaces in the exported string streaming query
                // (see ConfigParser.TokenizedStreamingQueryToString())
                int spacesLength = item.Count - 1;

                int itemsRemoved = 0;
                while (queryLength + spacesLength > maxQueryLength)
                {
                    // A single remaining token that is still too long cannot be shortened by dropping
                    // other tokens, so the whole query is cleared. Checking this before profiling also
                    // covers queries that start out with one token, which would otherwise profile an
                    // empty token list.
                    if (item.Count == 1)
                    {
                        item.Clear();
                        itemsRemoved++;
                        break;
                    }

                    // frequencies[i] is the profiled frequency of the query with token i left out.
                    List<double> frequencies = new List<double>();
                    for (int i = 0; i < item.Count; ++i)
                    {
                        var itemCopy = new ArrayList<QueryToken>(item);
                        itemCopy.RemoveAt(i);
                        double freq = TwitterProfiler.ProfileFrequency(TwitterProfiler.QueryTokensToStringConverter(itemCopy));
                        if (Math.Round(freq) == profilingFailureCode)
                        {
                            Console.WriteLine("\nShortenStreamingQueries: Error during twitter profiling: {0}!", TwitterProfiler.GetExceptionError());
                            System.Environment.Exit(1);
                        }
                        frequencies.Add(freq);
                    }

                    // Remove the token at the first index with the smallest leave-one-out frequency.
                    int minFreqIdx = frequencies.IndexOf(frequencies.Min());
                    item.RemoveAt(minFreqIdx);
                    itemsRemoved++;

                    queryLength = item.Sum(x => x.ToString().Length);
                    spacesLength = item.Count - 1;
                }

                if (itemsRemoved > 0)
                {
                    Console.WriteLine("\nWARNING: {0} out of {1} tokens were removed from a query to reduce its length to the required size!\n", itemsRemoved, item.Count + itemsRemoved);
                }
            }
        }