Esempio n. 1
0
        /// <summary>
        /// Feeds every row of <c>learningBase</c> (ordered by id) into
        /// <c>TweetLoc.getGeographyStatistics</c>, prints a timing line every 10 rows and
        /// finally prints the distance statistics exposed by <c>TweetLoc.statistics</c>.
        /// Blocks on <c>Console.ReadLine</c> before returning.
        /// </summary>
        /// <param name="tw">Twitter authorizer; not used here because the progress tweet is disabled.</param>
        private static void statistics(PinAuthorizer tw)
        {
            // Number of processed rows between two progress lines. The per-interval
            // average below must be divided by this same value.
            const int reportInterval = 10;

            using (knowledgeObjects DB = new knowledgeObjects())
            {
                List<learningBase> tweetsCollection = (from tweets in DB.learningBase
                                                       orderby tweets.id
                                                       select tweets).ToList();

                Stopwatch stopwatch = new Stopwatch();

                // timespan accumulates over the whole run, actualTime only over the current interval.
                TimeSpan timespan = TimeSpan.Zero;
                TimeSpan actualTime = TimeSpan.Zero;

                TweetLoc tl = new TweetLoc(10);

                int i = 0;

                foreach (var item in tweetsCollection)
                {
                    i++;

                    // Building the TweetInformation is part of the measured time, as before.
                    stopwatch.Restart();
                    TweetInformation ti = new TweetInformation();
                    ti.userlocation = item.userlocation;
                    ti.timezone = item.timezone;
                    ti.longitude = item.lon;
                    ti.latitude = item.lat;
                    ti.baseDataId = item.id;
                    tl.getGeographyStatistics(ti);
                    stopwatch.Stop();

                    TimeSpan elapsed = stopwatch.Elapsed;
                    actualTime += elapsed;
                    timespan += elapsed;

                    if (i % reportInterval == 0)
                    {
                        // total time, average per row over the whole run, average per row over the last interval
                        string tweetTXT = i + " T " + new RoundedTimeSpan(timespan.Ticks, 2) + " avg " + new RoundedTimeSpan(timespan.Ticks / i, 2) + " avg5k " + new RoundedTimeSpan(actualTime.Ticks / reportInterval, 2);
                        System.Console.WriteLine(tweetTXT);
                        // Progress tweet intentionally disabled:
                        //statusUpdate("@pide2001 " + tweetTXT, tw);
                        actualTime = TimeSpan.Zero;
                    }
                }

                System.Console.WriteLine("Median " + tl.statistics.getMedianOfDistances());
                System.Console.WriteLine("Average " + tl.statistics.getAverageDistance());
                System.Console.WriteLine("Biggest " + tl.statistics.getBiggestDistance());
                System.Console.WriteLine("Smallest " + tl.statistics.getSmallestDistance());
                Tuple<GeographyData, TweetKnowledgeObj> know = tl.statistics.getBiggestDistanceAndInformation();
                System.Console.WriteLine("Biggest distance between " + know.Item1.geonamesId + " and " + know.Item2.baseDataId);

                System.Console.WriteLine("Press any key to quit !");
                System.Console.ReadLine();
            }
        }
Esempio n. 2
0
        /// <summary>
        /// For every tweet in <c>tweetRandomSample2</c>, groups its knowledge-base geocoding
        /// candidates by n-gram order and stores one <c>resultsKnowledgeBaseGeocoding</c> row per
        /// (tweet, order) holding the candidate with the highest share of n-gram counts on
        /// city, admin2, admin1 and country level. Prints and tweets the progress every 500 tweets
        /// and blocks on <c>Console.ReadLine</c> before returning.
        /// </summary>
        /// <param name="tw">Authorizer handed to <c>statusUpdate</c> for the progress tweets.</param>
        private static void analysis(PinAuthorizer tw)
        {
            using (knowledgeObjects DB = new knowledgeObjects())
            {
                DB.Configuration.AutoDetectChangesEnabled = false;
                DB.Configuration.ValidateOnSaveEnabled = false;

                // Load the whole sample up front; the loop below iterates over it in memory.
                var tweetRandomSample = (from trs in DB.tweetRandomSample2
                                         select trs).ToList();

                int counter = 0;
                foreach (var tweet in tweetRandomSample)
                {
                    counter++;

                    // Geocoding entries of this tweet via the navigation property.
                    List<knowledgeBaseGeocoding> knowledgeBaseGeocodingList = tweet.knowledgeBaseGeocoding.ToList();

                    if (knowledgeBaseGeocodingList.Select(g => g.knowledgeBaseId).FirstOrDefault() != null)
                    {
                        // Highest n-gram order present for this tweet.
                        var max = knowledgeBaseGeocodingList.Max(g => g.NGramOrder);

                        // Same entries again, this time with the related knowledge-base row loaded eagerly.
                        // NOTE(review): the include path is spelled "knowledgeBase" while the property
                        // used below is KnowledgeBase - confirm the path matches the model.
                        var geocodingEntries = (from kbg in DB.knowledgeBaseGeocoding
                                                where kbg.tweetRandomSampleId == tweet.id
                                                select kbg).Include("knowledgeBase").ToList();

                        // One list per n-gram order (1..max); orders without entries are left out.
                        List<List<knowledgeBaseGeocoding>> ListsByOrder = new List<List<knowledgeBaseGeocoding>>();
                        for (int ngramOrder = 1; ngramOrder <= max; ngramOrder++)
                        {
                            var entriesOfOrder = geocodingEntries.Where(g => g.NGramOrder.Value == ngramOrder).ToList();
                            if (entriesOfOrder.Count > 0)
                            {
                                ListsByOrder.Add(entriesOfOrder);
                            }
                        }

                        foreach (var liste in ListsByOrder)
                        {
                            var order = liste.First().NGramOrder;
                            var tweetId = liste.First().tweetRandomSampleId;

                            // Denominator for all shares below, computed by a database function.
                            int sum = (int)DB.getSumOfNGramCounts(tweetId, order).FirstOrDefault();

                            float currentPercentage;

                            // ---- city level: single entry with the highest share ----
                            knowledgeBaseGeocoding bestCityEntry = null;
                            float bestCityShare = 0f;
                            int ngramcoCity = 0;
                            foreach (var entry in liste)
                            {
                                int ngramco = (int)entry.NgramCount;
                                currentPercentage = (float)ngramco / (float)sum;
                                if (bestCityShare < currentPercentage)
                                {
                                    bestCityEntry = entry;
                                    bestCityShare = currentPercentage;
                                    ngramcoCity = ngramco;
                                }
                            }

                            // When no entry has a positive share the ids stay 0.
                            int geonamesidCity = bestCityEntry != null ? bestCityEntry.KnowledgeBase.GeoNamesId : 0;
                            int knowledgeBaseIdCity = bestCityEntry != null ? bestCityEntry.KnowledgeBase.Id : 0;
                            double? percentageMaxCity = bestCityShare;

                            // ---- admin2 level: counts summed per Admin2Id ----
                            var admin2Totals = liste.GroupBy(o => o.KnowledgeBase.Admin2Id)
                                                    .Select(g => new { admin2Id = g.Key, total = g.Sum(e => e.NgramCount) });

                            int bestAdmin2Id = 0;
                            float bestAdmin2Share = 0f;
                            int lossAdmin2 = 0;
                            int ngramcoAdmin2 = 0;
                            foreach (var entryadmin2 in admin2Totals)
                            {
                                int ngramco = (int)entryadmin2.total;
                                currentPercentage = (float)ngramco / (float)sum;
                                if (bestAdmin2Share < currentPercentage)
                                {
                                    if (entryadmin2.admin2Id != null)
                                    {
                                        bestAdmin2Id = (int)entryadmin2.admin2Id;
                                        bestAdmin2Share = currentPercentage;
                                        ngramcoAdmin2 = ngramco;
                                    }
                                    else
                                    {
                                        // Entries without an admin2 id never win; their total is kept as "loss".
                                        // NOTE(review): only recorded when the null group beats the best share
                                        // seen so far, so the value depends on group order - confirm intent.
                                        lossAdmin2 = (int)entryadmin2.total;
                                    }
                                }
                            }

                            // ---- admin1 level: counts summed per Admin1Id ----
                            var admin1Totals = liste.GroupBy(o => o.KnowledgeBase.Admin1Id)
                                                    .Select(g => new { admin1Id = g.Key, total = g.Sum(e => e.NgramCount) });

                            int bestAdmin1Id = 0;
                            float bestAdmin1Share = 0f;
                            int lossAdmin1 = 0;
                            int ngramcoAdmin1 = 0;
                            foreach (var entryadmin1 in admin1Totals)
                            {
                                int ngramco = (int)entryadmin1.total;
                                currentPercentage = (float)ngramco / (float)sum;
                                if (bestAdmin1Share < currentPercentage)
                                {
                                    if (entryadmin1.admin1Id != null)
                                    {
                                        bestAdmin1Id = (int)entryadmin1.admin1Id;
                                        bestAdmin1Share = currentPercentage;
                                        ngramcoAdmin1 = ngramco;
                                    }
                                    else
                                    {
                                        // Same order-dependent "loss" bookkeeping as on admin2 level.
                                        lossAdmin1 = (int)entryadmin1.total;
                                    }
                                }
                            }

                            // ---- country level: counts summed per CountryId (no null handling) ----
                            var countryTotals = liste.GroupBy(o => o.KnowledgeBase.CountryId)
                                                     .Select(g => new { countryId = g.Key, total = g.Sum(e => e.NgramCount) });

                            int bestCountryId = 0;
                            float bestCountryShare = 0f;
                            int ngramcoCountry = 0;
                            foreach (var entryCountry in countryTotals)
                            {
                                int ngramco = (int)entryCountry.total;
                                currentPercentage = (float)ngramco / (float)sum;
                                if (bestCountryShare < currentPercentage)
                                {
                                    bestCountryId = (int)entryCountry.countryId;
                                    bestCountryShare = currentPercentage;
                                    ngramcoCountry = ngramco;
                                }
                            }

                            DB.resultsKnowledgeBaseGeocoding.Add(new resultsKnowledgeBaseGeocoding
                            {
                                nGramOrder = order,
                                overallCount = sum,
                                tweetRandomSampleId = tweetId,
                                city_geonamesId = geonamesidCity,
                                city_knowledgeBaseId = knowledgeBaseIdCity,
                                city_percentage = percentageMaxCity,
                                city_count = ngramcoCity,
                                adm2_geonamesId = bestAdmin2Id,
                                adm2_percentage = bestAdmin2Share,
                                adm2_loss = lossAdmin2,
                                adm2_count = ngramcoAdmin2,
                                adm1_geonamesId = bestAdmin1Id,
                                adm1_percentage = bestAdmin1Share,
                                adm1_loss = lossAdmin1,
                                adm1_count = ngramcoAdmin1,
                                country_geonamesId = bestCountryId,
                                country_percentage = bestCountryShare,
                                country_count = ngramcoCountry
                            });
                            // Persisted once per (tweet, order) result row.
                            DB.SaveChanges();
                        }
                    }

                    if (counter % 500 == 0)
                    {
                        System.Console.WriteLine("tweets so far: " + counter);
                        statusUpdate("@pide2001 " + counter, tw);
                    }
                }
            }

            System.Console.WriteLine("Press any key to quit !");
            System.Console.ReadLine();
        }
Esempio n. 3
0
        /// <summary>
        /// Interactively asks for a row window, a progress interval and whether progress should be
        /// tweeted, then passes each selected row of <c>tweetRandomSample2</c> (ordered by id) to
        /// <c>TweetLoc.saveLocateResults</c> while printing timing information.
        /// Blocks on <c>Console.ReadLine</c> before returning.
        /// </summary>
        /// <param name="tw">Authorizer handed to <c>statusUpdate</c> when progress tweets are enabled.</param>
        private static void locate(PinAuthorizer tw)
        {
            System.Console.WriteLine("Please Type in how many DataLines should be taken (0 for all): ");
            int takeUserInput = Convert.ToInt32(Console.ReadLine());
            System.Console.WriteLine("Please Type in how many Datalines should be skipped (0 for none): ");
            int skipUserInput = Convert.ToInt32(Console.ReadLine());
            System.Console.WriteLine("Please type in the Information intervall: ");
            int informationIntervall = Convert.ToInt32(Console.ReadLine());
            System.Console.WriteLine("Do you want to retrieve Tweets about the progress? (0 no 1 yes)");
            int tweetInformation = Convert.ToInt32(Console.ReadLine());

            using (knowledgeObjects DB = new knowledgeObjects())
            {
                DB.Configuration.AutoDetectChangesEnabled = false;
                DB.Configuration.ValidateOnSaveEnabled = false;

                // Skip first, then take: "take N, skip M" must yield N rows starting after row M.
                // Taking before skipping would return only N - M rows (none when M >= N).
                var selectedTweets = (from tweets in DB.tweetRandomSample2
                                      orderby tweets.id
                                      select tweets).Skip(skipUserInput);
                if (takeUserInput != 0)
                {
                    selectedTweets = selectedTweets.Take(takeUserInput);
                }
                List<tweetRandomSample2> tweetsCollection = selectedTweets.ToList();

                Stopwatch stopwatch = new Stopwatch();

                // timespan accumulates over the whole run, actualTime only over the current interval.
                TimeSpan timespan = TimeSpan.Zero;
                TimeSpan actualTime = TimeSpan.Zero;

                TweetLoc tl = new TweetLoc(0);

                int i = 0;
                foreach (var item in tweetsCollection)
                {
                    // The former per-tweet GeoNames lookup was dropped: its result was never read,
                    // it cost one extra context and query per row, and it aborted the run when
                    // no matching row existed.
                    i++;

                    stopwatch.Restart();
                    TweetInformation ti = new TweetInformation();
                    ti.userlocation = item.userlocation;
                    ti.timezone = item.timezone;
                    ti.longitude = item.lon;
                    ti.latitude = item.lat;
                    ti.baseDataId = item.id;
                    ti.coord = item.coord;
                    ti.randomSampleId = item.id;
                    tl.saveLocateResults(ti);
                    stopwatch.Stop();

                    TimeSpan elapsed = stopwatch.Elapsed;
                    actualTime += elapsed;
                    timespan += elapsed;

                    // A non-positive interval disables progress output instead of
                    // raising DivideByZeroException on the modulo.
                    if (informationIntervall > 0 && i % informationIntervall == 0)
                    {
                        // total time, average per row over the whole run, average per row over the last interval
                        string tweetTXT = i + " T " + new RoundedTimeSpan(timespan.Ticks, 2) + " avg " + new RoundedTimeSpan(timespan.Ticks / i, 2) + " avg5k " + new RoundedTimeSpan(actualTime.Ticks / informationIntervall, 2);
                        System.Console.WriteLine(tweetTXT);
                        if (tweetInformation == 1)
                        {
                            statusUpdate("@pide2001 " + tweetTXT, tw);
                        }
                        actualTime = TimeSpan.Zero;
                    }
                }

                System.Console.WriteLine("Press any key to quit !");
                System.Console.ReadLine();
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Interactively asks for a row window, a bulk-insert size, a progress interval and whether
        /// progress should be tweeted, then passes each selected row of <c>learningBase</c>
        /// (ordered by id) to <c>TweetLoc.learn</c>, starting a fresh <c>TweetLoc</c> after every
        /// full batch. Blocks on <c>Console.ReadLine</c> before returning.
        /// </summary>
        /// <param name="tw">Authorizer handed to <c>statusUpdate</c> when progress tweets are enabled.</param>
        private static void learning(PinAuthorizer tw)
        {
            System.Console.WriteLine("Please Type in how many LearningData should be taken (0 for all): ");
            int takeUserInput = Convert.ToInt32(Console.ReadLine());
            System.Console.WriteLine("Please Type in how many Datalines should be skipped (0 for none): ");
            int skipUserInput = Convert.ToInt32(Console.ReadLine());
            System.Console.WriteLine("Please choose the amount of Data which should be saved to the Database in one step: ");
            int bulkInsertSizeUserInput = Convert.ToInt32(Console.ReadLine());
            System.Console.WriteLine("Please type in the Information intervall: ");
            int informationIntervall = Convert.ToInt32(Console.ReadLine());
            System.Console.WriteLine("Do you want to retrieve Tweets about the progress? (0 no 1 yes)");
            int tweetInformation = Convert.ToInt32(Console.ReadLine());

            using (knowledgeObjects DB = new knowledgeObjects())
            {
                // Skip first, then take: "take N, skip M" must yield N rows starting after row M.
                // Taking before skipping would return only N - M rows (none when M >= N).
                var selectedTweets = (from tweets in DB.learningBase
                                      orderby tweets.id
                                      select tweets).Skip(skipUserInput);
                if (takeUserInput != 0)
                {
                    selectedTweets = selectedTweets.Take(takeUserInput);
                }
                List<learningBase> tweetsCollection = selectedTweets.ToList();

                Stopwatch stopwatch = new Stopwatch();

                // timespan accumulates over the whole run, actualTime only over the current interval.
                TimeSpan timespan = TimeSpan.Zero;
                TimeSpan actualTime = TimeSpan.Zero;
                int bulkinsertSize = bulkInsertSizeUserInput;

                TweetLoc tl = new TweetLoc(bulkinsertSize);
                int i = 0;

                foreach (var item in tweetsCollection)
                {
                    // Start a fresh TweetLoc after every full batch. The instance created above
                    // already serves the first batch, and a non-positive batch size keeps that one
                    // instance for the whole run instead of raising DivideByZeroException.
                    // NOTE(review): whether the replaced instance has persisted its pending data,
                    // and whether the last partial batch is flushed, is not visible here - confirm in TweetLoc.
                    if (i > 0 && bulkinsertSize > 0 && i % bulkinsertSize == 0)
                    {
                        tl = new TweetLoc(bulkinsertSize);
                    }
                    i++;

                    stopwatch.Restart();
                    TweetInformation ti = new TweetInformation();
                    ti.userlocation = item.userlocation;
                    ti.timezone = item.timezone;
                    ti.longitude = item.lon;
                    ti.latitude = item.lat;
                    ti.baseDataId = item.id;
                    tl.learn(ti);
                    stopwatch.Stop();

                    TimeSpan elapsed = stopwatch.Elapsed;
                    actualTime += elapsed;
                    timespan += elapsed;

                    // A non-positive interval disables progress output instead of
                    // raising DivideByZeroException on the modulo.
                    if (informationIntervall > 0 && i % informationIntervall == 0)
                    {
                        // total time, average per row over the whole run, average per row over the last interval
                        string tweetTXT = i + " T " + new RoundedTimeSpan(timespan.Ticks, 2) + " avg " + new RoundedTimeSpan(timespan.Ticks / i, 2) + " avg5k " + new RoundedTimeSpan(actualTime.Ticks / informationIntervall, 2);
                        System.Console.WriteLine(tweetTXT);
                        if (tweetInformation == 1)
                        {
                            statusUpdate("@pide2001 " + tweetTXT, tw);
                        }
                        actualTime = TimeSpan.Zero;
                    }
                }

                System.Console.WriteLine("Press any key to quit !");
                System.Console.ReadLine();
            }
        }