예제 #1
0
        /// <summary>
        /// Reads one S3 file of space-separated page statistics and adds every row to the
        /// <c>TABLE</c> data table of the bucket selected for that file.
        /// </summary>
        /// <param name="file">
        /// S3 object to load. An 11-character slice of its key, starting right after
        /// <c>DATE_EXTRACTION_PREFIX</c>, is parsed with <c>FORMAT</c> and stamped on every row.
        /// </param>
        /// <exception cref="FormatException">The key slice does not match <c>FORMAT</c>.</exception>
        /// <exception cref="InvalidOperationException">
        /// No catalog is named <c>CATALOG</c>, or no bucket has the modulus computed for this file.
        /// </exception>
        private void ProcessFile(S3Object file)
        {
            Trace.WriteLine(string.Format("Processing item {0}", file.Key));

            var lines = ReadS3File(file);

            var trimmedDate = file.Key.Substring(DATE_EXTRACTION_PREFIX.Length, 11);
            var date        = DateTime.ParseExact(trimmedDate, FORMAT, PROVIDER);

            // GetHashCode() may legally return a negative value, and C#'s % keeps the sign of
            // the dividend. Fold the remainder into [0, bucketCount) so the bucket lookup below
            // cannot miss just because the hash happened to be negative.
            var bucketCount = ComputeNode.GlobalBucketCount;
            var fileMod     = file.GetHashCode() % bucketCount;
            if (fileMod < 0)
            {
                fileMod += bucketCount;
            }

            var catalog   = ComputeNode.Catalogs.Values.Cast<ICatalog>().First(c => c.CatalogName == CATALOG);
            var bucketMod = catalog.Buckets.First(b => b.Value.BucketMod == fileMod).Value;

            Trace.WriteLine(string.Format("Adding data items from {0}", file.Key));
            lines.AsParallel().ForAll(line =>
            {
                // Expected layout: projectCode pageName pageViews pageSize
                var items = line.Split(' ');
                Debug.Assert(items.Length == 4);

                var projectCode = HttpUtility.UrlDecode(items[0]);
                var pageName    = HttpUtility.UrlDecode(items[1]);

                // Machine-generated numbers: parse culture-independently.
                var pageViews   = int.Parse(items[2], System.Globalization.CultureInfo.InvariantCulture);
                var pageSizeKB  = long.Parse(items[3], System.Globalization.CultureInfo.InvariantCulture);

                var wikiStat = new WikipediaHourlyPageStats(date, projectCode, pageName, pageViews, pageSizeKB);

                // NOTE(review): AddItem is called concurrently from PLINQ workers — confirm it is thread-safe.
                bucketMod.BucketDataTables[TABLE].AddItem(wikiStat);
            });

            Trace.WriteLine(string.Format("Added data items from {0}", file.Key));
        }
예제 #2
0
        /// <summary>
        /// Loads the lines of an S3 file, converts each one into a
        /// <c>WikipediaHourlyPageStats</c> row and adds it to the <c>TABLE</c> data table of the
        /// bucket chosen for the file.
        /// </summary>
        /// <param name="file">
        /// S3 object to process. The row timestamp comes from the 11 characters of its key that
        /// follow <c>DATE_EXTRACTION_PREFIX</c>, parsed with <c>FORMAT</c>.
        /// </param>
        /// <exception cref="FormatException">The extracted key fragment does not match <c>FORMAT</c>.</exception>
        /// <exception cref="InvalidOperationException">
        /// The <c>CATALOG</c> catalog is missing, or no bucket matches the file's computed modulus.
        /// </exception>
        private void ProcessFile(S3Object file)
        {
            Trace.WriteLine(string.Format("Processing item {0}", file.Key));

            var lines = ReadS3File(file);

            var trimmedDate = file.Key.Substring(DATE_EXTRACTION_PREFIX.Length, 11);
            var date = DateTime.ParseExact(trimmedDate, FORMAT, PROVIDER);

            // The remainder of a negative hash code is itself negative in C#, which would match
            // no bucket. Shift negative remainders up so the result is always in [0, bucketCount).
            var bucketCount = ComputeNode.GlobalBucketCount;
            var fileMod = file.GetHashCode() % bucketCount;
            if (fileMod < 0)
            {
                fileMod += bucketCount;
            }

            var buckets = ComputeNode.Catalogs.Values.Cast<ICatalog>().First(c => c.CatalogName == CATALOG).Buckets;
            var bucketMod = buckets.First(b => b.Value.BucketMod == fileMod).Value;

            Trace.WriteLine(string.Format("Adding data items from {0}", file.Key));
            lines.AsParallel().ForAll(line =>
                {
                    // Each line carries four space-separated fields:
                    // project code, page name, view count, page size.
                    var items = line.Split(' ');
                    Debug.Assert(items.Length == 4);

                    var projectCode = HttpUtility.UrlDecode(items[0]);
                    var pageName = HttpUtility.UrlDecode(items[1]);

                    // Numeric fields are machine-written, so parse them with the invariant culture.
                    var pageViews = int.Parse(items[2], System.Globalization.CultureInfo.InvariantCulture);
                    var pageSizeKB = long.Parse(items[3], System.Globalization.CultureInfo.InvariantCulture);

                    var wikiStat = new WikipediaHourlyPageStats(date, projectCode, pageName, pageViews, pageSizeKB);

                    // NOTE(review): invoked in parallel — verify AddItem tolerates concurrent callers.
                    bucketMod.BucketDataTables[TABLE].AddItem(wikiStat);
                });

            Trace.WriteLine(string.Format("Added data items from {0}", file.Key));
        }