Example #1
0
        /// <summary>
        /// Merges the files in the current directory that match the <c>f</c>/<c>files</c>
        /// option into "merge" files, pass after pass, until a single file remains.
        /// </summary>
        /// <remarks>
        /// The first pass reads the matching input files and leaves them on disk. Every later
        /// pass reads the files produced by the previous pass and deletes them once merged.
        /// Each item yielded by the merge queue is written as a 16-byte GUID.
        /// </remarks>
        /// <param name="args">Command-line arguments; must supply a non-blank file pattern.</param>
        /// <exception cref="OptionSetException">No file pattern was supplied.</exception>
        public override void Run(string[] args)
        {
            var filenames = string.Empty;
            var options = new OptionSet() { { "f=|files=", "Merges the specified split files.", f => filenames = f } };
            options.Parse(args);
            if (string.IsNullOrWhiteSpace(filenames))
            {
                throw new OptionSetException(options);
            }

            var splitFiles = new HashSet<string>(Directory.EnumerateFiles(@".\", filenames));
            var workFiles = TakeNextBatch(splitFiles);
            int splitId = 0;
            bool firstRun = true;
            var createdFiles = new HashSet<string>();

            // firstRun forces at least one merge so that a single input file is still processed.
            while (workFiles.Count > 1 || firstRun)
            {
                int written = 0;
                var multiQueue = new MultiQueue(workFiles);
                using (var stream = Utils.FindFirstSplitFile("merge", ref splitId))
                {
                    createdFiles.Add(stream.Name);
                    Console.WriteLine(
                        "Merging {0} into {1}",
                        string.Join(",", workFiles.Select(f => Path.GetFileName(f))),
                        Path.GetFileName(stream.Name));
                    foreach (var item in multiQueue.Merge())
                    {
                        stream.Write(item.ToGuid().ToByteArray(), 0, 16);
                        if (++written % 1000000 == 0)
                        {
                            Console.WriteLine("Written {0} lines", written);
                        }
                    }
                }

                Console.WriteLine("Written {0} lines", written);
                Console.WriteLine("Excluded {0} duplicates.", multiQueue.Duplicates);

                // Only intermediate merge files are removed; the original inputs are kept.
                if (!firstRun)
                {
                    foreach (var mergedFile in workFiles)
                    {
                        File.Delete(mergedFile);
                    }
                }

                workFiles = TakeNextBatch(splitFiles);
                if (workFiles.Count == 0)
                {
                    // The current pass is exhausted: the files it produced become the
                    // input of the next pass. Switching only on an empty batch guarantees
                    // that no pending file is discarded without being merged.
                    splitFiles = createdFiles;
                    createdFiles = new HashSet<string>();
                    workFiles = TakeNextBatch(splitFiles);
                    firstRun = false;
                }
            }
        }

        /// <summary>
        /// Removes the next batch of files from <paramref name="pending"/> and returns it.
        /// </summary>
        /// <remarks>
        /// A batch holds <c>Take</c> files as long as at least <c>Take</c> files would remain
        /// afterwards; otherwise it holds everything that is left, so a short tail is folded
        /// into the final batch instead of being left over.
        /// </remarks>
        /// <param name="pending">Files still waiting to be merged in the current pass.</param>
        /// <returns>The files to merge next; empty when <paramref name="pending"/> is empty.</returns>
        private List<string> TakeNextBatch(HashSet<string> pending)
        {
            int batchSize = pending.Count - Take >= Take ? Take : pending.Count;
            var batch = new List<string>(pending.Take(batchSize));
            pending.ExceptWith(batch);
            return batch;
        }
Example #2
0
        /// <summary>
        /// Collects the URLs found in the given statuses (and in their retweeted statuses)
        /// that have no entry in the default memory cache, and fetches their details
        /// through <c>FetchUrlInfosImp</c>.
        /// </summary>
        /// <remarks>
        /// URLs are looked up in the cache under the key <c>"http://t.cn/" + url</c>.
        /// When more than 20 URLs are pending they are split into batches of at most 20
        /// that are fetched concurrently.
        /// NOTE(review): 20 comes from the original hard-coded split and is presumably the
        /// per-request limit of the short-URL service — confirm.
        /// </remarks>
        /// <param name="ses">Statuses to scan; nothing is done when null or empty.</param>
        /// <returns>A task that completes when every batch has been fetched.</returns>
        protected async Task FetchUrlInfos(Status[] ses)
        {
            if (ses == null || ses.Length == 0)
                return;

            const int batchSize = 20;
            var mem = MemoryCache.Default;

            // A set, so that a URL appearing in several statuses is requested only once.
            var urls = new HashSet<string>();
            foreach (var s in ses)
            {
                var us = Utils.ExtractUrlFromWeibo(s.text);
                foreach (var url in us)
                {
                    if (mem.Get("http://t.cn/" + url) == null)
                        urls.Add(url);
                }

                if (s.retweeted_status != null)
                {
                    var rus = Utils.ExtractUrlFromWeibo(s.retweeted_status.text);
                    foreach (var url in rus)
                    {
                        if (mem.Get("http://t.cn/" + url) == null)
                            urls.Add(url);
                    }
                }
            }

            // Everything fits in one request (this also covers exactly batchSize URLs,
            // which must not trigger a second, empty request).
            if (urls.Count <= batchSize)
            {
                await FetchUrlInfosImp(urls);
                return;
            }

            // Materialize once so each batch is a stable, independent snapshot.
            var pending = urls.ToList();
            var tasks = new List<Task>();
            for (int offset = 0; offset < pending.Count; offset += batchSize)
            {
                int size = Math.Min(batchSize, pending.Count - offset);
                tasks.Add(FetchUrlInfosImp(pending.GetRange(offset, size)));
            }

            await Task.WhenAll(tasks);
        }