Example #1
0
        static void Main(string[] args)
        {
            var filePath = args.First();
            Program.ResourceLock = new SemaphoreSlim(15);

            var elasticSettings = new ConnectionSettings("127.0.0.1.", 9200)
                                        .SetDefaultIndex("mpdreamz")
                                        .SetMaximumAsyncConnections(50);
            var client = new ElasticClient(elasticSettings);
            ConnectionStatus connectionStatus;
            if (!client.TryConnect(out connectionStatus))
            {
                Console.Error.WriteLine("Could not connect to {0}:\r\n{1}",
                    elasticSettings.Host, connectionStatus.Error.OriginalException.Message);
                Console.Read();
                return;
            }

            var reader = new XmlTextReader(filePath);
            Post post = new Post();
            PostMetaData meta = new PostMetaData();

            int processed = 0, dropped = 0;
            Stopwatch sw = new Stopwatch();
            sw.Start();
            var postQueue = new List<Post>();
            try
            {
                while (reader.Read())
                {
                    var name = reader.Name;

                    if (reader.NodeType == XmlNodeType.Element)
                    {
                        if (name == "HackerNews")
                            continue;

                        if (name == "ID")
                            post.Id = reader.ReadElementContentAsInt();

                        else if (name == "ParentID")
                            post.ParentId = reader.ReadElementContentAsInt();
                        else if (name == "Url")
                            post.Url = reader.ReadElementContentAsString();
                        else if (name == "Title")
                            post.Title = reader.ReadElementContentAsString();
                        else if (name == "Text")
                            post.Text = reader.ReadElementContentAsString();
                        else if (name == "Username")
                            meta.Username = reader.ReadElementContentAsString();
                        else if (name == "Points")
                            meta.Points = reader.ReadElementContentAsInt();
                        else if (name == "Type")
                            meta.Type = reader.ReadElementContentAsInt();
                        else if (name == "Timestamp")
                            meta.Created = reader.ReadElementContentAsDateTime();
                        else if (name == "CommentCount")
                            meta.CommentsCount = reader.ReadElementContentAsInt();
                    }

                    if (reader.NodeType == XmlNodeType.EndElement
                        && name == "row")
                    {
                        post.Meta = meta;

                        postQueue.Add(post);
                        if (postQueue.Count() == 1000)
                        {
                            client.IndexAsync<Post>(postQueue, (c) =>
                            {
                                if (!c.Success)
                                    dropped++;
                            });

                            postQueue = new List<Post>();
                            processed++;
                        }

                        Console.Write("\rProcessed:{0}, Dropped:{2} in {1}", processed, sw.Elapsed, dropped);

                        post = new Post();
                        meta = new PostMetaData();
                    }

                }
                sw.Stop();
                Console.WriteLine("\nDone! {0}", sw.Elapsed);
            }
            catch (Exception e)
            {

            }
        }
Example #2
0
        /// <summary>
        /// Depends on hn_full_11-07-2010.xml which you can download from: 
        /// http://api.ihackernews.com/torrents/hn_full_11-07-2010.zip.torrent
        /// 
        /// When run from debug make sure to change the default debug arguments.
        /// <param name="args">Full filepath to hn_full_11-07-2010.xml</param>
        static void Main(string[] args)
        {
            var filePath = args.First();
            var elasticSettings = new ConnectionSettings(new Uri("http://ipv4.fiddler:9200"))
                                        .SetDefaultIndex("mpdreamz")
                                        .SetMaximumAsyncConnections(50);
            var client = new ElasticClient(elasticSettings);
            ConnectionStatus connectionStatus;
            if (!client.TryConnect(out connectionStatus))
            {
                Console.Error.WriteLine("Could not connect to {0}:\r\n{1}",
                    elasticSettings.Host, connectionStatus.Error.OriginalException.Message);
                Console.Read();
                return;
            }

            var reader = new XmlTextReader(filePath);
            Post post = new Post();
            PostMetaData meta = new PostMetaData();

            int processed = 0, dropped = 0;
            Stopwatch sw = new Stopwatch();
            sw.Start();
            var postQueue = new List<Post>();
            try
            {
                while (reader.Read())
                {
                    var name = reader.Name;

                    if (reader.NodeType == XmlNodeType.Element)
                    {
                        if (name == "HackerNews")
                            continue;

                        if (name == "ID")
                            post.Id = reader.ReadElementContentAsInt();

                        else if (name == "ParentID")
                            post.ParentId = reader.ReadElementContentAsInt();
                        else if (name == "Url")
                            post.Url = reader.ReadElementContentAsString();
                        else if (name == "Title")
                            post.Title = reader.ReadElementContentAsString();
                        else if (name == "Text")
                            post.Text = reader.ReadElementContentAsString();
                        else if (name == "Username")
                            meta.Username = reader.ReadElementContentAsString();
                        else if (name == "Points")
                            meta.Points = reader.ReadElementContentAsInt();
                        else if (name == "Type")
                            meta.Type = reader.ReadElementContentAsInt();
                        else if (name == "Timestamp")
                            meta.Created = reader.ReadElementContentAsDateTime();
                        else if (name == "CommentCount")
                            meta.CommentsCount = reader.ReadElementContentAsInt();
                    }

                    if (reader.NodeType == XmlNodeType.EndElement
                        && name == "row")
                    {
                        post.Meta = meta;
                        postQueue.Add(post);
                        if (postQueue.Count() == 1000)
                        {
                            var t = client.IndexManyAsync(postQueue);
                            t.ContinueWith(c =>
                            {
                                var result = c.Result;
                                if (!result.IsValid)
                                    dropped++;
                            });
                            processed += postQueue.Count();
                            postQueue = new List<Post>();

                        }
                        Console.Write("\rProcessed:{0}, Dropped:{2} in {1}", processed, sw.Elapsed, dropped);

                        post = new Post();
                        meta = new PostMetaData();
                    }
                }
                if (postQueue.Count() > 0)
                {
                    var task = client.IndexManyAsync(postQueue).ContinueWith(t =>
                    {
                        var c = t.Result;
                        if (!c.IsValid)
                            Interlocked.Increment(ref dropped);
                        return t;
                    });
                    Interlocked.Add(ref processed, postQueue.Count());
                    postQueue = new List<Post>();

                }
                sw.Stop();
                Console.WriteLine("\nDone!", sw.Elapsed);
                Console.WriteLine("{0} docs in {1} => {2} docs/s", processed, sw.Elapsed, processed / sw.Elapsed.TotalSeconds);
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }
        }