예제 #1
0
        /// <summary>
        /// Parses the HTML of one forum post, saves its author through
        /// <c>saveAuthorData</c>, then inserts the post into <c>db.Posts</c> or, when a row
        /// with the same subject, page number and post id already exists, refreshes that
        /// row's likes and price.
        /// </summary>
        /// <remarks>
        /// The HTML is consumed front to back: every extraction step discards the text in
        /// front of the marker it found, so each later search only sees what follows the
        /// previous field.
        /// </remarks>
        /// <param name="stock">Value stored in the <c>Stock</c> column of the post row.</param>
        /// <param name="subject">Thread subject; when "re:" occurs within its first four
        /// characters the page number is read from <paramref name="threadLink"/>.</param>
        /// <param name="threadCodeOrg">HTML fragment of the post to parse.</param>
        /// <param name="threadLink">Link of the thread page; a "page-N" segment supplies the page number.</param>
        /// <param name="authorPostLink">Passed through unchanged to <c>saveAuthorData</c>.</param>
        /// <returns>1 when a new post row was added, 0 when a matching row already existed.</returns>
        /// <exception cref="ArgumentOutOfRangeException">A mandatory marker (avatar, author,
        /// date, time, post id, content, likes) is missing from the HTML.</exception>
        /// <exception cref="FormatException">The page number, post id or date/time text cannot be converted.</exception>
        public static int savePageData(string stock, string subject, string threadCodeOrg, string threadLink, string authorPostLink)
        {
            // Page Number
            int pageNum = parsePageNumber(subject, threadLink);

            // Unparsed tail of the post HTML; shrinks as each field is read.
            string remaining = threadCodeOrg;

            // Author Link
            remaining = skipToMarker(remaining, "class=\"avatar\"");
            remaining = skipToMarker(remaining, "a href");
            remaining = skipPastMarker(remaining, "\"");
            string authorHref = takeUntilMarker(remaining, "\"");

            // Test the extracted href: the full link always starts with the site
            // prefix, so checking the combined string could never be false.
            if (authorHref.Trim() != "")
            {
                saveAuthorData("http://hotcopper.com.au/" + authorHref, authorPostLink);
            }

            // Author
            remaining = skipToMarker(remaining, "<h");
            remaining = skipPastMarker(remaining, ">");
            string postAuthor = stripFirstOpeningTag(takeUntilMarker(remaining, "</"), "<a");
            postAuthor = postAuthor.Replace("</a>", "").Trim();
            postAuthor = postAuthor.Replace("&#039;", "'");

            // Date, Time
            remaining = skipToMarker(remaining, "Date:");
            string postDate = readDdValue(ref remaining);
            remaining = skipToMarker(remaining, "Time:");
            string postTime = readDdValue(ref remaining);
            // Parsed with the current culture, exactly as before.
            DateTime postDateTime = Convert.ToDateTime(postDate + " " + postTime);

            // Post ID
            remaining = skipToMarker(remaining, "Post #:");
            string postIdText = stripFirstOpeningTag(readDdValue(ref remaining), "<a");
            int postID = Convert.ToInt32(postIdText.Replace("</a>", "").Trim());

            // IP (optional)
            string postIP = "Not Found";
            if (trySkipToMarker(ref remaining, "IP:"))
            {
                postIP = readDdValue(ref remaining);
            }

            // Content
            remaining = skipToMarker(remaining, "class=\"content\"");
            remaining = skipPastMarker(remaining, "<article>");
            remaining = skipToMarker(remaining, "<blockquote");
            remaining = skipPastMarker(remaining, ">");
            string story = cleanPostContent(takeUntilMarker(remaining, "</blockquote"));

            // Length of the post: number of whitespace-delimited tokens.
            int postWordCount = Regex.Matches(story, @"[\S]+").Count;

            // Likes
            remaining = skipToMarker(remaining, "icon icon-like");
            remaining = skipPastMarker(remaining, "</span>");
            int likes;
            if (!Int32.TryParse(takeUntilMarker(remaining, "</").Replace(",", ""), out likes))
            {
                likes = 0;
            }

            // Price at Posting (optional): 0 when absent or when the text contains "(",
            // -1 when present but not parseable.
            Decimal priceAtPosting = 0;
            if (trySkipToMarker(ref remaining, "Price at posting"))
            {
                remaining = skipPastMarker(remaining, "$");
                string priceText = takeUntilMarker(remaining, "</");
                if (!priceText.Contains("(") && !Decimal.TryParse(priceText, out priceAtPosting))
                {
                    priceAtPosting = -1M;
                }
            }

            // Sentiment, Disclosure (both optional, read in this order)
            string sentiment  = readOptionalField(ref remaining, "Sentiment");
            string disclosure = readOptionalField(ref remaining, "Disclosure");

            // NOTE(review): HCDB is used like an Entity Framework context (Posts.Add /
            // SaveChanges); this assumes it is IDisposable - confirm. It is created only
            // now so that no context is held open while the author page is fetched.
            using (var db = new HCDB())
            {
                // One query both answers "does it exist?" and yields the row to update.
                var existing = db.Posts.FirstOrDefault(f => f.Subject == subject &&
                                                            f.PageNum == pageNum &&
                                                            f.Post_ID == postID);
                if (existing == null)
                {
                    db.Posts.Add(new HCDB_Posts
                    {
                        Stock            = stock,
                        Subject          = subject,
                        PageNum          = pageNum,
                        Content          = story,
                        Likes            = likes,
                        DateTime         = postDateTime,
                        Author           = postAuthor,
                        Post_ID          = postID,
                        IP               = postIP,
                        Length_of_Post   = postWordCount,
                        Price_at_Posting = priceAtPosting,
                        Disclosure       = disclosure,
                        Sentiment        = sentiment
                    });
                    db.SaveChanges();
                    return 1;
                }

                // Only likes and price are refreshed on an already stored post.
                if (existing.Likes != likes)
                {
                    existing.Likes = likes;
                }
                if (existing.Price_at_Posting != priceAtPosting)
                {
                    existing.Price_at_Posting = priceAtPosting;
                }
                db.SaveChanges();
                return 0;
            }
        }

        /// <summary>
        /// Returns the page number encoded as "page-N" in <paramref name="threadLink"/> when
        /// "re:" occurs in the first four characters of <paramref name="subject"/>;
        /// otherwise, or when the link has no "page-" segment, returns 1.
        /// </summary>
        private static int parsePageNumber(string subject, string threadLink)
        {
            const int firstPage = 1;

            // Subjects shorter than four characters are inspected whole instead of
            // making Substring throw.
            string prefix = subject.Length > 4 ? subject.Substring(0, 4) : subject;
            if (prefix.IndexOf("re:", StringComparison.OrdinalIgnoreCase) == -1)
            {
                return firstPage;
            }

            const string pageMarker = "page-";
            int markerIndex = threadLink.IndexOf(pageMarker, StringComparison.Ordinal);
            if (markerIndex == -1)
            {
                return firstPage;
            }

            string tail = threadLink.Substring(markerIndex + pageMarker.Length);
            int queryIndex = tail.IndexOf("?", StringComparison.Ordinal);
            return Convert.ToInt32(queryIndex == -1 ? tail : tail.Substring(0, queryIndex));
        }

        /// <summary>
        /// Returns <paramref name="text"/> starting at the first occurrence of
        /// <paramref name="marker"/>; throws <see cref="ArgumentOutOfRangeException"/> when
        /// the marker is absent.
        /// </summary>
        private static string skipToMarker(string text, string marker)
        {
            return text.Substring(text.IndexOf(marker, StringComparison.Ordinal));
        }

        /// <summary>
        /// Moves <paramref name="text"/> to the first occurrence of <paramref name="marker"/>
        /// and returns true, or leaves it untouched and returns false when the marker is absent.
        /// </summary>
        private static bool trySkipToMarker(ref string text, string marker)
        {
            int index = text.IndexOf(marker, StringComparison.Ordinal);
            if (index == -1)
            {
                return false;
            }
            text = text.Substring(index);
            return true;
        }

        /// <summary>
        /// Returns the text that follows the first occurrence of <paramref name="marker"/>;
        /// returns <paramref name="text"/> unchanged when the marker is absent.
        /// </summary>
        private static string skipPastMarker(string text, string marker)
        {
            int index = text.IndexOf(marker, StringComparison.Ordinal);
            return index == -1 ? text : text.Substring(index + marker.Length);
        }

        /// <summary>
        /// Returns the text in front of the first occurrence of <paramref name="marker"/>;
        /// throws <see cref="ArgumentOutOfRangeException"/> when the marker is absent.
        /// </summary>
        private static string takeUntilMarker(string text, string marker)
        {
            return text.Substring(0, text.IndexOf(marker, StringComparison.Ordinal));
        }

        /// <summary>
        /// Advances <paramref name="remaining"/> past the next "&lt;dd ...&gt;" opening tag
        /// and returns the text up to the following "&lt;/".
        /// </summary>
        private static string readDdValue(ref string remaining)
        {
            remaining = skipToMarker(remaining, "<dd");
            remaining = skipPastMarker(remaining, ">");
            return takeUntilMarker(remaining, "</");
        }

        /// <summary>
        /// Reads the "&lt;dd&gt;" value that follows <paramref name="label"/>, or returns
        /// "Not Found" without moving <paramref name="remaining"/> when the label is absent.
        /// </summary>
        private static string readOptionalField(ref string remaining, string label)
        {
            if (!trySkipToMarker(ref remaining, label))
            {
                return "Not Found";
            }
            remaining = skipPastMarker(remaining, "<dd>");
            return takeUntilMarker(remaining, "</");
        }

        /// <summary>
        /// Replaces the first opening tag that begins with <paramref name="tagStart"/>
        /// (through its closing "&gt;") with a newline; returns the input unchanged when
        /// no such tag exists.
        /// </summary>
        private static string stripFirstOpeningTag(string html, string tagStart)
        {
            int tagIndex = html.IndexOf(tagStart, StringComparison.Ordinal);
            if (tagIndex == -1)
            {
                return html;
            }

            string before = html.Substring(0, tagIndex);
            string after  = html.Substring(tagIndex + tagStart.Length);
            // When no ">" follows, IndexOf yields -1 and everything after the tag start is kept.
            after = after.Substring(after.IndexOf(">", StringComparison.Ordinal) + 1);
            return before + "\n" + after;
        }

        /// <summary>
        /// Applies <see cref="stripFirstOpeningTag"/> until no tag beginning with
        /// <paramref name="tagStart"/> is left.
        /// </summary>
        private static string stripOpeningTags(string html, string tagStart)
        {
            while (html.IndexOf(tagStart, StringComparison.Ordinal) != -1)
            {
                html = stripFirstOpeningTag(html, tagStart);
            }
            return html;
        }

        /// <summary>
        /// Replaces every span from <paramref name="open"/> through the next
        /// <paramref name="close"/> with a newline. The search for the close marker starts
        /// <paramref name="closeSearchOffset"/> characters after the start of the open
        /// marker; an unterminated span is dropped to the end of the text.
        /// </summary>
        private static string removeDelimitedBlocks(string html, string open, string close, int closeSearchOffset)
        {
            int openIndex;
            while ((openIndex = html.IndexOf(open, StringComparison.Ordinal)) != -1)
            {
                string before = html.Substring(0, openIndex);
                string rest   = html.Substring(openIndex + closeSearchOffset);
                int closeIndex = rest.IndexOf(close, StringComparison.Ordinal);
                string after  = closeIndex == -1 ? "" : rest.Substring(closeIndex + close.Length);
                html = before + "\n" + after;
            }
            return html;
        }

        /// <summary>
        /// Reduces the HTML of a post body to plain text: strips the known opening tags,
        /// style blocks and comments, then removes leftover closing tags, decodes a fixed
        /// set of entities, deletes newlines and tabs, and trims the result.
        /// </summary>
        private static string cleanPostContent(string story)
        {
            // Prefix matches, applied in this fixed order ("<a" also hits any tag whose name starts with "a").
            string[] openingTags = { "<a", "<p", "<img", "<span", "<script", "<ins", "<div", "<link", "<video", "<source" };
            foreach (string tagStart in openingTags)
            {
                story = stripOpeningTags(story, tagStart);
            }

            story = removeDelimitedBlocks(story, "<style>", "</style>", 7);
            // Offset 3 (not 4) keeps the last "-" of "<!--" available to the "-->" search.
            story = removeDelimitedBlocks(story, "<!--", "-->", 3);

            // Applied strictly top to bottom; later rows see the output of earlier ones
            // (e.g. "&amp;" is decoded before the numeric entities).
            string[,] replacements =
            {
                { "</style>", "" },
                { "</span>", "" },
                { "</script>", "" },
                { "</video>", "" },
                { "</a>", "" },
                { "<br />", "" },
                { "</ul>", "" },
                { "</li>", "" },
                { "</tr>", "" },
                { "</td>", "" },
                { "<p>", "" },
                { "</p>", "" },
                { "<P>", "" },
                { "</P>", "" },
                { "</ins>", "" },
                { "</div>", "" },
                { "<em>", "" },
                { "</em>", "" },
                { "&nbsp;", " " },
                { "&lsquo;", "'" },
                { "&rsquo;", "'" },
                { "&ldquo;", "\"" },
                { "&rdquo;", "\"" },
                { "&quot;", "\"" },
                { "&amp;", "&" },
                { "&#8217;", "'" },
                { "&#8220;", "\"" },
                { "&#8221;", "\"" },
                { "&thinsp;&#8212;&thinsp;", " - " },
                { "\n", "" },
                { "\t", "" },
                { "&#039;", "'" },
                { "<b>", "" },
                { "</b>", "" },
                { "<br>", "" },
                { "<strong>", "" },
                { "</strong>", "" }
            };
            for (int row = 0; row < replacements.GetLength(0); row++)
            {
                story = story.Replace(replacements[row, 0], replacements[row, 1]);
            }
            return story.Trim();
        }
예제 #2
0
        public static void saveAuthorData(string authorLink, string authorPostLink)
        {
            var db = new HCDB();

            try
            {
                //MessageBox.Show(authorLink);
                string sourceCode = getSourceCode(authorLink);
                if (sourceCode == "invalid")
                {
                    throw new UriFormatException();
                }

                int startIndex = 0; int endIndex = 0;
                // Name
                startIndex = sourceCode.IndexOf("itemprop=\"name");
                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                startIndex = sourceCode.IndexOf(">") + 1;
                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                endIndex   = sourceCode.IndexOf("</");
                string authorName = sourceCode.Substring(0, endIndex);

                // Posts
                startIndex = sourceCode.IndexOf("stat-post");
                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                startIndex = sourceCode.IndexOf("</div>") + 6;
                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                endIndex   = sourceCode.IndexOf("<");
                string postsNumStr   = sourceCode.Substring(0, endIndex).Trim().Replace(",", "");
                int    postsTotalNum = 0;
                if (!int.TryParse(postsNumStr, out postsTotalNum))
                {
                    postsTotalNum = -1;
                }

                // Likes Received
                startIndex = sourceCode.IndexOf("stat-likes");
                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                startIndex = sourceCode.IndexOf("</div>") + 6;
                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                endIndex   = sourceCode.IndexOf("<");
                int    likesReceived    = 0;
                string likesReceivedStr = sourceCode.Substring(0, endIndex).Trim().Replace(",", "");
                if (!int.TryParse(likesReceivedStr, out likesReceived))
                {
                    likesReceived = -1;
                }

                // Following
                // Its Number
                startIndex = sourceCode.IndexOf("stat-following");
                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                startIndex = sourceCode.IndexOf("</div>") + 6;
                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                endIndex   = sourceCode.IndexOf("<");
                int following = 0;

                string followingStr = sourceCode.Substring(0, endIndex).Trim().Replace(",", "");
                if (!int.TryParse(followingStr, out following))
                {
                    following = -1;
                }

                // Followers
                // Its Number
                startIndex = sourceCode.IndexOf("stat-followers");
                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                startIndex = sourceCode.IndexOf("</div>") + 6;
                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                endIndex   = sourceCode.IndexOf("<");
                int    followers    = 0;
                string followersStr = sourceCode.Substring(0, endIndex).Trim().Replace(",", "");


                if (!int.TryParse(followersStr, out followers))
                {
                    followers = -1;
                }

                // Following Stocks stockList
                startIndex = sourceCode.IndexOf("member-stockList");
                string stockLists = "";
                if (startIndex != -1)
                {
                    sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                    endIndex   = sourceCode.IndexOf("</ol");
                    string stockCode = sourceCode.Substring(0, endIndex);
                    int    count     = 0;
                    while (stockCode.IndexOf("<li>") != -1)
                    {
                        startIndex  = stockCode.IndexOf("<li>") + 4;
                        stockCode   = stockCode.Substring(startIndex, stockCode.Length - startIndex);
                        startIndex  = stockCode.IndexOf("title=") + 6;
                        stockCode   = stockCode.Substring(startIndex, stockCode.Length - startIndex);
                        startIndex  = stockCode.IndexOf("\"") + 1;
                        stockCode   = stockCode.Substring(startIndex, stockCode.Length - startIndex);
                        endIndex    = stockCode.IndexOf("\"");
                        stockLists += stockCode.Substring(0, endIndex) + "/space";
                        count++;
                        if (stockLists.Contains("<a"))
                        {
                            endIndex = stockLists.IndexOf("<a");
                            string firstPart = stockLists.Substring(0, endIndex);
                            stockLists = stockLists.Substring(endIndex + 2, stockLists.Length - (endIndex + 2));
                            startIndex = stockLists.IndexOf(">") + 1;
                            string secondPart = stockLists.Substring(startIndex, stockLists.Length - startIndex);
                            stockLists = firstPart + "\n" + secondPart;
                        }
                        if (stockLists.Contains("<span"))
                        {
                            endIndex = stockLists.IndexOf("<span");
                            string firstPart = stockLists.Substring(0, endIndex);
                            stockLists = stockLists.Substring(endIndex + 5, stockLists.Length - (endIndex + 5));
                            startIndex = stockLists.IndexOf(">") + 1;
                            string secondPart = stockLists.Substring(startIndex, stockLists.Length - startIndex);
                            stockLists = firstPart + "\n" + secondPart;
                        }
                        stockLists = stockLists.Replace("<li>", "");
                        stockLists = stockLists.Replace("\n", "");
                        stockLists = stockLists.Replace("</a>", "").Trim();
                        stockLists = stockLists.Replace("&#039;", "'");
                    }
                    stockLists = count + "  " + stockLists.Replace("/space", ", ");
                }

                // Following - Its name lists
                string followingLists = "";
                if (following > 0)
                {
                    startIndex = sourceCode.IndexOf("Following " + following + " members");
                    sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                    startIndex = sourceCode.IndexOf("<ol>") + 4;
                    sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                    endIndex   = sourceCode.IndexOf("</ol");
                    string followingListsCode = sourceCode.Substring(0, endIndex);
                    while (followingListsCode.IndexOf("<li>") != -1)
                    {
                        startIndex         = followingListsCode.IndexOf("<li>") + 4;
                        followingListsCode = followingListsCode.Substring(startIndex, followingListsCode.Length - startIndex);
                        endIndex           = followingListsCode.IndexOf("</");
                        followingLists    += followingListsCode.Substring(0, endIndex) + "/space";
                        if (followingLists.Contains("<a"))
                        {
                            endIndex = followingLists.IndexOf("<a");
                            string firstPart = followingLists.Substring(0, endIndex);
                            followingLists = followingLists.Substring(endIndex + 2, followingLists.Length - (endIndex + 2));
                            startIndex     = followingLists.IndexOf(">") + 1;
                            string secondPart = followingLists.Substring(startIndex, followingLists.Length - startIndex);
                            followingLists = firstPart + "\n" + secondPart;
                        }
                        if (followingLists.Contains("<span"))
                        {
                            endIndex = followingLists.IndexOf("<span");
                            string firstPart = followingLists.Substring(0, endIndex);
                            followingLists = followingLists.Substring(endIndex + 5, followingLists.Length - (endIndex + 5));
                            startIndex     = followingLists.IndexOf(">") + 1;
                            string secondPart = followingLists.Substring(startIndex, followingLists.Length - startIndex);
                            followingLists = firstPart + "\n" + secondPart;
                        }
                        followingLists = followingLists.Replace("<li>", "");
                        followingLists = followingLists.Replace("\n", "");
                        followingLists = followingLists.Replace(" ", "");
                        followingLists = followingLists.Replace("</a>", "").Trim();
                        followingLists = followingLists.Replace("&#039;", "'");
                    }
                    followingLists = followingLists.Replace("/space", ", ");
                }

                // Followers - Its name lists
                string followersLists = "";
                if (followers > 0)
                {
                    startIndex = sourceCode.IndexOf("Followed by " + followers + " members");
                    sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                    startIndex = sourceCode.IndexOf("<ol>") + 4;
                    sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                    endIndex   = sourceCode.IndexOf("</ol");
                    string followersListsCode = sourceCode.Substring(0, endIndex);
                    while (followersListsCode.IndexOf("<li>") != -1)
                    {
                        startIndex         = followersListsCode.IndexOf("<li>") + 4;
                        followersListsCode = followersListsCode.Substring(startIndex, followersListsCode.Length - startIndex);
                        endIndex           = followersListsCode.IndexOf("</");
                        followersLists    += followersListsCode.Substring(0, endIndex) + "/space";
                        if (followersLists.Contains("<a"))
                        {
                            endIndex = followersLists.IndexOf("<a");
                            string firstPart = followersLists.Substring(0, endIndex);
                            followersLists = followersLists.Substring(endIndex + 2, followersLists.Length - (endIndex + 2));
                            startIndex     = followersLists.IndexOf(">") + 1;
                            string secondPart = followersLists.Substring(startIndex, followersLists.Length - startIndex);
                            followersLists = firstPart + "\n" + secondPart;
                        }
                        if (followersLists.Contains("<span"))
                        {
                            endIndex = followersLists.IndexOf("<span");
                            string firstPart = followersLists.Substring(0, endIndex);
                            followersLists = followersLists.Substring(endIndex + 5, followersLists.Length - (endIndex + 5));
                            startIndex     = followersLists.IndexOf(">") + 1;
                            string secondPart = followersLists.Substring(startIndex, followersLists.Length - startIndex);
                            followersLists = firstPart + "\n" + secondPart;
                        }
                        followersLists = followersLists.Replace("<li>", "");
                        followersLists = followersLists.Replace("\n", "");
                        followersLists = followersLists.Replace(" ", "");
                        followersLists = followersLists.Replace("</a>", "").Trim();
                        followersLists = followersLists.Replace("&#039;", "'");
                    }
                    followersLists = followersLists.Replace("/space", ", ");
                }

                // Num of posts in a calendar month
                int numofPostsinaCalendarMonth = numOfPostsInAMonth(authorPostLink);


                if (!db.Authors.Any(f => f.Name == authorName))
                {
                    db.Authors.Add(new HCDB_Authors
                    {
                        Name             = authorName,
                        Num_of_Posts     = postsTotalNum,
                        Likes_Received   = likesReceived,
                        Followers        = followers,
                        Followers_List   = followersLists,
                        Following        = following,
                        Following_List   = followingLists,
                        Following_Stocks = stockLists,
                        Num_of_Posts_in_calendar_month = numofPostsinaCalendarMonth
                    });
                    db.SaveChanges();
                }
                else
                {
                    var existing = (from u in db.Authors
                                    where u.Name == authorName
                                    select u).FirstOrDefault();
                    if (existing != null)
                    {
                        if (existing.Num_of_Posts != postsTotalNum)
                        {
                            existing.Num_of_Posts = postsTotalNum;
                        }
                        if (existing.Likes_Received != likesReceived)
                        {
                            existing.Likes_Received = likesReceived;
                        }
                        if (existing.Followers != followers)
                        {
                            existing.Followers = followers;
                        }
                        if (existing.Following != following)
                        {
                            existing.Following = following;
                        }
                        if (existing.Followers_List != followersLists)
                        {
                            existing.Followers_List = followersLists;
                        }
                        if (existing.Following_List != followingLists)
                        {
                            existing.Following_List = followingLists;
                        }
                        if (existing.Num_of_Posts_in_calendar_month != numofPostsinaCalendarMonth)
                        {
                            existing.Num_of_Posts_in_calendar_month = numofPostsinaCalendarMonth;
                        }
                        if (existing.Following_Stocks != stockLists)
                        {
                            existing.Following_Stocks = stockLists;
                        }
                        db.SaveChanges();
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("Error" + ex.ToString());
                Console.WriteLine("Error: " + ex.Message);  // May not display in the form
            }
        }
예제 #3
0
        // MARKET DATA
        /// <summary>
        /// Click handler: downloads the page whose URL is typed into <c>textBox1</c>, scrapes
        /// five consecutive figures from its "stock-pricing" section and stores them as one
        /// <c>HCDB_MarketData</c> row. Progress and errors are reported through <c>listbox</c>
        /// and message boxes.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void MarketDataButton(object sender, EventArgs e)
        {
            // Retrieve the source code of a web page
            string url = textBox1.Text;         // e.g. http://hotcopper.com.au/asx/anz#.VI98gSuUfJI
            if (string.IsNullOrWhiteSpace(url))
            {
                return;
            }

            try
            {
                string sourceCode = WorkerClasses.getSourceCode(url);
                if (sourceCode == "invalid")
                {
                    throw new UriFormatException();
                }
                listbox.Items.Add("[" + DateTime.Now + "] Process Starts. Please wait for a few minutes.");

                /* TAG */
                string groupWord = textBox2.Text;
                if (groupWord == "")
                {
                    groupWord = WorkerClasses.getGroupWord(url);
                }

                // Everything of interest sits after the "stock-pricing" marker.
                sourceCode = SkipToMarketMarker(sourceCode, "stock-pricing");

                // The figures are read strictly in page order; each call consumes the
                // source up to the element it has just read.
                decimal openValue = ReadNextMarketValue(ref sourceCode, "class=\"primary\"", true);
                decimal highValue = ReadNextMarketValue(ref sourceCode, "class=\"high\"", false);
                decimal lowValue  = ReadNextMarketValue(ref sourceCode, "class=\"low\"", false);

                // NOTE(review): the original comments labelled the next two reads "Value" and
                // "Volume", yet they are stored as Last and Market_Price -- confirm against the
                // live page layout.
                decimal lastValue   = ReadNextMarketValue(ref sourceCode, "class=\"primary\"", true);
                decimal marketPrice = ReadNextMarketValue(ref sourceCode, "class=\"primary\"", true);

                // Volume / Value / Market Cap are not scraped and are stored as zero.
                using (var db = new HCDB())
                {
                    db.MarketData.Add(new HCDB_MarketData
                    {
                        Tag                   = groupWord,
                        Date                  = DateTime.Now,
                        High                  = highValue,
                        Low                   = lowValue,
                        Open                  = openValue,
                        Last                  = lastValue,
                        Market_Price          = marketPrice,
                        Volume__Millions_     = 0M,
                        Value__Millions_      = 0M,
                        Market_Cap__Billions_ = 0M
                    });
                    db.SaveChanges();
                }
                int newData = 1;

                listbox.Items.Add("\n[" + DateTime.Now + "] Market Data completed.\n" + newData + " saved.");
            }
            catch (UriFormatException)
            {
                listbox.Items.Add("Invalid URL!");
                MessageBox.Show("Invalid URL!");
                textBox1.Text = "";
            }
            catch (Exception ex)
            {
                listbox.Items.Add("Error found: " + ex);
                MessageBox.Show("Error found: " + ex);
                textBox1.Text = "";
            }
        }

        /// <summary>
        /// Returns the part of <paramref name="html"/> that starts at the first occurrence
        /// of <paramref name="marker"/>.
        /// </summary>
        /// <exception cref="FormatException">The marker does not occur in the text.</exception>
        private static string SkipToMarketMarker(string html, string marker)
        {
            int at = html.IndexOf(marker, StringComparison.Ordinal);
            if (at < 0)
            {
                // Fail with a readable message instead of Substring(-1, ...) blowing up.
                throw new FormatException("Marker '" + marker + "' was not found in the page source.");
            }
            return html.Substring(at);
        }

        /// <summary>
        /// Finds the next element identified by <paramref name="marker"/>, parses the text
        /// between its opening tag and the following "&lt;/" as a number, and advances
        /// <paramref name="sourceCode"/> to the start of that text.
        /// </summary>
        /// <param name="sourceCode">Remaining page source; trimmed in place.</param>
        /// <param name="marker">Attribute text identifying the element, e.g. class="high".</param>
        /// <param name="stripMillionsSuffix">When true, a literal "M" is removed before parsing.</param>
        /// <returns>The figure; divided by 100 when the text carries a cent sign.</returns>
        /// <exception cref="FormatException">The markup or the number is malformed.</exception>
        private static decimal ReadNextMarketValue(ref string sourceCode, string marker, bool stripMillionsSuffix)
        {
            sourceCode = SkipToMarketMarker(sourceCode, marker);

            int tagEnd = sourceCode.IndexOf(">", StringComparison.Ordinal);
            if (tagEnd < 0)
            {
                throw new FormatException("Unterminated tag after marker '" + marker + "'.");
            }
            sourceCode = sourceCode.Substring(tagEnd + 1);

            int closing = sourceCode.IndexOf("</", StringComparison.Ordinal);
            if (closing < 0)
            {
                throw new FormatException("No closing tag after marker '" + marker + "'.");
            }

            string text = sourceCode.Substring(0, closing).Replace(",", "").Replace("$", "");

            // Only figures quoted in cents are scaled. The previous check, Contains(""),
            // is true for every string and therefore divided every figure by 100.
            bool quotedInCents = text.Contains("&cent;") || text.Contains("\u00A2");
            text = text.Replace("&cent;", "").Replace("\u00A2", "");
            if (stripMillionsSuffix)
            {
                text = text.Replace("M", "");
            }

            // Parse as decimal with the invariant culture: no double round-trip (which
            // produced unparsable "1E-05"-style strings) and no dependency on the
            // machine's regional settings.
            decimal value = decimal.Parse(
                text,
                System.Globalization.NumberStyles.Number,
                System.Globalization.CultureInfo.InvariantCulture);

            return quotedInCents ? value / 100m : value;
        }
예제 #4
0
        // Positive and Negative words
        /// <summary>
        /// Click handler: downloads the Loughran-McDonald positive and negative word lists,
        /// then, for every stored post whose <c>PosWords</c> and <c>NegWords</c> are both
        /// null, counts how many words of the post appear in each list and saves the counts.
        /// Posts that already carry a count are tallied as duplicates.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void SentimentAnalysisButton(object sender, EventArgs e)
        {
            int duplicates  = 0;
            int updatedData = 0;

            // Word lists are published at http://www3.nd.edu/~mcdonald/Word_Lists.html
            try
            {
                listbox.Items.Add("Process Starts. Please wait for a few minutes.");

                // Each list is parsed once up front instead of once per post.
                var positiveWords = LoadSentimentWordList("http://www3.nd.edu/~mcdonald/Data/Finance_Word_Lists/LoughranMcDonald_Positive.csv");
                var negativeWords = LoadSentimentWordList("http://www3.nd.edu/~mcdonald/Data/Finance_Word_Lists/LoughranMcDonald_Negative.csv");

                char[] separators = { '.', '?', '!', ' ', ';', ':', ',' };

                using (var db = new HCDB())
                {
                    foreach (var post in db.Posts.ToList())
                    {
                        if (post.PosWords != null || post.NegWords != null)
                        {
                            duplicates++;
                            continue;
                        }

                        // A post without content simply scores zero instead of aborting the run.
                        string   content = post.Content ?? "";
                        string[] tokens  = content.Split(separators, StringSplitOptions.RemoveEmptyEntries);

                        // Matching is case-insensitive for every token; the previous
                        // case-sensitive Contains pre-filter skipped capitalised words.
                        post.PosWords = tokens.Count(t => positiveWords.Contains(t.ToLowerInvariant()));
                        post.NegWords = tokens.Count(t => negativeWords.Contains(t.ToLowerInvariant()));

                        // Saved per post so that work done so far survives a later failure.
                        db.SaveChanges();
                        updatedData++;
                    }
                }

                listbox.Items.Add("\n[" + DateTime.Now + "] HotCopper Pos/Neg Word Process Ended.\n" + updatedData + " updated and " + duplicates + " duplicates Found.");
            }
            catch (UriFormatException)
            {
                // Caught by type: the exception message never contains the type name,
                // so the previous message-based check could not reach this branch.
                listbox.Items.Add("Invalid URL!");
                MessageBox.Show("Invalid URL!");
            }
            catch (Exception ex)
            {
                listbox.Items.Add(ex.Message);
            }
        }

        /// <summary>
        /// Downloads a one-word-per-line list and returns its entries lower-cased.
        /// </summary>
        /// <param name="link">Address of the word list.</param>
        /// <returns>Set of lower-case words, compared ordinally.</returns>
        /// <exception cref="UriFormatException">
        /// The download yielded nothing or the "invalid" sentinel.
        /// </exception>
        private static System.Collections.Generic.HashSet<string> LoadSentimentWordList(string link)
        {
            string raw = WorkerClasses.getSourceCode(link);
            if (raw == null)
            {
                // NOTE(review): assumes a null result also means the download failed -- confirm.
                throw new UriFormatException();
            }

            string lowered = raw.ToLowerInvariant();
            if (lowered.Replace("\n", "") == "invalid")
            {
                throw new UriFormatException();
            }

            // Split on both CR and LF so the list parses whatever line endings the server
            // sends, and so a final word without a trailing line break is kept.
            var words = new System.Collections.Generic.HashSet<string>(StringComparer.Ordinal);
            foreach (string line in lowered.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries))
            {
                string word = line.Trim();
                if (word.Length > 0)
                {
                    words.Add(word);
                }
            }
            return words;
        }
예제 #5
0
        private void GetHotCopperThreads(string pageSource)
        {
            var db          = new HCDB();
            int duplicates  = 0;
            int newData     = 0;
            int postNewData = 0;

            if (pageSource != null)
            {
                try
                {
                    string stock = textBox2.Text;
                    if (stock == null || stock.Trim() == "")
                    {
                        int identifier = url.IndexOf("hotcopper.com.au") + 16;
                        if (url != "")
                        {
                            stock = url.Substring(identifier, url.Length - identifier);
                            if (stock.Contains("#"))
                            {
                                stock = stock.Substring(0, stock.IndexOf("#"));
                            }
                        }
                    }
                    string sourceCode = pageSource;

                    if (sourceCode == "invalid")
                    {
                        throw new UriFormatException();
                    }
                    listbox.Items.Add("Process Starts. Please wait for a few minutes.");

                    /* Group */
                    string groupWord = textBox2.Text;
                    if (groupWord == "")
                    {
                        groupWord = WorkerClasses.getGroupWord(url);
                    }

                    #region HotCopper THREADS results only
                    try
                    {
                        /* First of ALL, Save Threads Links */
                        while (sourceCode.IndexOf("listblock tags") != -1)
                        {
                            // TAG
                            int startIndex = sourceCode.IndexOf("listblock tags");
                            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                            int    endIndex = sourceCode.IndexOf("</div");
                            string relCode  = sourceCode.Substring(0, endIndex);
                            string tags     = "";
                            while (relCode.Contains("<a href"))
                            {
                                startIndex = relCode.IndexOf("<a href");
                                relCode    = relCode.Substring(startIndex, relCode.Length - startIndex);
                                startIndex = relCode.IndexOf(">") + 1;
                                relCode    = relCode.Substring(startIndex, relCode.Length - startIndex);
                                endIndex   = relCode.IndexOf("</");
                                tags      += " " + relCode.Substring(0, endIndex);
                            }
                            tags = tags.Replace("&amp;", "&").Trim();

                            // Subject
                            startIndex = sourceCode.IndexOf("listblock subject");
                            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                            startIndex = sourceCode.IndexOf("<h");
                            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                            startIndex = sourceCode.IndexOf(">") + 1;
                            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                            endIndex   = sourceCode.IndexOf("</");
                            string subject    = sourceCode.Substring(0, endIndex);
                            string threadLink = "";
                            if (subject.Contains("<a"))
                            {
                                // Thread Link
                                string tempCode = sourceCode;
                                startIndex = tempCode.IndexOf("<a href");
                                tempCode   = tempCode.Substring(startIndex, tempCode.Length - startIndex);
                                startIndex = tempCode.IndexOf("\"") + 1;
                                tempCode   = tempCode.Substring(startIndex, tempCode.Length - startIndex);
                                endIndex   = tempCode.IndexOf("\"");
                                threadLink = "http://hotcopper.com.au/" + tempCode.Substring(0, endIndex);

                                // Remove "<a>" from Subject
                                endIndex = subject.IndexOf("<a");
                                string firstPart = subject.Substring(0, endIndex);
                                subject    = subject.Substring(endIndex + 2, subject.Length - (endIndex + 2));
                                startIndex = subject.IndexOf(">") + 1;
                                string secondPart = subject.Substring(startIndex, subject.Length - startIndex);
                                subject = firstPart + "\n" + secondPart;
                            }
                            subject = subject.Replace("</a>", "").Trim();
                            subject = subject.Replace("&#039;", "'");
                            subject = subject.Replace("&amp;", "'");

                            // Author

                            startIndex = sourceCode.IndexOf("listblock author ");
                            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                            startIndex = sourceCode.IndexOf(">") + 1;
                            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                            endIndex   = sourceCode.IndexOf("</");
                            string author = sourceCode.Substring(0, endIndex);

                            // Author's Posts Link
                            startIndex = sourceCode.IndexOf("a href");
                            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                            startIndex = sourceCode.IndexOf("\"") + 1;
                            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                            endIndex   = sourceCode.IndexOf("\"");
                            string authorPostLink = "https://hotcopper.com.au/" + sourceCode.Substring(0, endIndex);
                            authorPostLink = authorPostLink.Replace("&amp;", "&");
                            while (author.Contains("<a"))
                            {
                                endIndex = author.IndexOf("<a");
                                string firstPart = author.Substring(0, endIndex);
                                author     = author.Substring(endIndex + 2, author.Length - (endIndex + 2));
                                startIndex = author.IndexOf(">") + 1;
                                string secondPart = author.Substring(startIndex, author.Length - startIndex);
                                author = firstPart + "\n" + secondPart;
                            }
                            author = author.Replace("</a>", "").Trim();
                            author = author.Replace("&#039;", "'");

                            // Check the subject if it is not a reply and not an announcement  (First Post only)
                            if (!subject.Substring(0, 4).ToLower().Contains("re:") && !subject.Substring(0, 4).ToLower().Contains("ann:"))
                            {
                                // Views
                                startIndex = sourceCode.IndexOf("listblock stats ");
                                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                                startIndex = sourceCode.IndexOf("</span>") + 7;
                                if (startIndex == -1)
                                {
                                    startIndex = sourceCode.IndexOf(">") + 1;
                                }
                                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                                endIndex   = sourceCode.IndexOf("</");
                                string viewStr = sourceCode.Substring(0, endIndex).Trim();
                                viewStr = viewStr.Replace(",", "");
                                int view = Convert.ToInt32(viewStr);

                                //// Immediate DateTime
                                startIndex = sourceCode.IndexOf("listblock time");
                                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                                startIndex = sourceCode.IndexOf(">") + 1;
                                sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                                startIndex = sourceCode.IndexOf(">") + 1;
                                endIndex   = sourceCode.IndexOf("</");
                                if (startIndex < endIndex && startIndex != -1)
                                {
                                    sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
                                }
                                endIndex = sourceCode.IndexOf("</");
                                string   datetimeStr = sourceCode.Substring(0, endIndex).Trim();
                                DateTime threadDate  = Convert.ToDateTime(datetimeStr);

                                // Access inside the thread
                                if (threadLink != "")
                                {
                                    string threadCode = WorkerClasses.getSourceCode(threadLink);
                                    if (threadCode == "invalid")
                                    {
                                        throw new UriFormatException();
                                    }

                                    // Date of the thread
                                    startIndex = threadCode.IndexOf("icon left icon-clock");
                                    threadCode = threadCode.Substring(startIndex, threadCode.Length - startIndex);
                                    startIndex = threadCode.IndexOf("</span>") + 7;
                                    threadCode = threadCode.Substring(startIndex, threadCode.Length - startIndex);
                                    endIndex   = threadCode.IndexOf("</");
                                    string   threadBeginStr = threadCode.Substring(0, endIndex).Trim();
                                    DateTime threadBegin    = Convert.ToDateTime(datetimeStr);

                                    // if Nav to First does not exist, this post is the last one, save as it is.
                                    int    totalPosts    = 1;
                                    string lastPoster    = author;
                                    string lastPost      = threadLink;
                                    string threadCodeOrg = threadCode;

                                    startIndex = threadCode.IndexOf("rel=\"start\"");   // Nav to First
                                    // if first exists, Check 'PageNav', find a total number, then go to last post,
                                    //   find save last post link and last poster.

                                    //................................. UPDATE AREA ................................//
                                    if (startIndex != -1)
                                    {
                                        // Total Number of posts
                                        startIndex = threadCode.IndexOf("PageNav");
                                        threadCode = threadCode.Substring(startIndex, threadCode.Length - startIndex);
                                        startIndex = threadCode.IndexOf("data-last") + 9;
                                        threadCode = threadCode.Substring(startIndex, threadCode.Length - startIndex);
                                        startIndex = threadCode.IndexOf("\"") + 1;
                                        threadCode = threadCode.Substring(startIndex, threadCode.Length - startIndex);
                                        endIndex   = threadCode.IndexOf("\"");
                                        totalPosts = Convert.ToInt32(threadCode.Substring(0, endIndex));

                                        // ------:> Navigate to Last Post
                                        startIndex = threadCode.IndexOf("Next</a>");
                                        threadCode = threadCode.Substring(startIndex, threadCode.Length - startIndex);
                                        startIndex = threadCode.IndexOf("a href");
                                        threadCode = threadCode.Substring(startIndex, threadCode.Length - startIndex);
                                        startIndex = threadCode.IndexOf("\"") + 1;
                                        threadCode = threadCode.Substring(startIndex, threadCode.Length - startIndex);
                                        endIndex   = threadCode.IndexOf("\"");
                                        string lastPostLink = "http://hotcopper.com.au/" + threadCode.Substring(0, endIndex);
                                        string lastPostCode = WorkerClasses.getSourceCode(lastPostLink);
                                        if (lastPostCode == "invalid")
                                        {
                                            throw new UriFormatException();
                                        }

                                        // Last Post
                                        lastPost = lastPostLink;

                                        // Last Poster
                                        startIndex   = lastPostCode.IndexOf("user-wrap");
                                        lastPostCode = lastPostCode.Substring(startIndex, lastPostCode.Length - startIndex);
                                        startIndex   = lastPostCode.IndexOf("<h");
                                        lastPostCode = lastPostCode.Substring(startIndex, lastPostCode.Length - startIndex);
                                        startIndex   = lastPostCode.IndexOf(">") + 1;
                                        lastPostCode = lastPostCode.Substring(startIndex, lastPostCode.Length - startIndex);
                                        startIndex   = lastPostCode.IndexOf(">") + 1;
                                        endIndex     = lastPostCode.IndexOf("</");
                                        lastPoster   = lastPostCode.Substring(0, endIndex);
                                        while (lastPoster.Contains("<a"))
                                        {
                                            endIndex = lastPoster.IndexOf("<a");
                                            string firstPart = lastPoster.Substring(0, endIndex);
                                            lastPoster = lastPoster.Substring(endIndex + 2, lastPoster.Length - (endIndex + 2));
                                            startIndex = lastPoster.IndexOf(">") + 1;
                                            string secondPart = lastPoster.Substring(startIndex, lastPoster.Length - startIndex);
                                            lastPoster = firstPart + "\n" + secondPart;
                                        }
                                        lastPoster = lastPoster.Replace("</a>", "").Trim();
                                        lastPoster = lastPoster.Replace("&#039;", "'");
                                    }

                                    if (!db.Threads.Any(f => f.Subject == subject && f.Begin_Date == threadBegin))
                                    {
                                        db.Threads.Add(new HCDB_Threads
                                        {
                                            Stock        = stock,
                                            Tags         = tags,
                                            Subject      = subject,
                                            Num_of_Posts = totalPosts,
                                            Num_of_Views = view,
                                            First_Poster = author,
                                            Begin_Date   = threadBegin,
                                            Last_Post    = lastPost,
                                            Last_Poster  = lastPoster
                                        });
                                        db.SaveChanges();
                                        //MessageBox.Show("Count " + newData);
                                        Console.WriteLine("Count " + newData);
                                        newData++;
                                    }
                                    else
                                    {
                                        var existing = (from u in db.Threads
                                                        where u.Subject == subject &&
                                                        u.Begin_Date == threadBegin
                                                        select u).FirstOrDefault();
                                        if (existing != null)
                                        {
                                            if (existing.Last_Post != lastPost)
                                            {
                                                existing.Last_Post = lastPost;
                                            }
                                            if (existing.Last_Poster != lastPoster)
                                            {
                                                existing.Last_Poster = lastPoster;
                                            }
                                            if (existing.Num_of_Posts != totalPosts)
                                            {
                                                existing.Num_of_Posts = totalPosts;
                                            }
                                            if (existing.Num_of_Views != view)
                                            {
                                                existing.Num_of_Views = view;
                                            }
                                            db.SaveChanges();
                                            duplicates++;
                                        }
                                    }

                                    // Save Page Data
                                    postNewData += WorkerClasses.savePageData(stock, subject, threadCodeOrg, threadLink, authorPostLink);
                                }
                            }
                            // Second, third, fourth and later posts...
                            else if (subject.Substring(0, 4).ToLower().Contains("re:"))
                            {
                                string threadCodeOrg = WorkerClasses.getSourceCode(threadLink);
                                if (threadCodeOrg == "invalid")
                                {
                                    throw new UriFormatException();
                                }

                                // Save Page Data
                                postNewData += WorkerClasses.savePageData(stock, subject, threadCodeOrg, threadLink, authorPostLink);
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show(ex.Message);
                        MessageBox.Show(ex.ToString());
                        listbox.Items.Add("Error: " + ex);
                    }

                    listbox.Items.Add("\n[" + DateTime.Now + "] HotCopper Threads completed.\n" + newData + " saved and " + duplicates + " duplicates Updated.");
                    listbox.Items.Add("\n[" + DateTime.Now + "] HotCopper Posts completed.\n" + postNewData + " saved.");
                    #endregion
                }
                catch (Exception ex)
                {
                    MessageBox.Show(ex.Message);
                    MessageBox.Show(ex.ToString());
                    listbox.Items.Add("Invalid URL!");
                    MessageBox.Show("Invalid URL!");
                    textBox1.Text = "";
                }
            }
            else
            {
                listbox.Items.Add("Please enter URL.");
                MessageBox.Show("Please enter URL.");
            }
        }