예제 #1
0
        /// <summary>
        /// DoWork handler that fetches one site list per cluster, builds a frequency table
        /// (<c>HTable</c>) for every site, saves each table and registers it in the session tree.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        /// <remarks>
        /// Any exception raised during the load is caught and shown in a message box, which
        /// ends the run at that point.
        /// </remarks>
        private void loadWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            try
            {
                // Get list of websites, one list per cluster
                string[][] AllSites = new string[5][];
                // Numbering continues after the keys that already exist
                int TableNumber    = GetNumberOfKeys() + 1;
                // NOTE(review): 1531 looks like the overall target number of tables - confirm
                int sitesRemaining = 1531 - TableNumber;
                for (int i = 0; i < AllSites.Length; ++i)
                {
                    AddMessage($"Getting site list for '{Enum.GetName(typeof (WebCompareModel.SitesEnum), i)}' cluster");
                    AllSites[i] = WebCompareModel.GetSiteList(WebCompareModel.Websites[i], (sitesRemaining / 5));
                }

                // Build frequency tables from 1000+ sites
                int sitecateg = 1, statusCount = 1, scx = 1;
                foreach (string[] clusterSites in AllSites)
                {
                    // A missing list is treated as an empty cluster instead of aborting the whole load
                    string[] sites = clusterSites ?? new string[0];

                    AddMessage("Building frequency tables for cluster.. " + sitecateg);
                    foreach (string site in sites)
                    {
                        // Status display, emitted once every 100 sites
                        if (statusCount == 100)
                        {
                            AddMessage($"Sites to go..{sites.Length * 5 - scx + 70}");
                            statusCount = 0;
                        }
                        ++statusCount; ++scx;

                        // A null entry cannot be fetched; skip it rather than fail the run
                        if (site == null)
                        {
                            continue;
                        }

                        // Get data from website and parse
                        string[] parsedData = WebCompareModel.GetWebDataAgility(site);

                        // Fill a new HTable (frequency table)
                        HTable table = new HTable();
                        table.URL  = site;
                        // Name is the URL minus its first 30 characters (presumably a fixed URL
                        // prefix - confirm); shorter URLs are kept whole because Substring(30)
                        // would throw on them.
                        table.Name = site.Length >= 30 ? site.Substring(30) : site;
                        if (parsedData != null)
                        {
                            foreach (string word in parsedData)
                            {
                                table.Put(word, 1);
                            }
                        }

                        // Write HTable to file
                        table.SaveTable(TableNumber);
                        // Register the table in the session tree under its number
                        Session.Instance.Tree.Insert(TableNumber, table.Name);
                        ++TableNumber;
                    }
                    AddMessage("Completed building frequency table " + sitecateg);
                    ++sitecateg;
                } // End AllSites foreach
            }
            catch (Exception err)
            {
                MessageBox.Show("Exception caught: " + err, "Exception:Loader:loaderWorker_DoWork()", MessageBoxButton.OK, MessageBoxImage.Warning);
            }
        }
예제 #2
0
        /// <summary>
        /// Rebuilds the frequency table of <paramref name="ht"/> from freshly downloaded data
        /// for <c>ht.URL</c>.
        /// </summary>
        /// <param name="ht">
        /// Table to refresh. Its <c>Table</c> storage is replaced by a new 32-entry array and
        /// refilled; when no data can be retrieved the table is left as it was.
        /// </param>
        public void UpdateTable(ref HTable ht)
        {
            AddMessage("\nUpdtating frequency table..");

            // Download before clearing, so a failed fetch does not destroy the current table
            string[] parsedData = WebCompareModel.GetWebDataAgility(ht.URL);
            if (parsedData == null)
            {
                AddMessage("\nNo data retrieved, frequency table left unchanged..");
                return;
            }

            // Reset the table to its initial 32-slot state
            ht.Table     = null; // Clear current table
            ht.TableSize = 32;
            ht.Table     = new Model.HTable.HEntry[32];

            // Refill HTable, one Put per parsed word
            foreach (string word in parsedData)
            {
                ht.Put(word, 1);
            }

            AddMessage("\nCOMPLETED updating frequency table..");
        }
예제 #3
0
        /// <summary>
        /// DoWork handler that compares the user-entered URL against every reference site in
        /// <c>WebCompareModel.Websites</c>: it fetches and parses each page, fills the matching
        /// table, computes the cosine similarity of each reference table to the user table and
        /// publishes the result text.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void worker_DoWork(object sender, DoWorkEventArgs e)
        {
            int referenceCount = WebCompareModel.Websites.Length;

            // Reset every table: the first slots belong to the reference sites,
            // any slot beyond them gets the user entered url
            for (int t = 0; t < tables.Length; ++t)
            {
                tables[t]     = new HTable();
                tables[t].URL = t < referenceCount ? WebCompareModel.Websites[t] : wcViewModel.UserURL;
            }

            // Nothing usable entered in url: return. Null and whitespace-only input are
            // rejected too, since they cannot be fetched either.
            if (string.IsNullOrWhiteSpace(wcViewModel.UserURL))
            {
                AddMessage("\nPlease enter a valid URL");
                return;
            }

            AddMessage("");
            string[] data;
            string[] parsedMessages;

            // Get data from the reference websites
            for (int w = 0; w < referenceCount; ++w)
            {
                string url = WebCompareModel.Websites[w];

                AddMessage("\nGETTING data from: " + url);
                data = GetWebDataAgility(url);

                AddMessage("\nPARSING data from: " + url);
                parsedMessages = WebCompareModel.Parser(data);

                AddMessage("\nFILLING TABLE from: " + url + "\n");
                FillTables(data, parsedMessages, w);
            }

            // The user entered page is filled under the index right after the reference sites
            AddMessage("\nGETTING data from USER entered webpage");
            data = GetWebDataAgility(wcViewModel.UserURL);

            AddMessage("\nPARSING data from USER entered webpage");
            parsedMessages = WebCompareModel.Parser(data);

            AddMessage("\nFILLING TABLE from USER entered webpage\n");
            FillTables(data, parsedMessages, referenceCount);

            // Calculate cosine vectors
            AddMessage("\nCALCULATING cosine vectors\n");

            // The last table is the user entered table; every other table is compared against it
            int userIndex = tables.Length - 1;
            for (int tab = 0; tab < userIndex; ++tab)
            {
                List <object>[] vector = WebCompareModel.BuildVector(tables[tab], tables[userIndex]);
                tables[tab].Similarity = WebCompareModel.CosineSimilarity(vector);
            }

            // Compare to the entered URL by the user
            //     and display results in order
            wcViewModel.Results = GetResults();
        }
예제 #4
0
        /// <summary>
        /// Recursively builds the site graph starting at <paramref name="site"/>: creates and
        /// saves a frequency table and a vertex for the site, links the vertex to
        /// <paramref name="parent"/>, then walks the links found on the page, connecting to
        /// vertices already in the graph and recursing into unseen ones while
        /// <c>levelCount</c> allows.
        /// </summary>
        /// <param name="site">URL of the site to add.</param>
        /// <param name="parent">Vertex this site was reached from, or null for the root.</param>
        /// <param name="sim">
        /// Receives 1 minus the cosine similarity between this site and
        /// <paramref name="parent"/> (lower means more similar); 0 when there is no parent,
        /// when the site limit was reached, or when an error occurred.
        /// </param>
        /// <returns>
        /// The vertex created for <paramref name="site"/>, or null when <c>siteCount</c> has
        /// reached <c>MAXSITES</c> or an exception was caught.
        /// </returns>
        /// <remarks>
        /// Decrements <c>levelCount</c> on entry; the caller increments it again after the
        /// recursive call returns.
        /// </remarks>
        public Vertex BuildGraph(string site, Vertex parent, out float sim)
        {
            // Decrement count because we just went down a level
            --levelCount;
            sim = 0;
            if (siteCount >= MAXSITES)
            {
                return null;
            }
            try
            {
                // Take one site and a parent vertex
                // Get data and list of sites
                List <string> parsedData;
                List <string> sites;
                GetWebDataAgility(site, out parsedData, out sites);

                // Fill a new HTable (frequency table)
                HTable vTable = new HTable();
                vTable.ID   = ++TableNumber;
                vTable.URL  = site;
                // Name is the URL minus its first 30 characters (presumably a fixed URL prefix -
                // confirm); shorter URLs are kept whole because Substring(30) would throw.
                vTable.Name = site.Length >= 30 ? site.Substring(30) : site;
                if (parsedData != null)
                {
                    for (int w = 0; w < parsedData.Count; ++w)
                    {
                        vTable.Put(parsedData[w], 1);
                    }
                }
                // Write HTable to file
                vTable.SaveTable(vTable.ID);
                // Create a vertex for this site with the same ID as HTable
                Vertex v = new Vertex(vTable.ID, vTable.URL);

                // Create an edge connecting this vertex and parent vertex
                if (parent != null)
                {
                    // Calc similarity to parent
                    HTable          parentTable = LoadTable(parent.ID);
                    List <object>[] vector      = WebCompareModel.BuildVector(vTable, parentTable);
                    // 1 minus for shortest path calculation, lower numbers are more similar
                    sim = 1 - (float)WebCompareModel.CosineSimilarity(vector);
                    // Create edges in both directions
                    Edge e1 = new Edge(parent.ID, v.ID, sim, ++EdgeNumber);
                    Edge e2 = new Edge(v.ID, parent.ID, sim, ++EdgeNumber);
                    // Add each other as neighbors
                    parent.AddNeighbor(e1);
                    v.AddNeighbor(e2);
                    // Update parent vertex
                    mainGraph.AddVertex(parent);
                    mainGraph.SaveVertex(parent);
                }
                // Update/Add to graph
                mainGraph.AddVertex(v);
                mainGraph.SaveVertex(v);
                ++siteCount;

                // Walk the linked sites; a missing list is treated as "no links"
                foreach (var s in sites ?? new List <string>())
                {
                    // Check for nulls, don't compare to itself
                    if (s == null || s == site)
                    {
                        continue;
                    }

                    // Don't get more sites if site tree has been built already
                    Vertex v2 = mainGraph.HasVertex(s);
                    if (v2 != null)
                    {
                        LoadStatus += ".";
                        HTable v2Table = LoadTable(v2.ID);
                        if (v2Table != null)
                        {
                            List <object>[] vector = WebCompareModel.BuildVector(vTable, v2Table);
                            // Kept in a local so the parent similarity reported through
                            // 'sim' is not overwritten by a neighbour's value.
                            float linkSim = 1 - (float)WebCompareModel.CosineSimilarity(vector);
                            Edge e = new Edge(v.ID, v2.ID, linkSim, ++EdgeNumber);

                            // Add each other as neighbors
                            v.AddNeighbor(e);
                            v2.AddNeighbor(new Edge(v2.ID, v.ID, linkSim, ++EdgeNumber));
                            // Update/Add to graph
                            mainGraph.AddVertex(v);
                            mainGraph.AddVertex(v2);
                        }

                        mainGraph.SaveVertex(v2);
                    }
                    else
                    {
                        // Don't build another graph off this node if the level count hits 0.
                        // Leave the loop instead of returning so that neighbours added above
                        // are still persisted by the final save.
                        if (levelCount <= 0)
                        {
                            break;
                        }

                        float  simout;
                        Vertex neighb = BuildGraph(s, v, out simout);
                        // Increment levelcount because we just came back up a level
                        ++levelCount;

                        if (neighb != null)
                        {
                            // NOTE(review): the recursive call already added a parent->child
                            // edge to v; confirm AddNeighbor tolerates the second one.
                            // Pre-increment, like every other edge, so the id is not reused.
                            Edge newEdge = new Edge(v.ID, neighb.ID, simout, ++EdgeNumber);
                            v.AddNeighbor(newEdge);
                        }
                    }
                }

                // Update Vertex to graph and persist
                mainGraph.AddVertex(v);
                mainGraph.SaveVertex(v);

                return v;
            }
            catch (Exception exc) { Console.WriteLine("Error building graph: " + exc); }
            sim = 0;
            return null;
        }
예제 #5
0
        /// <summary>
        /// DoWork handler that builds a frequency table for the user-entered URL, compares it
        /// against every stored table, and publishes the names of the most similar sites.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        /// <remarks>
        /// Each stored table is re-saved with its new similarity. Tables whose
        /// <c>LastUpdated</c> is more than 30 days old are passed to <c>UpdateTable</c>.
        /// </remarks>
        private void worker_DoWork(object sender, DoWorkEventArgs e)
        {
            AddMessage("");

            // Nothing can be compared without a URL
            string userUrl = wcViewModel.UserURL;
            if (string.IsNullOrWhiteSpace(userUrl))
            {
                AddMessage("\nPlease enter a valid URL");
                return;
            }

            wcViewModel.Results = "";
            wcViewModel.Results = "Top 10 most similar websites: ";

            // Build frequency table for user entered URL
            AddMessage("Building Entered URL frequency table..");
            // Get data from website and parse
            string[] parsedData = WebCompareModel.GetWebDataAgility(userUrl);
            if (parsedData == null)
            {
                AddMessage("\nUnable to retrieve data from the entered URL");
                return;
            }

            // Fill HTable
            HTable compareTable = new HTable();
            compareTable.URL  = userUrl;
            // Name is the URL minus its first 30 characters (presumably a fixed URL prefix -
            // confirm); shorter URLs are kept whole because Substring(30) would throw.
            compareTable.Name = userUrl.Length >= 30 ? userUrl.Substring(30) : userUrl;
            foreach (string word in parsedData)
            {
                compareTable.Put(word, 1);
            }

            // Best matches so far, as (table id, similarity) pairs maintained by AddTopSite
            List <KeyValuePair <long, double> > topSites = new List <KeyValuePair <long, double> >();

            AddMessage("\nCalculating Similarities for 1000+ webistes..");
            SiteTotal = LoaderViewModel.GetNumberOfKeys();
            int statusCount = 0;

            // Tables last updated before this moment are considered stale (30 days)
            DateTime staleBefore = DateTime.Now.Subtract(new TimeSpan(30, 0, 0, 0));

            AddMessage($"\nSites to go..{SiteTotal}");
            // Inclusive upper bound: the loader numbers new tables from GetNumberOfKeys() + 1,
            // so the highest stored id appears to equal SiteTotal. Missing ids are skipped below.
            for (int i = 1; i <= SiteTotal; ++i)
            {
                // Status display, emitted once every 50 sites
                ++statusCount;
                if (statusCount == 50)
                {
                    AddMessage($"\nSites to go..{SiteTotal - i + 70}");
                    statusCount = 0;
                }

                HTable tempTable = LoadTable(i);
                if (tempTable == null)
                {
                    continue;
                }

                // Build vector and calculate similarity
                List <object>[] vector = WebCompareModel.BuildVector(tempTable, compareTable);
                tempTable.Similarity = WebCompareModel.CosineSimilarity(vector);
                // Update table
                tempTable.SaveTable(i);
                // Maintain list of top sites (IDs)
                topSites = AddTopSite(topSites, new KeyValuePair <long, double>(i, tempTable.Similarity));

                // Check if the stored site needs updating
                if (tempTable.LastUpdated < staleBefore)
                {
                    // NOTE(review): the refreshed table is not saved again here - confirm
                    // whether UpdateTable/Put persists it.
                    UpdateTable(ref tempTable);
                }
            }

            // Display up to 10 sites; fewer when not enough tables were compared
            int shown = Math.Min(10, topSites.Count);
            for (int i = 0; i < shown; ++i)
            {
                // Get name of site using its key
                string siteName = Tree.SearchTree(topSites[i].Key, 1);
                wcViewModel.Results += "\n" + siteName;
            }

            // Calculate and Display most similar
            GetResult(topSites);
        }