public void CosineSimilarityTest()
        {
            // Checks GetSimilarityScore against known scores for the fixture vector pairs
            // p0/q0 .. p5/q5 (declared outside this method).
            //
            // Assertions are written as (expected, actual[, delta]) so that a failure
            // message reports the expected and the computed value the right way round.
            const double tolerance = 0.00001;

            CosineSimilarity sim = new CosineSimilarity();

            // TODO: Re-enable argument checking only for debugging
            // Assert.Throws<ArgumentException>(() => sim.GetSimilarityScore(p0, q4));

            Assert.AreEqual(0.8, sim.GetSimilarityScore(p0, q0), tolerance);

            Assert.AreEqual(0.97014, sim.GetSimilarityScore(p1, q1), tolerance);

            // Exact comparison kept from the original: this pair is expected to score exactly 0.
            Assert.AreEqual(0.0, sim.GetSimilarityScore(p2, q2));

            Assert.AreEqual(1.0, sim.GetSimilarityScore(p3, q3), tolerance);

            Assert.AreEqual(0.96065, sim.GetSimilarityScore(p4, q4), tolerance);

            Assert.AreEqual(0.96897, sim.GetSimilarityScore(p5, q5), tolerance);
        }
Beispiel #2
0
        /// <summary>
        /// Scores two hard-coded sample sentences against each other using
        /// <see cref="CosineSimilarity"/>.
        /// </summary>
        /// <returns>The value returned by <c>GetSimilarity</c> for the two sample sentences.</returns>
        public double CheckSimilarity()
        {
            // NOTE(review): the fetched questions are never read below (only the old,
            // removed experiments used them). The query is kept so that this method
            // still touches the repository exactly as before - confirm whether it can go.
            var questions = u.Repository<Question>().GetAll().ToList();

            // Metrics that were tried here before settling on CosineSimilarity:
            //   Levenshtein / RateSimilarity (StringSimilarity helpers), JaccardSimilarity,
            //   DiceSimilarity, OverlapCoefficient, BlockDistance,
            //   ChapmanLengthDeviation (for very short sentences a high score still does
            //   not mean the sentences are actually the same),
            //   MongeElkan (based on Jaro-Winkler, see
            //   https://cs.stackexchange.com/questions/32530/a-reference-for-pseudocode-for-monge-elkan-algorithm),
            //   SmithWatermanGotohWindowedAffine.
            var metric = new CosineSimilarity();

            const string statement =
                "By switching on and off, the __ can be used to represent the 1s and 0s that are foundation of all that goes on in the computer";
            const string question =
                "what can be used to represent the on and off that are foundation of all that goes on in the computer?";

            return metric.GetSimilarity(statement, question);
        }
        /// <summary>
        /// Builds a list of up to 20 recommended pages from <paramref name="allWikiPages"/>.
        /// Pages whose title matches one of the user's liked pages are skipped; every other
        /// distinct title is scored with <c>CosineSimilarity.GetDistance</c> against the
        /// liked-pages cluster vector, and the 20 highest-scoring titles are returned.
        /// </summary>
        /// <param name="allWikiPages">Candidate pages. When several pages share a title, the first one wins.</param>
        /// <returns>The recommended pages, ordered by descending score.</returns>
        public List<WikiPage> generateRecommendations(List<WikiPage> allWikiPages)
        {
            const int maxRecommendations = 20;

            clusterUsersPages();

            CosineSimilarity cSimilarity = new CosineSimilarity();

            // Hash the liked titles once instead of scanning the liked list for every candidate.
            HashSet<string> likedTitles = new HashSet<string>(userData.likedWikiPages.Select(liked => liked.title));

            Dictionary<string, float> pageDistances = new Dictionary<string, float>(allWikiPages.Count);

            // Remembers the first page seen for each scored title so the result can be
            // assembled without searching allWikiPages again.
            Dictionary<string, WikiPage> firstPageByTitle = new Dictionary<string, WikiPage>(allWikiPages.Count);

            foreach (WikiPage page in allWikiPages)
            {
                // If the WikiPage is one that the user likes, no need to calculate distance.
                if (likedTitles.Contains(page.title))
                {
                    continue;
                }

                if (pageDistances.ContainsKey(page.title))
                {
                    continue;
                }

                pageDistances.Add(page.title, cSimilarity.GetDistance(userData.likedPagesCluster.tf_IDF_Vec, page.tf_IDF_Vec));
                firstPageByTitle.Add(page.title, page);
            }

            // NOTE(review): this keeps the pages with the LARGEST GetDistance value. That is
            // only right if GetDistance actually returns a similarity - confirm against
            // the CosineSimilarity implementation.
            var topScored = pageDistances.OrderByDescending(x => x.Value).Take(maxRecommendations);

            List<WikiPage> recommendedPages = new List<WikiPage>();

            foreach (var item in topScored)
            {
                recommendedPages.Add(firstPageByTitle[item.Key]);
            }

            return recommendedPages;
        }
Beispiel #4
0
        public void CosineSimilarityTest( )
        {
            // Checks GetSimilarityScore against known scores for the fixture vector pairs
            // p0/q0 .. p5/q5 (declared outside this method), and that the p0/q4 pair is
            // rejected with an ArgumentException.
            //
            // Assertions are written as (expected, actual[, delta]) so that a failure
            // message reports the expected and the computed value the right way round.
            const double tolerance = 0.00001;

            CosineSimilarity sim = new CosineSimilarity( );

            Assert.Throws <ArgumentException>(() => sim.GetSimilarityScore(p0, q4));

            Assert.AreApproximatelyEqual(0.8, sim.GetSimilarityScore(p0, q0), tolerance);

            Assert.AreApproximatelyEqual(0.97014, sim.GetSimilarityScore(p1, q1), tolerance);

            // Exact comparison kept from the original: this pair is expected to score exactly 0.
            Assert.AreEqual(0.0, sim.GetSimilarityScore(p2, q2));

            Assert.AreApproximatelyEqual(1.0, sim.GetSimilarityScore(p3, q3), tolerance);

            Assert.AreApproximatelyEqual(0.96065, sim.GetSimilarityScore(p4, q4), tolerance);

            Assert.AreApproximatelyEqual(0.96897, sim.GetSimilarityScore(p5, q5), tolerance);
        }
Beispiel #5
0
        /// <summary>
        /// Adds up four cosine-similarity scores between the search names (<c>fName</c>,
        /// <c>LName</c>, declared outside this method) and the lower-cased first and last
        /// name of <paramref name="person"/>: the two straight pairings plus the two
        /// crossed pairings (first against last and last against first).
        /// </summary>
        /// <param name="person">The person whose names are compared.</param>
        /// <returns>The sum of the four similarity scores.</returns>
        static double FindCosignTotal(Person person)
        {
            CosineSimilarity metric = new CosineSimilarity();

            var firstLower = person.FirstName.ToLower();
            var lastLower  = person.LastName.ToLower();

            // Accumulate in the same order as the original expression.
            double total = metric.GetSimilarity(fName, firstLower);
            total += metric.GetSimilarity(LName, lastLower);
            total += metric.GetSimilarity(fName, lastLower);
            total += metric.GetSimilarity(LName, firstLower);

            return total;
        }
Beispiel #6
0
        /// <summary>
        /// Runs the Rhino experiment end to end: imports the corpus and the queries,
        /// vectorizes both, computes cosine similarities between them, imports the gold set
        /// and exports the effectiveness measures. Progress is reported on the console and
        /// the program waits for Enter before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            // All input and output files are addressed relative to the working directory.
            string relativeLocation = "..\\..\\..\\..\\";

            Dictionary <string, string> config = new Dictionary <string, string>
            {
                { "idPath",      System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoCorpusMapping.txt") },
                { "docPath",     System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoCorpus.txt") },
                { "qidPath",     System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoListOfFeatures.txt") },
                { "qdocPath",    System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoQueries.txt") },
                { "goldSetDir",  System.IO.Path.Combine(relativeLocation, "Experiment\\Rhino\\RhinoFeaturesToGoldSetMethodsMapping") },
                { "effAllPath",  System.IO.Path.Combine(relativeLocation, "Experiment\\EffectivenessAllMethods.txt") },
                { "effBestPath", System.IO.Path.Combine(relativeLocation, "Experiment\\EffectivenessBestMethods.txt") },
            };

            Console.WriteLine("Running experiment...");
            Console.WriteLine("Importing corpus...");
            TLArtifactsCollection corpusArtifacts = Corpus.Import(config["idPath"], config["docPath"]);

            Console.WriteLine("Computing corpus vectors...");
            Vectorizer corpusVectors = new Vectorizer(corpusArtifacts, "Ordinal");

            Console.WriteLine("Computing corpus tf, df...");
            Normalizer corpusTF = new Normalizer(corpusVectors.Vectors);

            // NOTE(review): corpusIDF and corpusTFIDF are computed but never used - the
            // similarity step below is fed corpusTF. Confirm whether TF-IDF was intended.
            Console.WriteLine("Computing corpus idf...");
            NormalizedVector corpusIDF = InverseDocumentFrequency.Compute(corpusVectors.Frequencies, corpusVectors.Vectors.Count);

            Console.WriteLine("Computing corpus tf-idf...");
            NormalizedVectorCollection corpusTFIDF = TFIDF.Compute(corpusTF.Vectors, corpusIDF);

            Console.WriteLine("Importing queries...");
            TLArtifactsCollection queryArtifacts = Corpus.Import(config["qidPath"], config["qdocPath"]);

            // Fixed: this step vectorizes the queries, but the message was a copy of the corpus one.
            Console.WriteLine("Computing query vectors...");
            Vectorizer queryVectors = new Vectorizer(queryArtifacts, "Boolean");

            Console.WriteLine("Computing similarities...");
            TLSimilarityMatrix sims = CosineSimilarity.Compute(corpusTF.Vectors, corpusTF.Lengths, queryVectors.Vectors);

            Console.WriteLine("Importing gold set...");
            TLSimilarityMatrix goldset = AnswerMapping.Import(config["goldSetDir"]);

            Console.WriteLine("Calculating effectiveness measures...");
            Effectiveness.Export(queryArtifacts, sims, goldset, config["effAllPath"], config["effBestPath"]);
            Console.WriteLine("Effectiveness measures written to:\n\t" + config["effAllPath"] + "\n\t" + config["effBestPath"]);
            Console.WriteLine("Experiment complete.");

            Console.WriteLine("\nPress enter key to continue...");
            Console.ReadLine();
        }
Beispiel #7
0
        public void TestDistanceIsPositiveEvenIfThereIsRounding()
        {
            // Regression test for https://github.com/doxakis/HdbscanSharp/issues/5.
            // Two one-element vectors that are almost identical must not yield a
            // distance below zero.
            double[] first  = { 20 };
            double[] second = { 19.990000000000002 };

            CosineSimilarity metric = new CosineSimilarity();

            var computed = metric.ComputeDistance(0, 1, first, second);

            bool isNegative = computed < 0;

            if (isNegative)
            {
                Assert.Fail("Distance must be positive.");
            }
        }
        public void GetSimilarity_TwoDifferentVectors_ExpectedResult()
        {
            // Arrange: two articles whose vectors overlap on indices 1 and 2 only.
            // The expected constant equals 14 / (sqrt(35) * sqrt(29)).
            const double expected  = 0.43943537440204113472653679374377;
            const double tolerance = 0.001;

            CosineSimilarity metric = new CosineSimilarity();
            Article          left   = new Article();
            Article          right  = new Article();

            left.Vector[1] = 1;
            left.Vector[2] = 3;
            left.Vector[3] = 5;

            right.Vector[1] = 2;
            right.Vector[2] = 4;
            right.Vector[5] = 3;

            // Act
            double actual = metric.GetSimilarity(left, right);

            // Assert: the score must be strictly within the tolerance of the expected value.
            double error = Math.Abs(expected - actual);

            Assert.IsTrue(error < tolerance);
        }
Beispiel #9
0
        /// <summary>
        /// Command-line entry point: loads samples from a CSV file, clusters them with the
        /// selected similarity measure, classifies the samples against the first returned
        /// cluster and saves the classifications to the output file.
        /// </summary>
        /// <param name="args">
        /// Five positional arguments: input CSV path, output CSV path, <c>k</c>,
        /// distance selector (2 = Pearson, 3 = Cosine, anything else = Euclidean),
        /// and the number of threads.
        /// </param>
        static void Main(string[] args)
        {
            const int requiredArgCount = 5;

            // Fail with a usage message instead of an IndexOutOfRangeException when
            // arguments are missing.
            if (args == null || args.Length < requiredArgCount)
            {
                Console.Error.WriteLine(
                    "Usage: <inputCsv> <outputCsv> <k> <distance: 2=Pearson, 3=Cosine, other=Euclidean> <numThreads>");
                Environment.ExitCode = 1;
                return;
            }

            Console.WriteLine("Starting: {0}", DateTime.Now);

            String filePath       = args[0];
            String outputFilePath = args[1];
            int    k          = Int32.Parse(args[2]);
            int    distance   = Int32.Parse(args[3]);
            int    numThreads = Int32.Parse(args[4]);

            var samples = CSVIO.Load <float>(filePath);

            // samples[0] is read below to report the dimensionality; an empty input
            // has nothing to cluster, so stop here with a clear message.
            if (samples.Count == 0)
            {
                Console.Error.WriteLine("No samples were loaded from: {0}", filePath);
                Environment.ExitCode = 1;
                return;
            }

            Similarity sim;

            switch (distance)
            {
                case 2:
                    sim = new PearsonSimilarity();
                    break;

                case 3:
                    sim = new CosineSimilarity();
                    break;

                default:
                    sim = new EuclideanDistance();
                    break;
            }

            Console.WriteLine("Using distance measure: {0} on {1} samples of dimensionality: {2}",
                              sim, samples.Count, samples[0].Length);

            Console.WriteLine("Beginning Clustering: {0}", DateTime.Now);

            var     clusters = Cluster(samples, sim, numThreads);
            Cluster root     = clusters[0];

            Console.WriteLine("Finished Clustering: {0}", DateTime.Now);

            var classifications = Classify(samples, root, k);

            CSVIO.Save <int>(outputFilePath, classifications);

            Console.WriteLine("Finished: {0}", DateTime.Now);
        }
Beispiel #10
0
        /// <summary>
        /// Computes a similarity score between two strings using the metric selected by
        /// <paramref name="simMetricType"/>. Despite its name, this method returns the
        /// score itself rather than a boolean.
        /// </summary>
        /// <param name="firstWord">The first string to compare.</param>
        /// <param name="secondWord">The second string to compare.</param>
        /// <param name="simMetricType">
        /// The metric to use. Any value without its own case (including
        /// <c>SimMetricType.Levenstein</c>, the default) is scored with <c>Levenstein</c>.
        /// </param>
        /// <returns>The value returned by the chosen metric's <c>GetSimilarity</c>.</returns>
        public static double ApproximatelyEquals(this string firstWord, string secondWord, SimMetricType simMetricType = SimMetricType.Levenstein)
        {
            switch (simMetricType)
            {
                case SimMetricType.BlockDistance:
                    return new BlockDistance().GetSimilarity(firstWord, secondWord);

                case SimMetricType.ChapmanLengthDeviation:
                    return new ChapmanLengthDeviation().GetSimilarity(firstWord, secondWord);

                case SimMetricType.CosineSimilarity:
                    return new CosineSimilarity().GetSimilarity(firstWord, secondWord);

                case SimMetricType.DiceSimilarity:
                    return new DiceSimilarity().GetSimilarity(firstWord, secondWord);

                case SimMetricType.EuclideanDistance:
                    return new EuclideanDistance().GetSimilarity(firstWord, secondWord);

                case SimMetricType.JaccardSimilarity:
                    return new JaccardSimilarity().GetSimilarity(firstWord, secondWord);

                case SimMetricType.Jaro:
                    return new Jaro().GetSimilarity(firstWord, secondWord);

                case SimMetricType.JaroWinkler:
                    return new JaroWinkler().GetSimilarity(firstWord, secondWord);

                case SimMetricType.MatchingCoefficient:
                    return new MatchingCoefficient().GetSimilarity(firstWord, secondWord);

                case SimMetricType.MongeElkan:
                    return new MongeElkan().GetSimilarity(firstWord, secondWord);

                case SimMetricType.NeedlemanWunch:
                    return new NeedlemanWunch().GetSimilarity(firstWord, secondWord);

                case SimMetricType.OverlapCoefficient:
                    return new OverlapCoefficient().GetSimilarity(firstWord, secondWord);

                case SimMetricType.QGramsDistance:
                    return new QGramsDistance().GetSimilarity(firstWord, secondWord);

                case SimMetricType.SmithWaterman:
                    return new SmithWaterman().GetSimilarity(firstWord, secondWord);

                case SimMetricType.SmithWatermanGotoh:
                    return new SmithWatermanGotoh().GetSimilarity(firstWord, secondWord);

                case SimMetricType.SmithWatermanGotohWindowedAffine:
                    return new SmithWatermanGotohWindowedAffine().GetSimilarity(firstWord, secondWord);

                case SimMetricType.ChapmanMeanLength:
                    return new ChapmanMeanLength().GetSimilarity(firstWord, secondWord);

                default:
                    // Levenstein doubles as the fallback for any unlisted metric type.
                    return new Levenstein().GetSimilarity(firstWord, secondWord);
            }
        }
        public void ItemBasedLimitedDataEvaluateKValue()
        {
            // Evaluation run rather than a pass/fail test: for every k from 1 up to
            // products.Count - 1 it replays the orders in sequence, asks the item-based
            // recommender for a product using only the orders up to the current one, and
            // counts how often the recommendation equals the recorded SelectedProductId.
            // One summary line per k is appended to c:\TestResult.txt.
            var products              = db.Products.ToList();
            var orders                = db.Orders.ToList();
            var orderDetails          = db.OrderDetails.ToList();
            var recommendationResults = db.RecommendationResults.ToList();

            ISimilarityCalculable similarityCalculable = new CosineSimilarity();

            using (StreamWriter w = File.AppendText(@"c:\TestResult.txt"))
            {
                w.WriteLine("Item-based test with limited data" + DateTime.Now);

                for (int kNearest = 1; kNearest < products.Count; kNearest++)
                {
                    int firstHalfCorrect  = 0;
                    int secondHalfCorrect = 0;

                    for (int j = 0; j < orders.Count; j++)
                    {
                        Order order = orders[j];

                        // Only orders up to and including the current one are visible.
                        List <Order> availableOrders = orders.GetRange(0, j + 1);

                        List <OrderDetail> availableOrderDetails =
                            (from o in availableOrders
                             from od in orderDetails
                             where od.OrderId == o.OrderId
                             select od).ToList();

                        List <RecommendationResult> availableRecommendationResults =
                            (from o in availableOrders
                             from rr in recommendationResults
                             where rr.OrderId == o.OrderId
                             select rr).ToList();

                        // The order under test and its recorded result are withheld from the
                        // calculator. NOTE(review): its order details were collected above
                        // before this removal and therefore stay in availableOrderDetails -
                        // confirm that is intended.
                        availableOrders.Remove(order);
                        availableRecommendationResults.Remove(
                            availableRecommendationResults.Find(rr => rr.OrderId == order.OrderId));

                        RecommendationCalculator calc = new RecommendationCalculator(@"c:\testLog.txt", products,
                                                                                     availableOrders, availableOrderDetails, availableRecommendationResults);

                        Product product = calc.RecommendProductItemBased(order, similarityCalculable, kNearest);

                        // NOTE(review): assumes recommendationResults[j] belongs to orders[j].
                        if (product.ProductId == recommendationResults[j].SelectedProductId)
                        {
                            if (j < orders.Count / 2)
                            {
                                firstHalfCorrect++;
                            }
                            else
                            {
                                secondHalfCorrect++;
                            }
                        }
                    }

                    int totalCorrect = firstHalfCorrect + secondHalfCorrect;

                    // Fixed: the closing parenthesis of Math.Round used to sit before ", 3",
                    // so the ratio was rounded to a whole number and 3 was passed to
                    // WriteLine as an unused format argument.
                    w.WriteLine("K: " + kNearest + " \tCorrect: " + totalCorrect + "\tOrders: " + orders.Count + "\tPercentage: " +
                                Math.Round(totalCorrect / (double)orders.Count, 3));

                    w.Flush();
                }
            }
            Assert.IsTrue(true);
        }
Beispiel #12
0
        /// <summary>
        /// Computes the similarity between two strings using the algorithm named by
        /// <paramref name="type"/>. Both strings are trimmed before they are compared.
        /// </summary>
        /// <param name="str1">The first string to compare.</param>
        /// <param name="str2">The second string to compare.</param>
        /// <param name="type">
        /// One of the <c>AlgorithmTypes</c> constants; any other value selects
        /// <c>SmithWatermanGotoh</c>.
        /// </param>
        /// <returns>The value returned by the selected metric's <c>GetSimilarity</c>.</returns>
        public double GetSimilarity(string str1, string str2, string type)
        {
            IStringMetric metric = CreateMetric(type);

            return metric.GetSimilarity(str1.Trim(), str2.Trim());
        }

        // Maps an AlgorithmTypes constant to a new instance of the matching metric;
        // unrecognised values yield SmithWatermanGotoh.
        private static IStringMetric CreateMetric(string type)
        {
            switch (type)
            {
                case AlgorithmTypes.BlockDistance:
                    return new BlockDistance();

                case AlgorithmTypes.ChapmanLengthDeviation:
                    return new ChapmanLengthDeviation();

                case AlgorithmTypes.ChapmanMeanLength:
                    return new ChapmanMeanLength();

                case AlgorithmTypes.CosineSimilarity:
                    return new CosineSimilarity();

                case AlgorithmTypes.DiceSimilarity:
                    return new DiceSimilarity();

                case AlgorithmTypes.EuclideanDistance:
                    return new EuclideanDistance();

                case AlgorithmTypes.JaccardSimilarity:
                    return new JaccardSimilarity();

                case AlgorithmTypes.Jaro:
                    return new Jaro();

                case AlgorithmTypes.JaroWinkler:
                    return new JaroWinkler();

                case AlgorithmTypes.Levenstein:
                    return new Levenstein();

                case AlgorithmTypes.MatchingCoefficient:
                    return new MatchingCoefficient();

                case AlgorithmTypes.MongeElkan:
                    return new MongeElkan();

                case AlgorithmTypes.NeedlemanWunch:
                    return new NeedlemanWunch();

                case AlgorithmTypes.OverlapCoefficient:
                    return new OverlapCoefficient();

                case AlgorithmTypes.QGramsDistance:
                    return new QGramsDistance();

                case AlgorithmTypes.SmithWaterman:
                    return new SmithWaterman();

                case AlgorithmTypes.SmithWatermanGotohWindowedAffine:
                    return new SmithWatermanGotohWindowedAffine();

                case AlgorithmTypes.SmithWatermanGotoh:
                default:
                    return new SmithWatermanGotoh();
            }
        }
Beispiel #13
0
 // [SetUp]
 /// <summary>
 /// Prepares the fixture: calls <c>LoadData()</c> and then stores a new
 /// <see cref="CosineSimilarity"/> in <c>_myCosineSimilarity</c>.
 /// </summary>
 public CosineSimilarityUnitTests()
 {
     LoadData();
     _myCosineSimilarity = new CosineSimilarity();
 }
Beispiel #14
0
        /// <summary>
        /// Returns the cosine distance between two vectors, defined here as one minus
        /// the score reported by <c>CosineSimilarity.GetSimilarityScore</c>.
        /// </summary>
        /// <param name="p">The first vector.</param>
        /// <param name="q">The second vector.</param>
        /// <returns><c>1.0 - similarity</c> for the given pair.</returns>
        public double GetDistance(double[] p, double[] q)
        {
            double similarity = new CosineSimilarity().GetSimilarityScore(p, q);

            return 1.0 - similarity;
        }
Beispiel #15
0
 /// <summary>
 /// Stores the supplied collaborators and looks up the hub context for
 /// <c>MessageHub</c> through <c>GlobalHost.ConnectionManager</c>.
 /// </summary>
 /// <param name="cs">Cosine similarity calculator, kept in <c>_cs</c>.</param>
 /// <param name="qry">Query dispatcher, kept in <c>_qry</c>.</param>
 public SearchFactory(CosineSimilarity cs, QueryDispatcher qry)
 {
     _qry        = qry;
     _hubContext = GlobalHost.ConnectionManager.GetHubContext <MessageHub>();
     _cs         = cs;
 }
Beispiel #16
0
 /// <summary>
 /// Creates the calculator and stores a new <see cref="CosineSimilarity"/> in <c>sim</c>.
 /// </summary>
 public AForgeCosineCalculator()
 {
     sim = new CosineSimilarity();
 }