Example #1
0
        /// <summary>
        /// Fills a 4x4 matrix row by row, spot-checks two cells, and compares the result of
        /// GetPositivelyCorrelatedEntities(2) against the expected entity IDs.
        /// </summary>
        public void TestGetPositivelyCorrelatedEntities()
        {
            // arrange: one inner array per matrix row
            float[][] rows =
            {
                new float[] { 0.1f, 0.4f, 0.2f, 0.3f },
                new float[] { 0.3f, 0.1f, 0.6f, 0.7f },
                new float[] { 0.2f, 0.6f, 0.3f, 0.5f },
                new float[] { 0.4f, 0.2f, 0.5f, 0.1f },
            };

            var matrix = new CorrelationMatrix(4);
            for (int r = 0; r < rows.Length; r++)
            {
                matrix.SetRow(r, rows[r]);
            }

            Assert.AreEqual(0.1f, matrix[0, 0]);
            Assert.AreEqual(0.5f, matrix[3, 2]);

            // act
            IList<int> correlated = matrix.GetPositivelyCorrelatedEntities(2);

            // assert: the result is copied into a zero-initialised buffer of five slots,
            // so slots beyond the returned entries compare as 0
            int[] actual = new int[5];
            correlated.CopyTo(actual, 0);
            int[] expected = { 1, 3, 0, 0, 0 };
            Assert.AreEqual(expected, actual);
        }
Example #2
0
        /// <summary>
        /// Runs ErrorOptimizer.Optimize over the ranges [0, 1] x [0, 1]. For each probed
        /// (permissible, variation) pair the labeled documents are re-clustered, the average
        /// error score is printed and appended to a CSV file, and the callback reports
        /// whether the absolute average error is within the error goal.
        /// </summary>
        /// <param name="programArgs">Arguments forwarded to LoadCorrelationMatrix.</param>
        private static void FindBestThresholds(ProgramArguments programArgs)
        {
            IEnumerable<DocumentCluster> originalClusters  = GetSimilarNewsTopicFiles();
            IEnumerable<Document>        documents         = Flatten(originalClusters);
            CorrelationMatrix            correlationMatrix = LoadCorrelationMatrix(programArgs);

            // Largest absolute average error the callback accepts.
            const double errorGoal = 0.01;

            // A fresh GUID-based name, so earlier result files are never overwritten.
            string fileName = Guid.NewGuid().ToString() + ".csv";

            using (StreamWriter sw = new StreamWriter(fileName))
            {
                ErrorOptimizer.Optimize(0, 1, 0, 1, (permissibleValue, variationValue) =>
                {
                    SimilarityAlgorithm similarityAlgorithm             = new SimilarityAlgorithm(correlationMatrix, permissibleValue, variationValue);
                    DocumentCategorizer categorizer                     = new DocumentCategorizer(similarityAlgorithm);
                    IEnumerable<DocumentCluster> resultClusters         = categorizer.Cluster(documents);
                    IEnumerable<DocumentClusterErrorScore> errorScores  = CalculateErrorScore(originalClusters, resultClusters);

                    double average = (from score in errorScores select score.Value).Average();
                    Console.WriteLine("Average Error: " + average);

                    // One CSV line per probe: permissible, variation, average error.
                    sw.WriteLine("{0}, {1}, {2}", permissibleValue, variationValue, average);

                    return Math.Abs(average) <= errorGoal;
                });
            }
        }
Example #3
0
        /// <summary>
        /// Writes a CSV report with one line per statement pair: both original statements
        /// (commas replaced by dots), the smaller of the two directional similarities of the
        /// stemmed statements, and the absolute difference between those two similarities.
        /// </summary>
        /// <param name="pairs">Statement pairs to compare.</param>
        /// <param name="programArgs">Arguments forwarded to LoadCorrelationMatrix.</param>
        private static void OutputThresholdReport(List <Tuple <Statement, Statement> > pairs, ProgramArguments programArgs)
        {
            CorrelationMatrix   correlationMatrix = LoadCorrelationMatrix(programArgs);
            SimilarityAlgorithm sim = new SimilarityAlgorithm(correlationMatrix);
            StringBuilder       sb  = new StringBuilder();

            foreach (Tuple <Statement, Statement> pair in pairs)
            {
                Statement s1 = StemStatement(pair.Item1);
                Statement s2 = StemStatement(pair.Item2);

                double s12      = sim.StatementSimilarityToStatement(s1, s2);
                double s21      = sim.StatementSimilarityToStatement(s2, s1);
                // NOTE(review): this result is never used; the call is kept in case it has
                // side effects. Confirm, then remove it or add it to the report.
                bool   areEqual = sim.StatementEqualsToStatement(s1, s2);

                // The fourth column must use placeholder {3}; repeating {2} would print the
                // minimum twice and drop the asymmetry value.
                sb.AppendFormat(
                    "{0},{1},{2},{3}\r\n",
                    pair.Item1.ToString().Replace(',', '.'),
                    pair.Item2.ToString().Replace(',', '.'),
                    Math.Min(s12, s21),
                    Math.Abs(s12 - s21));
            }

            string reportName = "autoRSS_thresholdReport_" + Guid.NewGuid().ToString() + ".csv";

            using (StreamWriter sw = new StreamWriter(reportName))
            {
                sw.WriteLine(sb.ToString());
            }

            Console.WriteLine("Report: " + reportName);
        }
 // TODO think about moving the next two methods to their own class
 /// <summary>Sum the correlations between one item and every member of an item collection</summary>
 /// <param name="item_id">the item ID</param>
 /// <param name="items">the items to compare against</param>
 /// <param name="item_correlation">the correlation matrix that supplies the pairwise values</param>
 /// <returns>the sum of item_correlation[item_id, x] over all x in items</returns>
 public static double Similarity(int item_id, ICollection<int> items, CorrelationMatrix item_correlation)
 {
     double total = 0;

     foreach (int other_item_id in items)
     {
         total += item_correlation[item_id, other_item_id];
     }

     return total;
 }
Example #5
0
        /// <summary>
        /// Evaluates the clustering error on a regular grid of (P, V) threshold pairs and
        /// saves the resulting table to a CSV file with a GUID-based name.
        /// </summary>
        /// <remarks>
        /// Both thresholds cover the half-open range [0, 1) in steps of 0.01. Grid points
        /// are derived from integer indices (start + index * step) rather than by repeatedly
        /// adding the step, so rounding error does not accumulate across iterations.
        /// </remarks>
        /// <param name="programArgs">Arguments forwarded to LoadCorrelationMatrix.</param>
        private static void ExperimentPandVThresholds(ProgramArguments programArgs)
        {
            IEnumerable<DocumentCluster> originalClusters  = GetSimilarNewsTopicFiles();
            IEnumerable<Document>        documents         = Flatten(originalClusters);
            CorrelationMatrix            correlationMatrix = LoadCorrelationMatrix(programArgs);

            double startP = 0,
                   endP   = 1,
                   startV = 0,
                   endV   = 1;
            double step   = 0.01;

            // Number of grid points per axis; +0.5 rounds to nearest so a quotient such as
            // 99.999... still yields 100.
            int countP = (int)(((endP - startP) / step) + 0.5);
            int countV = (int)(((endV - startV) / step) + 0.5);

            double[,] errorValues = new double[countP, countV];
            for (int i = 0; i < countP; i++)
            {
                double p = startP + (i * step);
                for (int j = 0; j < countV; j++)
                {
                    double v = startV + (j * step);

                    SimilarityAlgorithm similarityAlgorithm = new SimilarityAlgorithm(correlationMatrix, p, v);
                    DocumentCategorizer categorizer         = new DocumentCategorizer(similarityAlgorithm);
                    IEnumerable<DocumentCluster> resultClusters = categorizer.Cluster(documents);

                    // NOTE(review): other callers in this file pass (originalClusters, resultClusters);
                    // the argument order here is reversed. Confirm whether CalculateErrorScore is symmetric.
                    IEnumerable<DocumentClusterErrorScore> errorScores = CalculateErrorScore(resultClusters, originalClusters);

                    double average = (from score in errorScores select score.Value).Average();
                    Console.WriteLine("Average Error: " + average);

                    errorValues[i, j] = Math.Abs(average);
                }
            }

            string fileName = Guid.NewGuid().ToString() + ".csv";

            using (StreamWriter sw = new StreamWriter(fileName))
            {
                // Header row: a placeholder corner cell followed by the V values.
                sw.Write("0, ");
                for (int j = 0; j < countV; j++)
                {
                    sw.Write("{0}, ", startV + (j * step));
                }

                sw.WriteLine();

                // One row per P value: the P value, then the error for each V.
                for (int i = 0; i < countP; i++)
                {
                    sw.Write("{0}, ", startP + (i * step));
                    for (int j = 0; j < countV; j++)
                    {
                        sw.Write("{0}, ", errorValues[i, j]);
                    }

                    sw.WriteLine();
                }
            }

            Logger.Log("Saved experiment to file: " + fileName);
        }
 /// <summary>
 /// Computes a BinaryCosine correlation from the item attributes, stores it as the item
 /// correlation, and installs a fresh memoized wrapper around __MapToLatentFactorSpace.
 /// </summary>
 public override void LearnAttributeToFactorMapping()
 {
     // The matrix is sized MaxItemID + 1.
     var cosine = new BinaryCosine(MaxItemID + 1);
     Console.Error.WriteLine("training with max_item_id={0}", MaxItemID);
     cosine.ComputeCorrelations(item_attributes);

     item_correlation = cosine;
     _MapToLatentFactorSpace = Utils.Memoize<int, float[]>(__MapToLatentFactorSpace);
 }
        /// <summary>
        /// Computes a BinaryCosine correlation from the item attributes, stores it as the item
        /// correlation, and installs a fresh memoized wrapper around __MapToLatentFactorSpace.
        /// </summary>
        public override void LearnAttributeToFactorMapping()
        {
            // The matrix is sized MaxItemID + 1.
            var cosine = new BinaryCosine(MaxItemID + 1);
            Console.Error.WriteLine("training with max_item_id={0}", MaxItemID);
            cosine.ComputeCorrelations(item_attributes);

            item_correlation = cosine;
            _MapToLatentFactorSpace = Utils.Memoize<int, double[]>(__MapToLatentFactorSpace);
        }
Example #8
0
        /// <summary>
        /// Looks up the correlation between two labels in the attached correlation matrix.
        /// </summary>
        /// <param name="label1">First label.</param>
        /// <param name="label2">Second label.</param>
        /// <param name="t">Passed through to the matrix lookup; defaults to 0.</param>
        /// <returns>The value returned by the matrix's GetCorrelation.</returns>
        /// <exception cref="Exception">No correlation matrix is attached.</exception>
        public double GetCorrelation(string label1, string label2, double t = 0)
        {
            if (CorrelationMatrix != null)
            {
                return CorrelationMatrix.GetCorrelation(label1, label2, t);
            }

            throw new Exception("No correlation matrix attached to model");
        }
        /// <summary>Compute the intra-set similarity of an item collection</summary>
        /// <remarks>
        /// Sums the correlation of every unordered pair of distinct positions in the
        /// collection, looking each pair up by its item IDs.
        /// </remarks>
        /// <param name="items">a collection of item IDs</param>
        /// <param name="item_correlation">the similarity measure to use</param>
        /// <returns>the intra-set similarity of the collection; 0 for fewer than two items</returns>
        public static double Similarity(ICollection<int> items, CorrelationMatrix item_correlation)
        {
            // ICollection<int> has no indexer, so snapshot the IDs into an array.
            // The matrix must be addressed by item ID, not by position in the collection.
            var item_ids = new int[items.Count];
            items.CopyTo(item_ids, 0);

            double similarity = 0;
            for (int i = 0; i < item_ids.Length; i++)
            {
                for (int j = i + 1; j < item_ids.Length; j++)
                {
                    similarity += item_correlation[item_ids[i], item_ids[j]];
                }
            }

            return similarity;
        }
Example #10
0
        /// <summary>
        /// Reads a correlation matrix from the model file, trains the baseline model, and
        /// stores the matrix wrapped in a BinaryCosine.
        /// </summary>
        /// <param name="filename">the name of the model file</param>
        public override void LoadModel(string filename)
        {
            using (StreamReader model_reader = Recommender.GetReader(filename, this.GetType()))
            {
                CorrelationMatrix stored_matrix = CorrelationMatrix.ReadCorrelationMatrix(model_reader);

                // The baseline model is trained after the matrix has been read and before
                // the correlation field is assigned.
                base.Train();

                this.correlation = new BinaryCosine(stored_matrix);
            }
        }
Example #11
0
        // TODO think about moving the next two methods to their own class

        /// <summary>Sum the correlations between one item and every member of an item collection</summary>
        /// <param name="item_id">the item ID</param>
        /// <param name="items">the items to compare against</param>
        /// <param name="item_correlation">the correlation matrix that supplies the pairwise values</param>
        /// <returns>the sum of item_correlation[item_id, x] over all x in items</returns>
        public static double Similarity(int item_id, ICollection <int> items, CorrelationMatrix item_correlation)
        {
            double total = 0;
            foreach (int other_item_id in items)
                total += item_correlation[item_id, other_item_id];

            return total;
        }
Example #12
0
        /// <summary>
        /// Reads a correlation matrix from the model file, trains the baseline model, and
        /// stores the matrix wrapped in a BinaryCosine.
        /// </summary>
        /// <param name="filename">the name of the model file</param>
        public override void LoadModel(string filename)
        {
            using (StreamReader model_reader = Recommender.GetReader(filename, this.GetType()))
            {
                CorrelationMatrix stored_matrix = CorrelationMatrix.ReadCorrelationMatrix(model_reader);

                // The baseline model is trained after the matrix has been read and before
                // the correlation field is assigned.
                base.Train();

                this.correlation = new BinaryCosine(stored_matrix);
            }
        }
Example #13
0
 /// <summary>
 /// Builds the transport-object representation of this model by converting each
 /// component to its own transport object.
 /// </summary>
 /// <returns>A new TO_AssetFxModel populated from the current state.</returns>
 public TO_AssetFxModel ToTransportObject()
 {
     // Every component except the funding model is accessed null-conditionally;
     // _fundingModel is dereferenced directly.
     return new TO_AssetFxModel
     {
         AssetCurves = _assetCurves?.ToDictionary(
             entry => entry.Key,
             entry => entry.Value.GetTransportObject()),
         AssetVols = _assetVols?.ToDictionary(
             entry => entry.Key.GetTransportObject(),
             entry => entry.Value.GetTransportObject()),
         BuildDate = BuildDate,
         CorrelationMatrix = CorrelationMatrix?.GetTransportObject(),
         Fixings = _fixings?.ToDictionary(
             entry => entry.Key,
             entry => entry.Value.GetTransportObject()),
         FundingModel = _fundingModel.GetTransportObject(),
         Portfolio = _portfolio?.ToTransportObject(),
     };
 }
Example #14
0
        /// <summary>
        /// Exercises CorrelationMatrix: default construction, lookup of a missing label pair
        /// (throwing and Try variants), and Clone followed by Bump.
        /// </summary>
        public void CorrelMatrixFacts()
        {
            // The parameterless constructor is invoked first, then replaced by a 1x1 matrix.
            var z = new CorrelationMatrix();
            z = new CorrelationMatrix(new[] { "x" }, new[] { "y" }, new[] { new[] { 0.9999 } });

            // "z" is not among the labels: the throwing lookup raises, the Try variant returns false.
            Assert.Throws<Exception>(() => z.GetCorrelation("x", "z"));
            Assert.False(z.TryGetCorrelation("x", "z", out var missing));

            // Bumping a clone by 0.5 must still yield a correlation below 1.
            var bumped = z.Clone().Bump(0.5);
            Assert.True(bumped.GetCorrelation("x", "y") < 1.0);
        }
        /// <summary>
        /// Creates a MultivariateBinaryGenerator, builds its covariance matrix from the
        /// person's boolean statistics that pass ExcludeNonvariantStatistcs, and attaches
        /// it to the statistics object.
        /// </summary>
        /// <param name="personStats">Statistics to read from; receives the generator.</param>
        /// <param name="correlationMatrix">Correlations passed to the covariance builder.</param>
        private static void SetSigma(PersonStatistics personStats, CorrelationMatrix correlationMatrix)
        {
            var booleanStats = BooleanStatistic.GetAll(personStats);
            booleanStats = ExcludeNonvariantStatistcs(booleanStats);

            var generator = new MultivariateBinaryGenerator();
            generator.BuildCoverianceMatrix(booleanStats, correlationMatrix, RIntegration.GetSigma);

            personStats.BinaryGenerator = generator;
        }
Example #16
0
        /// <summary>
        /// Interactive loop: scans the training data with a similarity algorithm built from
        /// the loaded correlation matrix, then waits for Enter and scans again. Never returns.
        /// </summary>
        /// <param name="programArgs">Arguments forwarded to LoadCorrelationMatrix.</param>
        private static void CreateThresholdTrainingData(ProgramArguments programArgs)
        {
            SimilarityAlgorithm sim = new SimilarityAlgorithm(LoadCorrelationMatrix(programArgs));

            // Deliberately endless: the loop has no exit condition.
            for (;;)
            {
                ScanTrainData(sim);
                Console.WriteLine("Press Enter to rescan");
                Console.ReadLine();
            }
        }
Example #17
0
 /// <summary>
 /// Excel entry point: builds a CorrelationMatrix from two label ranges and a square
 /// block of correlations and pushes it into the object cache under the given name.
 /// </summary>
 /// <returns>Whatever ExcelHelper.Execute returns for the wrapped operation.</returns>
 public static object CreateCorrelationMatrix(
     [ExcelArgument(Description = "Object name")] string ObjectName,
     [ExcelArgument(Description = "Labels X")] object[] LabelsX,
     [ExcelArgument(Description = "Labels Y")] object[] LabelsY,
     [ExcelArgument(Description = "Correlations")] double[,] Correlations)
 {
     return ExcelHelper.Execute(_logger, () =>
     {
         // Convert the raw Excel ranges before constructing the matrix.
         var xLabels = LabelsX.ObjectRangeToVector<string>();
         var yLabels = LabelsY.ObjectRangeToVector<string>();
         var jagged  = Correlations.SquareToJagged();

         var matrix = new CorrelationMatrix(xLabels, yLabels, jagged);
         return ExcelHelper.PushToCache<ICorrelationMatrix>(matrix, ObjectName);
     });
 }
Example #18
0
        /// <summary>
        /// Reads "text" elements from a Wikipedia XML dump, skips the first 100, and for up to
        /// the next 100 writes the odds ratio between each document and its predecessor to a
        /// new CSV file. The first document is compared with itself.
        /// </summary>
        /// <param name="programArgs">Arguments forwarded to LoadCorrelationMatrix.</param>
        private static void CreateSimilarityReport(ProgramArguments programArgs)
        {
            CorrelationMatrix correlationMatrix = LoadCorrelationMatrix(programArgs);

            SimilarityAlgorithm sim = new SimilarityAlgorithm(correlationMatrix);

            // NOTE(review): machine-specific absolute path; consider taking it from programArgs.
            string wikipediaPath    = @"C:\Users\haabu\Downloads\enwiki-latest-pages-articles.xml\enwiki-latest-pages-articles.xml";

            // The input stream gets its own using: disposing an XmlReader does not close the
            // underlying stream unless XmlReaderSettings.CloseInput is set. The dump is only
            // read, so it is opened with FileAccess.Read.
            using (FileStream input = new FileStream(wikipediaPath, FileMode.Open, FileAccess.Read))
            using (XmlReader sr = XmlReader.Create(input))
            {
                // Skip first 100
                for (int i = 0; i < 100; i++)
                {
                    bool elementFound = sr.ReadToFollowing("text");
                    if (!elementFound)
                    {
                        break;
                    }
                }

                string filename = "autorss_test_" + Guid.NewGuid().ToString() + ".csv";

                // Disposing the writer flushes it, so buffered output reaches the file even
                // when an exception interrupts the loop.
                using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
                using (StreamWriter sw = new StreamWriter(fs))
                {
                    Document prevDocument = null;
                    for (int i = 0; i < 100; i++)
                    {
                        if (!sr.ReadToFollowing("text"))
                        {
                            // No further "text" elements; later calls would also find nothing.
                            break;
                        }

                        sr.ReadStartElement();
                        string pageContents = sr.ReadContentAsString();

                        Document document = ConstructDocument(pageContents);
                        if (null == prevDocument)
                        {
                            // First document: there is no predecessor, so compare it with itself.
                            prevDocument = document;
                        }

                        sw.WriteLine(sim.CalculateOddsRatio(document, prevDocument));
                        prevDocument = document;
                    }
                }
            }
        }
Example #19
0
        /// <summary>
        /// Instantiates the correlation type named by <paramref name="typename"/> from the
        /// MyMediaLite.Correlation namespace through its single-int constructor and stores it
        /// in the correlation field.
        /// </summary>
        /// <param name="typename">Unqualified name of a CorrelationMatrix subclass.</param>
        /// <exception cref="Exception">The resolved type is not a subclass of CorrelationMatrix.</exception>
        private void CreateSimilarityMatrix(string typename)
        {
            // throwOnError = true: an unknown type name raises inside Type.GetType.
            Type type = Type.GetType("MyMediaLite.Correlation." + typename, true);

            if (!type.IsSubclassOf(typeof(CorrelationMatrix)))
            {
                throw new Exception(typename + " is not a subclass of CorrelationMatrix");
            }

            var constructor = type.GetConstructor(new Type[] { typeof(int) });

            // The matrix dimension follows the entity kind: users or items.
            var size = Entity == EntityType.USER ? MaxUserID + 1 : MaxItemID + 1;
            correlation = (CorrelationMatrix)constructor.Invoke(new object[] { size });
        }
Example #20
0
        /// <summary>
        /// Flattens the labeled news clusters into documents, re-clusters them with a
        /// categorizer built from the loaded correlation matrix, outputs the result, and
        /// scores it against the original clusters.
        /// </summary>
        /// <param name="programArgs">Arguments forwarded to LoadCorrelationMatrix.</param>
        /// <returns>The error scores returned by CalculateErrorScore.</returns>
        private static IEnumerable <DocumentClusterErrorScore> CategorizeLabeledNewsArticles(ProgramArguments programArgs)
        {
            IEnumerable<DocumentCluster> labeledClusters = GetSimilarNewsTopicFiles();
            IEnumerable<Document> flattened = Flatten(labeledClusters);
            CorrelationMatrix correlations = LoadCorrelationMatrix(programArgs);

            DocumentCategorizer categorizer = new DocumentCategorizer(correlations);
            IEnumerable<DocumentCluster> predictedClusters = categorizer.Cluster(flattened);
            OutputClusters(predictedClusters);

            return CalculateErrorScore(labeledClusters, predictedClusters);
        }
Example #21
0
        /// <summary>Compute the intra-set similarity of an item collection</summary>
        /// <remarks>
        /// Sums the correlation of every unordered pair of distinct positions in the
        /// collection, looking each pair up by its item IDs.
        /// </remarks>
        /// <param name="items">a collection of item IDs</param>
        /// <param name="item_correlation">the similarity measure to use</param>
        /// <returns>the intra-set similarity of the collection; 0 for fewer than two items</returns>
        public static double Similarity(ICollection <int> items, CorrelationMatrix item_correlation)
        {
            // ICollection<int> has no indexer, so snapshot the IDs into an array.
            // The matrix must be addressed by item ID, not by position in the collection.
            var item_ids = new int[items.Count];
            items.CopyTo(item_ids, 0);

            double similarity = 0;

            for (int i = 0; i < item_ids.Length; i++)
            {
                for (int j = i + 1; j < item_ids.Length; j++)
                {
                    similarity += item_correlation[item_ids[i], item_ids[j]];
                }
            }

            return(similarity);
        }
Example #22
0
        /// <summary>
        /// Builds a correlation matrix with one row and column per unique lemma of the corpus.
        /// </summary>
        /// <remarks>
        /// For each (i, j) supplied by the cycle provider, both directional correlations are
        /// computed. The larger one is stored unless it equals exactly 1, in which case the
        /// smaller one is stored.
        /// </remarks>
        /// <param name="corpus">Corpus whose unique lemmas define the matrix size.</param>
        /// <returns>The populated matrix.</returns>
        public CorrelationMatrix GetMatrix(Corpus corpus)
        {
            var result = new CorrelationMatrix(corpus.UniqueLemmas.Count);

            _cycleProvider.Run(corpus.UniqueLemmas.Count, (i, j) =>
            {
                var forward  = GetCorrelation(corpus, i, j);
                var backward = GetCorrelation(corpus, j, i);

                var larger = Math.Max(forward, backward);
                if (larger == 1)
                {
                    result[i, j] = Math.Min(forward, backward);
                }
                else
                {
                    result[i, j] = larger;
                }
            });

            return result;
        }
Example #23
0
        /// <summary>
        /// Combines the asset volatility and the FX volatility for the given expiry into a
        /// single volatility: sqrt(a^2 + f^2 + 2 * rho * f * a), where a is the asset vol,
        /// f the FX vol and rho their correlation.
        /// </summary>
        /// <param name="assetId">Asset whose price curve and volatility are used.</param>
        /// <param name="expiry">Expiry date.</param>
        /// <param name="strike">Strike; divided by the FX forward before the asset-vol lookup.</param>
        /// <param name="ccy">Currency paired with the price curve's currency to form the FX pair id.</param>
        /// <returns>The combined volatility.</returns>
        public double GetCompositeVolForStrikeAndDate(string assetId, DateTime expiry, double strike, Currency ccy)
        {
            var priceCurve = GetPriceCurve(assetId);

            // The FX pair id is "<curve currency>/<requested currency>".
            var pairId = $"{priceCurve.Currency.Ccy}/{ccy.Ccy}";
            var pair   = FundingModel.FxMatrix.GetFxPair(pairId);

            var spotDate = pair.SpotDate(expiry);
            var forward  = FundingModel.GetFxRate(spotDate, pairId);

            // FX vol is read at delta strike 0.5.
            var volFx = FundingModel.GetVolSurface(pairId).GetVolForDeltaStrike(0.5, expiry, forward);

            var yearFraction = BuildDate.CalculateYearFraction(expiry, DayCountBasis.Act365F);
            var rho          = CorrelationMatrix.GetCorrelation(pairId, assetId, yearFraction);
            var volAsset     = GetVolForStrikeAndDate(assetId, expiry, strike / forward);

            return System.Math.Sqrt(volAsset * volAsset + volFx * volFx + 2 * rho * volFx * volAsset);
        }
        /// <summary>
        /// Fills a 4x4 matrix, calls AddEntity(4), and checks that dim1 is 5 afterwards.
        /// </summary>
        public void TestAddEntity()
        {
            // arrange: one inner array per matrix row
            float[][] rows =
            {
                new float[] { 0.1f, 0.4f, 0.2f, 0.3f },
                new float[] { 0.3f, 0.1f, 0.6f, 0.7f },
                new float[] { 0.2f, 0.6f, 0.3f, 0.5f },
                new float[] { 0.4f, 0.2f, 0.5f, 0.1f },
            };

            var matrix = new CorrelationMatrix(4);
            for (int r = 0; r < rows.Length; r++)
            {
                matrix.SetRow(r, rows[r]);
            }

            // act
            matrix.AddEntity(4);

            // assert
            Assert.AreEqual(5, matrix.dim1);
        }
Example #25
0
        /// <summary>
        /// Writes a 3x3 matrix to a text file, checks the first four lines of the file, and
        /// checks that reading the file back reproduces every cell.
        /// </summary>
        /// <remarks>
        /// All streams are wrapped in using blocks so that a failing assertion cannot leave
        /// the file locked for later tests. The file itself is left on disk.
        /// </remarks>
        public void TestWrite()
        {
            // create a test CorrelationMatrix
            var matrix = new CorrelationMatrix(3);

            float[] row1 = { 1f, 0.1f, 0.2f };
            float[] row2 = { 0.1f, 1f, 0.3f };
            float[] row3 = { 0.2f, 0.3f, 1f };

            matrix.SetRow(0, row1);
            matrix.SetRow(1, row2);
            matrix.SetRow(2, row3);

            // test
            string filename = "testCorrelationMatrixWriter.txt";
            using (var writer = new StreamWriter(filename))
            {
                matrix.Write(writer);
            }

            // raw file contents: the size line, then the three "row column value" lines checked below
            using (var reader1 = new StreamReader(filename))
            {
                Assert.AreEqual("3", reader1.ReadLine().Trim());
                Assert.AreEqual("0 1 0.1", reader1.ReadLine().Trim());
                Assert.AreEqual("0 2 0.2", reader1.ReadLine().Trim());
                Assert.AreEqual("1 2 0.3", reader1.ReadLine().Trim());
            }

            // round trip through the reader
            using (var reader2 = new StreamReader(filename))
            {
                var corr_matrix = CorrelationMatrix.ReadCorrelationMatrix(reader2);

                Assert.AreEqual(1f, corr_matrix[0, 0]);
                Assert.AreEqual(0.1f, corr_matrix[0, 1]);
                Assert.AreEqual(0.2f, corr_matrix[0, 2]);
                Assert.AreEqual(0.1f, corr_matrix[1, 0]);
                Assert.AreEqual(1f, corr_matrix[1, 1]);
                Assert.AreEqual(0.3f, corr_matrix[1, 2]);
                Assert.AreEqual(0.2f, corr_matrix[2, 0]);
                Assert.AreEqual(0.3f, corr_matrix[2, 1]);
                Assert.AreEqual(1f, corr_matrix[2, 2]);
            }
        }
        /// <summary>
        /// Fills a 4x4 matrix and checks that GetNearestNeighbors(2, 2) returns { 1, 3 }.
        /// </summary>
        public void TestGetNearestNeighbors()
        {
            // arrange: one inner array per matrix row
            float[][] rows =
            {
                new float[] { 0.1f, 0.4f, 0.2f, 0.3f },
                new float[] { 0.3f, 0.1f, 0.6f, 0.7f },
                new float[] { 0.2f, 0.6f, 0.3f, 0.5f },
                new float[] { 0.4f, 0.2f, 0.5f, 0.1f },
            };

            var matrix = new CorrelationMatrix(4);
            for (int r = 0; r < rows.Length; r++)
            {
                matrix.SetRow(r, rows[r]);
            }

            // act
            int[] actual = matrix.GetNearestNeighbors(2, 2);

            // assert
            int[] expected = { 1, 3 };
            Assert.AreEqual(expected, actual);
        }
Example #27
0
        /// <summary>
        /// Fills a 4x4 matrix, calls AddEntity(4), and checks that dim1 is 5 afterwards.
        /// </summary>
        /// <remarks>
        /// NOTE(review): despite its name this test never calls a SumUp method; it only
        /// exercises AddEntity. Confirm what SumUp should assert and extend the test.
        /// </remarks>
        public void TestSumUp()
        {
            // arrange: one inner array per matrix row
            float[][] rows =
            {
                new float[] { 0.1f, 0.4f, 0.2f, 0.3f },
                new float[] { 0.3f, 0.1f, 0.6f, 0.7f },
                new float[] { 0.2f, 0.6f, 0.3f, 0.5f },
                new float[] { 0.4f, 0.2f, 0.5f, 0.1f },
            };

            var matrix = new CorrelationMatrix(4);
            for (int r = 0; r < rows.Length; r++)
            {
                matrix.SetRow(r, rows[r]);
            }

            // act
            matrix.AddEntity(4);

            // assert
            Assert.AreEqual(5, matrix.dim1);
        }
Example #28
0
        /// <summary>
        /// Fills a 4x4 matrix and checks that GetNearestNeighbors(2, 2) returns { 1, 3 }.
        /// </summary>
        public void TestGetNearestNeighbors()
        {
            // arrange: one inner array per matrix row
            float[][] rows =
            {
                new float[] { 0.1f, 0.4f, 0.2f, 0.3f },
                new float[] { 0.3f, 0.1f, 0.6f, 0.7f },
                new float[] { 0.2f, 0.6f, 0.3f, 0.5f },
                new float[] { 0.4f, 0.2f, 0.5f, 0.1f },
            };

            var matrix = new CorrelationMatrix(4);
            for (int r = 0; r < rows.Length; r++)
            {
                matrix.SetRow(r, rows[r]);
            }

            // act
            int[] actual = matrix.GetNearestNeighbors(2, 2);

            // assert
            int[] expected = { 1, 3 };
            Assert.AreEqual(expected, actual);
        }
Example #29
0
        /// <summary>
        /// Loads a stored model: a user count, one space-separated line of neighbor IDs per
        /// user, and then a correlation matrix.
        /// </summary>
        /// <param name="filename">the name of the model file</param>
        public override void LoadModel(string filename)
        {
            using (StreamReader reader = Model.GetReader(filename, this.GetType()))
            {
                int user_count = int.Parse(reader.ReadLine());
                var neighbor_lists = new int[user_count][];

                for (int u = 0; u < neighbor_lists.Length; u++)
                {
                    // one line per user, IDs separated by single spaces
                    string[] tokens = reader.ReadLine().Split(' ');
                    neighbor_lists[u] = System.Array.ConvertAll<string, int>(tokens, int.Parse);
                }

                this.correlation = CorrelationMatrix.ReadCorrelationMatrix(reader);

                // k is taken from the length of the first user's neighbor list
                this.k = (uint) neighbor_lists[0].Length;
                this.nearest_neighbors = neighbor_lists;
            }
        }
Example #30
0
        /// <summary>
        /// Loads a stored model: a user count, one space-separated line of neighbor IDs per
        /// user, and then a correlation matrix.
        /// </summary>
        /// <param name="filename">the name of the model file</param>
        public override void LoadModel(string filename)
        {
            using (StreamReader reader = Recommender.GetReader(filename, this.GetType()))
            {
                int user_count = int.Parse(reader.ReadLine());
                var neighbor_lists = new int[user_count][];

                for (int u = 0; u < neighbor_lists.Length; u++)
                {
                    // one line per user, IDs separated by single spaces
                    string[] tokens = reader.ReadLine().Split(' ');
                    neighbor_lists[u] = System.Array.ConvertAll<string, int>(tokens, int.Parse);
                }

                this.correlation = CorrelationMatrix.ReadCorrelationMatrix(reader);

                // k is taken from the length of the first user's neighbor list
                this.k = (uint)neighbor_lists[0].Length;
                this.nearest_neighbors = neighbor_lists;
            }
        }
Example #31
0
        /// <summary>
        /// Writes a small matrix file by hand and checks that ReadCorrelationMatrix yields
        /// 1 on the diagonal and the listed value at both [i, j] and [j, i].
        /// </summary>
        /// <remarks>
        /// Both streams are wrapped in using blocks so that a failing assertion cannot leave
        /// the file locked for later tests. The file itself is left on disk.
        /// </remarks>
        public void TestReadCorrelationMatrix()
        {
            // create test object
            const string filename = "correlation_matrix.txt";
            using (var writer = new StreamWriter(filename))
            {
                writer.WriteLine(3);
                writer.WriteLine("0 1 0.1");
                writer.WriteLine("0 2 0.2");
                writer.WriteLine("1 2 0.3");
            }

            using (var reader = new StreamReader(filename))
            {
                var corr_matrix = CorrelationMatrix.ReadCorrelationMatrix(reader);

                // diagonal entries are not in the file but are expected to be 1
                Assert.AreEqual(1f, corr_matrix[0, 0]);
                Assert.AreEqual(1f, corr_matrix[1, 1]);
                Assert.AreEqual(1f, corr_matrix[2, 2]);

                // each listed value is expected at both [i, j] and [j, i]
                Assert.AreEqual(0.1f, corr_matrix[0, 1]);
                Assert.AreEqual(0.1f, corr_matrix[1, 0]);

                Assert.AreEqual(0.2f, corr_matrix[0, 2]);
                Assert.AreEqual(0.2f, corr_matrix[2, 0]);

                Assert.AreEqual(0.3f, corr_matrix[1, 2]);
                Assert.AreEqual(0.3f, corr_matrix[2, 1]);
            }

            // TODO test Exception
            // test with wrong format
        }
Example #32
0
 /// <summary>Create a diversifier that uses the given item correlations</summary>
 /// <param name="item_correlation">the similarity measure to use for diversification</param>
 public SequentialDiversification(CorrelationMatrix item_correlation)
 {
     this.ItemCorrelations = item_correlation;
 }
Example #33
0
 /// <summary>Creates a KNN instance with no boolean ratings and no correlation matrix set.</summary>
 public KNN()
 {
     this.booleanRatings = null;
     this.corrMatrix = null;
 }
Example #34
0
        /// <summary>
        /// Loads the baseline predictor from the "-global-effects" companion file, hands it
        /// the current ratings if there are any, and reads the correlation matrix from the
        /// model file.
        /// </summary>
        /// <param name="filename">the name of the model file</param>
        public override void LoadModel(string filename)
        {
            baseline_predictor.LoadModel(filename + "-global-effects");
            if (ratings != null)
            {
                baseline_predictor.Ratings = ratings;
            }

            using (StreamReader model_reader = Model.GetReader(filename, this.GetType()))
            {
                this.correlation = CorrelationMatrix.ReadCorrelationMatrix(model_reader);
            }
        }
Example #35
0
        /// <summary>
        /// Parses the data and ensures the parameters are correct.
        /// </summary>
        /// <remarks>
        /// Parses every model parameter, validates the zero rate reference and the
        /// alpha1 != alpha2 constraint, resolves the zero rate curve, copies the parsed
        /// values into the model when no error occurred, and finally writes rho into the
        /// project's correlation matrix if this process is already registered.
        /// </remarks>
        /// <param name='p_Context'>
        /// The underlying project.
        /// </param>
        /// <returns>
        /// False if there were no parse errors.
        /// </returns>
        public bool Parse(IProject p_Context)
        {
            this.context = p_Context as Project;
            bool errors = false;

            // 'errors' is a local bool, so a helper that receives it by value cannot update
            // it. Accumulate the results directly; the non-short-circuit |= keeps parsing
            // every member even after an earlier failure.
            errors |= this._a1.Parse(p_Context);
            errors |= this._a2.Parse(p_Context);

            errors |= this._s1.Parse(p_Context);
            errors |= this._s2.Parse(p_Context);
            errors |= this._rho.Parse(p_Context);

            errors |= this.driftAdjustment.Parse(p_Context);

            // NOTE(review): the two checks below report through AddError but do not set
            // 'errors', unlike the zero-rate-curve lookup further down. Confirm whether
            // they should also make this method return true.
            if (this._zr.Expression.IndexOf("@") == -1)
            {
                p_Context.AddError(this._zr.Expression +
                                   " is not a reference to a zero rate curve");
            }

            // Checks for the model constraints: alpha1 != alpha2
            if (Math.Abs(this._a1.fV() - this._a2.fV()) < 10e-5)
            {
                p_Context.AddError("H&W2:  alpha1 and alpha2 must be different");
            }

            object zr_reference = Engine.Parser.EvaluateAsReference(this._zr.Expression);

            if (!Engine.Parser.GetParserError())
            {
                this.zeroRateCurve = zr_reference as Function;
                if (this.zeroRateCurve == null)
                {
                    errors = true;
                    p_Context.AddError("Cannot find the Zero Rate Curve! " + this._zr.Expression);
                }
            }
            else
            {
                errors = true;
            }

            // Only publish the parsed values when everything above succeeded.
            if (!errors)
            {
                base.alpha1     = this._a1.fV();
                base.sigma1     = this._s1.fV();
                this.sigma1Pow2 = System.Math.Pow(this._s1.fV(), 2);
            }

            // NOTE(review): both 'as ProjectProcess' casts are dereferenced without a null
            // check; this assumes p_Context is always a ProjectProcess. Confirm against callers.
            CorrelationMatrix R = (p_Context as ProjectProcess).Processes.r;
            int index           = (p_Context as ProjectProcess).Processes.GetProcessCorrelationIndex(this);

            // Index is -1 when the process is not yet in the process list.
            if (index != -1)
            {
                // Updates the correlation in the global correlation matrix.
                R.Set(index, index + 1, this._rho);
            }

            return(errors);
        }
Example #36
0
 /// <summary>
 /// Creates a categorizer whose similarity algorithm is built from the given correlation
 /// matrix using the algorithm's single-argument constructor.
 /// </summary>
 /// <param name="matrix">Correlation matrix handed to the similarity algorithm.</param>
 public DocumentCategorizer(CorrelationMatrix matrix)
 {
     var algorithm = new SimilarityAlgorithm(matrix);
     _similarity = algorithm;
 }
        /// <summary>
        /// Writes a 3x3 matrix to a text file, checks the first four lines of the file, and
        /// checks that reading the file back reproduces every cell.
        /// </summary>
        /// <remarks>
        /// All streams are wrapped in using blocks so that a failing assertion cannot leave
        /// the file locked for later tests. The file itself is left on disk.
        /// </remarks>
        public void TestWrite()
        {
            // create a test CorrelationMatrix
            var matrix = new CorrelationMatrix(3);
            float[] row1 = { 1f, 0.1f, 0.2f };
            float[] row2 = { 0.1f, 1f, 0.3f };
            float[] row3 = { 0.2f, 0.3f, 1f };

            matrix.SetRow(0, row1);
            matrix.SetRow(1, row2);
            matrix.SetRow(2, row3);

            // test
            string filename = "testCorrelationMatrixWriter.txt";
            using (var writer = new StreamWriter(filename))
            {
                matrix.Write(writer);
            }

            // raw file contents: the size line, then the three "row column value" lines checked below
            using (var reader1 = new StreamReader(filename))
            {
                Assert.AreEqual("3",       reader1.ReadLine().Trim());
                Assert.AreEqual("0 1 0.1", reader1.ReadLine().Trim());
                Assert.AreEqual("0 2 0.2", reader1.ReadLine().Trim());
                Assert.AreEqual("1 2 0.3", reader1.ReadLine().Trim());
            }

            // round trip through the reader
            using (var reader2 = new StreamReader(filename))
            {
                var corr_matrix = CorrelationMatrix.ReadCorrelationMatrix(reader2);

                Assert.AreEqual(1f,   corr_matrix[0, 0]);
                Assert.AreEqual(0.1f, corr_matrix[0, 1]);
                Assert.AreEqual(0.2f, corr_matrix[0, 2]);
                Assert.AreEqual(0.1f, corr_matrix[1, 0]);
                Assert.AreEqual(1f,   corr_matrix[1, 1]);
                Assert.AreEqual(0.3f, corr_matrix[1, 2]);
                Assert.AreEqual(0.2f, corr_matrix[2, 0]);
                Assert.AreEqual(0.3f, corr_matrix[2, 1]);
                Assert.AreEqual(1f,   corr_matrix[2, 2]);
            }
        }
Example #38
0
        /// <summary>
        /// Adds the words of every document to <paramref name="existingMatrix"/> using a parallel loop,
        /// then serializes the matrix and the monitored-scope statistics to two newly created files.
        /// </summary>
        /// <remarks>
        /// The two files are written in a <c>finally</c> block, so they are produced even when the
        /// parallel loop throws. The loop requests a break once the matrix holds more than 100000 words.
        /// NOTE(review): the stop-word remover, the sentence breaker and the matrix are shared by all
        /// loop iterations; confirm that they are safe for concurrent use.
        /// </remarks>
        /// <param name="existingMatrix">the matrix that receives the words of each document</param>
        /// <param name="documents">the contents of the documents to learn from</param>
        /// <returns>the same <paramref name="existingMatrix"/> instance</returns>
        public CorrelationMatrix UpdateCorrelationMatrix(CorrelationMatrix existingMatrix, IEnumerable <string> documents)
        {
            StopWordRemover stopwordRemover = new StopWordRemover();
            SentenceBreaker sentenceBreaker = SentenceBreaker.Instance;

            // Starts at 0: Interlocked.Increment returns the incremented value,
            // so the first document processed is reported as number 1.
            int documentCounter = 0;

            try
            {
                Parallel.ForEach(documents, (documentContents, loopState) =>
                {
                    int documentNumber = Interlocked.Increment(ref documentCounter);
                    using (new MonitoredScope("Learning from a document No. " + documentNumber.ToString()))
                    {
                        // One stemmer per document, as each iteration may run on a different thread.
                        SStemmer stemmer = new SStemmer();

                        // Pipeline: break into words -> stem -> remove stop words -> add to the matrix.
                        string[] words = sentenceBreaker.BreakIntoWords(documentContents);
                        words = stemmer.StemWords(words);
                        words = stopwordRemover.RemoveStopWords(words);
                        existingMatrix.Add(words);
                    }

                    Logger.Log("Finished document number: " + documentNumber.ToString());

                    // Stop scheduling further documents once the vocabulary grows past the cap.
                    if (existingMatrix.Words.Count > 100000)
                    {
                        loopState.Break();
                    }
                });
            }
            finally
            {
                Logger.Log("Unique words: " + existingMatrix.WordsMetadata.Count + ", Pairs: " + existingMatrix.Matrix.Count);

                // FileMode.CreateNew fails if the file already exists; the GUID suffix keeps names unique.
                string filename = "autorss_" + Guid.NewGuid().ToString();
                using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
                {
                    new CorrelationMatrixBinarySerializer().Serialize(fs, existingMatrix);
                }

                Logger.Log("Correlation Matrix saved to file: " + filename);

                filename = "autorss_Scopes_" + Guid.NewGuid().ToString();
                using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
                {
                    MonitoredScope.SerializeStatistics(fs);
                }

                Logger.Log("MonitoredScopes saved to file: " + filename);
            }

            return(existingMatrix);
        }
Example #39
0
        /// <summary>
        /// Builds a correlation matrix from scratch by passing a newly constructed matrix and the
        /// given documents to <c>UpdateCorrelationMatrix</c>.
        /// </summary>
        /// <param name="documents">the contents of the documents to learn from</param>
        /// <returns>the value returned by <c>UpdateCorrelationMatrix</c></returns>
        public CorrelationMatrix CalculateCorrelationMatrix(IEnumerable <string> documents)
        {
            return UpdateCorrelationMatrix(new CorrelationMatrix(), documents);
        }
 /// <summary>
 /// Replaces <c>corrMatrix</c> with a new matrix sized by <c>ratings.MaxItem</c>, assigns the
 /// configured correlation to it and constructs it from the training data.
 /// </summary>
 /// <param name="trainingData">the rating data passed to <c>Construct</c></param>
 public void Training(RatingData trainingData)
 {
     var freshMatrix = new CorrelationMatrix(ratings.MaxItem);

     // Publish the new matrix first, then configure and fill it (same order as before).
     corrMatrix = freshMatrix;
     freshMatrix.Correlation = correlation;
     freshMatrix.Construct(trainingData, Globals.itemSim);
 }
Example #41
0
 /// <summary>Copy constructor. Creates an object of type BinaryCosine from an existing correlation matrix</summary>
 /// <remarks>
 /// The base class is initialized with the source's number of rows, and the source's <c>data</c>
 /// member is assigned directly to this instance's <c>data</c>.
 /// </remarks>
 /// <param name="correlation_matrix">the correlation matrix to copy</param>
 public BinaryCosine(CorrelationMatrix correlation_matrix)
     : base(correlation_matrix.NumberOfRows)
 {
     data = correlation_matrix.data;
 }
        /// <summary>
        /// Fills a 4x4 correlation matrix row by row, spot-checks two cells, and checks the entity
        /// list returned by <c>GetPositivelyCorrelatedEntities</c> for entity 2.
        /// </summary>
        public void TestGetPositivelyCorrelatedEntities()
        {
            // test data, one inner array per matrix row
            float[][] rows =
            {
                new float[] { 0.1f, 0.4f, 0.2f, 0.3f },
                new float[] { 0.3f, 0.1f, 0.6f, 0.7f },
                new float[] { 0.2f, 0.6f, 0.3f, 0.5f },
                new float[] { 0.4f, 0.2f, 0.5f, 0.1f },
            };

            var matrix = new CorrelationMatrix(4);
            for (int rowIndex = 0; rowIndex < rows.Length; rowIndex++)
            {
                matrix.SetRow(rowIndex, rows[rowIndex]);
            }

            // spot-check that the rows were stored
            Assert.AreEqual(0.1f, matrix[0, 0]);
            Assert.AreEqual(0.5f, matrix[3, 2]);

            // The result is copied into a zero-initialized array of length 5,
            // so the expected array carries trailing zeros after the returned IDs.
            IList<int> returnedEntities = matrix.GetPositivelyCorrelatedEntities(2);
            int[] actual = new int[5];
            returnedEntities.CopyTo(actual, 0);

            int[] expected = { 1, 3, 0, 0, 0 };
            Assert.AreEqual(expected, actual);
        }
 /// <summary>Creates a diversifier that uses the given item correlations</summary>
 /// <param name="item_correlation">the similarity measure to use for diversification; stored in <c>ItemCorrelations</c></param>
 public SequentialDiversification(CorrelationMatrix item_correlation)
 {
     this.ItemCorrelations = item_correlation;
 }
Example #44
0
 /// <summary>Copy constructor. Creates an object of type Jaccard from an existing correlation matrix</summary>
 /// <remarks>
 /// The base class is initialized with the source's number of rows, and the source's <c>data</c>
 /// member is assigned directly to this instance's <c>data</c>.
 /// </remarks>
 /// <param name="correlation_matrix">the correlation matrix to copy</param>
 public Jaccard(CorrelationMatrix correlation_matrix)
     : base(correlation_matrix.NumberOfRows)
 {
     data = correlation_matrix.data;
 }
Example #45
0
        /// <summary>
        /// Reads the "text" elements of a Wikipedia XML dump, adds the processed words of each
        /// article in the requested range to a new correlation matrix, and serializes the matrix
        /// and the monitored-scope statistics to two newly created files.
        /// </summary>
        /// <param name="programArgs">
        /// supplies <c>WikipediaStartArticle</c> (number of "text" elements to skip) and
        /// <c>WikipediaEndArticle</c> (exclusive upper bound of the article index)
        /// </param>
        private static void CalculateCorrelationFromWikipediaDB(ProgramArguments programArgs)
        {
            WordBreaker       wordBreaker       = new WordBreaker();
            StopWordRemover   stopwordRemover   = new StopWordRemover();
            SStemmer          stemmer           = new SStemmer();
            CorrelationMatrix correlationMatrix = new CorrelationMatrix();

            string wikipediaPath = @"C:\Users\haabu\Downloads\enwiki-latest-pages-articles.xml\enwiki-latest-pages-articles.xml";

            // XmlReader.Create(Stream) leaves the stream open when the reader is disposed
            // (CloseInput defaults to false), so the FileStream needs its own using.
            using (FileStream wikipediaStream = new FileStream(wikipediaPath, FileMode.Open))
            using (XmlReader sr = XmlReader.Create(wikipediaStream))
            {
                // Skip the articles before the requested start index.
                for (int i = 0; i < programArgs.WikipediaStartArticle; i++)
                {
                    if (!sr.ReadToFollowing("text"))
                    {
                        break;
                    }
                }

                for (int i = programArgs.WikipediaStartArticle; i < programArgs.WikipediaEndArticle; i++)
                {
                    if (!sr.ReadToFollowing("text"))
                    {
                        // The reader has reached the end of the input; no later call can succeed.
                        break;
                    }

                    // Move inside the <text> element and read its content.
                    sr.ReadStartElement();
                    string pageContents = sr.ReadContentAsString();

                    // Pipeline: break into words -> remove stop words -> stem -> add to the matrix.
                    string[] words = wordBreaker.BreakParagraph(pageContents);
                    words = stopwordRemover.RemoveStopWords(words);
                    words = stemmer.StemWords(words);
                    correlationMatrix.Add(words);

                    Logger.Log("Finished document number: " + (i + 1).ToString());
                }
            }

            // FileMode.CreateNew fails if the file already exists; the GUID suffix keeps names unique.
            string filename = "autorss_" + Guid.NewGuid().ToString();

            using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
            {
                // NOTE(review): BinaryFormatter is obsolete and unsafe when deserializing untrusted
                // data. It is kept here so the output format does not change; consider migrating.
                BinaryFormatter formatter = new BinaryFormatter();
                formatter.Serialize(fs, correlationMatrix);
            }

            Logger.Log("Saved to file: " + filename);

            filename = "autorss_Scopes_" + Guid.NewGuid().ToString();
            using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
            {
                MonitoredScope.SerializeStatistics(fs);
            }

            Logger.Log("Saved to file: " + filename);
        }
Example #46
0
        /// <summary>
        /// Instantiates the correlation class named <paramref name="typename"/> from the
        /// <c>MyMediaLite.Correlation</c> namespace via reflection and stores it in <c>correlation</c>.
        /// </summary>
        /// <remarks>
        /// The instance is created through the constructor that takes a single <c>int</c>, which
        /// receives <c>MaxUserID + 1</c> for user entities and <c>MaxItemID + 1</c> otherwise.
        /// <c>Type.GetType</c> is called with <c>throwOnError = true</c>, so an unknown type name throws.
        /// </remarks>
        /// <param name="typename">the unqualified name of a class derived from CorrelationMatrix</param>
        /// <exception cref="ArgumentException">the type is not a subclass of CorrelationMatrix</exception>
        /// <exception cref="MissingMethodException">the type has no public constructor taking a single int</exception>
        private void CreateSimilarityMatrix(string typename)
        {
            Type type = Type.GetType("MyMediaLite.Correlation." + typename, true);

            if (!type.IsSubclassOf(typeof(CorrelationMatrix)))
            {
                throw new ArgumentException(typename + " is not a subclass of CorrelationMatrix");
            }

            // GetConstructor returns null when there is no matching public constructor;
            // report that explicitly instead of failing with a NullReferenceException.
            var constructor = type.GetConstructor(new Type[] { typeof(int) });
            if (constructor == null)
            {
                throw new MissingMethodException(type.FullName + " has no public constructor taking a single int");
            }

            int size = Entity == EntityType.USER ? MaxUserID + 1 : MaxItemID + 1;
            correlation = (CorrelationMatrix) constructor.Invoke(new object[] { size });
        }