public void TestGetPositivelyCorrelatedEntities()
{
    // create a test CorrelationMatrix
    var matrix = new CorrelationMatrix(4);
    float[] row1 = { 0.1f, 0.4f, 0.2f, 0.3f };
    float[] row2 = { 0.3f, 0.1f, 0.6f, 0.7f };
    float[] row3 = { 0.2f, 0.6f, 0.3f, 0.5f };
    float[] row4 = { 0.4f, 0.2f, 0.5f, 0.1f };
    matrix.SetRow(0, row1);
    matrix.SetRow(1, row2);
    matrix.SetRow(2, row3);
    matrix.SetRow(3, row4);
    Assert.AreEqual(0.1f, matrix[0, 0]);
    Assert.AreEqual(0.5f, matrix[3, 2]);

    // test
    IList<int> cor_entities_list = matrix.GetPositivelyCorrelatedEntities(2);
    int[] cor_entities = new int[5];
    cor_entities_list.CopyTo(cor_entities, 0);
    int[] pos_cor_entities = { 1, 3, 0, 0, 0 };
    Assert.AreEqual(pos_cor_entities, cor_entities);
}
private static void FindBestThresholds(ProgramArguments programArgs)
{
    IEnumerable<DocumentCluster> originalClusters = GetSimilarNewsTopicFiles();
    IEnumerable<Document> documents = Flatten(originalClusters);
    CorrelationMatrix correlationMatrix = LoadCorrelationMatrix(programArgs);
    string fileName = Guid.NewGuid().ToString() + ".csv";
    using (StreamWriter sw = new StreamWriter(fileName))
    {
        double errorGoal = 0.01;
        // NOTE: declared but never populated in this method
        SortedDictionary<double, SortedDictionary<double, List<double>>> errorValues =
            new SortedDictionary<double, SortedDictionary<double, List<double>>>();
        ErrorOptimizer.Optimize(0, 1, 0, 1, (permissibleValue, variationValue) =>
        {
            SimilarityAlgorithm similarityAlgorithm =
                new SimilarityAlgorithm(correlationMatrix, permissibleValue, variationValue);
            DocumentCategorizer categorizer = new DocumentCategorizer(similarityAlgorithm);
            IEnumerable<DocumentCluster> resultClusters = categorizer.Cluster(documents);
            IEnumerable<DocumentClusterErrorScore> errorScores =
                CalculateErrorScore(originalClusters, resultClusters);
            double average = (from score in errorScores select score.Value).Average();
            Console.WriteLine("Average Error: " + average);
            sw.WriteLine("{0}, {1}, {2}", permissibleValue, variationValue, average);
            return Math.Abs(average) <= errorGoal;
        });
    }
}
private static void OutputThresholdReport(List<Tuple<Statement, Statement>> pairs, ProgramArguments programArgs)
{
    CorrelationMatrix correlationMatrix = LoadCorrelationMatrix(programArgs);
    SimilarityAlgorithm sim = new SimilarityAlgorithm(correlationMatrix);
    StringBuilder sb = new StringBuilder();
    foreach (Tuple<Statement, Statement> pair in pairs)
    {
        Statement s1 = StemStatement(pair.Item1);
        Statement s2 = StemStatement(pair.Item2);
        double s12 = sim.StatementSimilarityToStatement(s1, s2);
        double s21 = sim.StatementSimilarityToStatement(s2, s1);
        bool areEqual = sim.StatementEqualsToStatement(s1, s2); // computed but not written to the report
        // was "{0},{1},{2},{2}": the duplicated index printed the minimum twice
        // and silently dropped the |s12 - s21| column
        sb.AppendFormat(
            "{0},{1},{2},{3}\r\n",
            pair.Item1.ToString().Replace(',', '.'),
            pair.Item2.ToString().Replace(',', '.'),
            Math.Min(s12, s21),
            Math.Abs(s12 - s21));
    }
    string reportName = "autoRSS_thresholdReport_" + Guid.NewGuid().ToString() + ".csv";
    using (StreamWriter sw = new StreamWriter(reportName))
    {
        sw.WriteLine(sb.ToString());
    }
    Console.WriteLine("Report: " + reportName);
}
// TODO think about moving the next two methods to their own class

/// <summary>Compute similarity between one item and a collection of items</summary>
/// <param name="item_id">the item ID</param>
/// <param name="items">a collection of items</param>
/// <param name="item_correlation">the similarity measure to use</param>
/// <returns>the similarity between the item and the collection</returns>
public static double Similarity(int item_id, ICollection<int> items, CorrelationMatrix item_correlation)
{
    double similarity = 0;
    foreach (int other_item_id in items)
        similarity += item_correlation[item_id, other_item_id];
    return similarity;
}
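A minimal usage sketch, reusing the 4x4 test matrix from the unit tests in this section; the candidate set { 1, 3 } is illustrative, and the unqualified call assumes the class that declares Similarity:

    var matrix = new CorrelationMatrix(4);
    matrix.SetRow(0, new float[] { 0.1f, 0.4f, 0.2f, 0.3f });
    matrix.SetRow(1, new float[] { 0.3f, 0.1f, 0.6f, 0.7f });
    matrix.SetRow(2, new float[] { 0.2f, 0.6f, 0.3f, 0.5f });
    matrix.SetRow(3, new float[] { 0.4f, 0.2f, 0.5f, 0.1f });
    double score = Similarity(0, new List<int> { 1, 3 }, matrix);
    // score == matrix[0, 1] + matrix[0, 3] == 0.4f + 0.3f, i.e. about 0.7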
private static void ExperimentPandVThresholds(ProgramArguments programArgs)
{
    IEnumerable<DocumentCluster> originalClusters = GetSimilarNewsTopicFiles();
    IEnumerable<Document> documents = Flatten(originalClusters);
    CorrelationMatrix correlationMatrix = LoadCorrelationMatrix(programArgs);
    double startP = 0, endP = 1, startV = 0, endV = 1;
    double step = 0.01;
    double[,] errorValues = new double[(int)((endP - startP) / step) + 1,
                                       (int)((endV - startV) / step) + 1];
    for (double i = 0, iP = startP; iP < endP; iP += step, i++)
    {
        for (double j = 0, iV = startV; iV < endV; iV += step, j++)
        {
            SimilarityAlgorithm similarityAlgorithm = new SimilarityAlgorithm(correlationMatrix, iP, iV);
            DocumentCategorizer categorizer = new DocumentCategorizer(similarityAlgorithm);
            IEnumerable<DocumentCluster> resultClusters = categorizer.Cluster(documents);
            IEnumerable<DocumentClusterErrorScore> errorScores = CalculateErrorScore(resultClusters, originalClusters);
            double average = (from score in errorScores select score.Value).Average();
            Console.WriteLine("Average Error: " + average);
            errorValues[(int)i, (int)j] = Math.Abs(average);
        }
    }

    // write the error surface as CSV: first row holds the V thresholds,
    // first column holds the P thresholds
    string fileName = Guid.NewGuid().ToString() + ".csv";
    using (StreamWriter sw = new StreamWriter(fileName))
    {
        sw.Write("0, ");
        for (double j = 0, iV = startV; iV < endV; iV += step, j++)
            sw.Write("{0}, ", iV);
        sw.WriteLine();
        for (double i = 0, iP = startP; iP < endP; iP += step, i++)
        {
            sw.Write("{0}, ", iP);
            for (double j = 0, iV = startV; iV < endV; iV += step, j++)
                sw.Write("{0}, ", errorValues[(int)i, (int)j]);
            sw.WriteLine();
        }
    }
    Logger.Log("Saved experiment to file: " + fileName);
}
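The grid above is driven by double counters that are cast back to int, which can drift after repeated `+= step` additions, since 0.01 is not exactly representable in binary floating point. A sketch of the same grid with integer counters, derived from the loop bounds above:

    int pSteps = (int)Math.Round((endP - startP) / step);
    int vSteps = (int)Math.Round((endV - startV) / step);
    for (int i = 0; i < pSteps; i++)
    {
        double iP = startP + i * step; // threshold reconstructed from an exact index
        for (int j = 0; j < vSteps; j++)
        {
            double iV = startV + j * step;
            // ... same clustering/error body as above ...
            // errorValues[i, j] = Math.Abs(average);
        }
    }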
///
public override void LearnAttributeToFactorMapping()
{
    BinaryCosine cosine_correlation = new BinaryCosine(MaxItemID + 1);
    Console.Error.WriteLine("training with max_item_id={0}", MaxItemID);
    cosine_correlation.ComputeCorrelations(item_attributes);
    this.item_correlation = cosine_correlation;
    _MapToLatentFactorSpace = Utils.Memoize<int, float[]>(__MapToLatentFactorSpace);
}
/// (variant of the method above that memoizes double-precision factors)
public override void LearnAttributeToFactorMapping()
{
    BinaryCosine cosine_correlation = new BinaryCosine(MaxItemID + 1);
    Console.Error.WriteLine("training with max_item_id={0}", MaxItemID);
    cosine_correlation.ComputeCorrelations(item_attributes);
    this.item_correlation = cosine_correlation;
    _MapToLatentFactorSpace = Utils.Memoize<int, double[]>(__MapToLatentFactorSpace);
}
public double GetCorrelation(string label1, string label2, double t = 0)
{
    if (CorrelationMatrix == null)
    {
        throw new Exception("No correlation matrix attached to model");
    }
    return CorrelationMatrix.GetCorrelation(label1, label2, t);
}
/// <summary>Compute the intra-set similarity of an item collection</summary>
/// <param name="items">a collection of items</param>
/// <param name="item_correlation">the similarity measure to use</param>
/// <returns>the intra-set similarity of the collection</returns>
public static double Similarity(ICollection<int> items, CorrelationMatrix item_correlation)
{
    // index by item ID, not by loop position: the original loop used the bare
    // counters i and j, which is only correct if the collection happens to be
    // the contiguous set 0..Count-1
    var item_list = new List<int>(items);
    double similarity = 0;
    for (int i = 0; i < item_list.Count; i++)
        for (int j = i + 1; j < item_list.Count; j++)
            similarity += item_correlation[item_list[i], item_list[j]];
    return similarity;
}
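Continuing the sketch above: with the same 4x4 test matrix, the intra-set similarity of { 0, 1, 3 } sums the three distinct pairs:

    double intra = Similarity(new List<int> { 0, 1, 3 }, matrix);
    // intra == matrix[0, 1] + matrix[0, 3] + matrix[1, 3]
    //       == 0.4f + 0.3f + 0.7f, i.e. about 1.4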
///
public override void LoadModel(string filename)
{
    using (StreamReader reader = Recommender.GetReader(filename, this.GetType()))
    {
        CorrelationMatrix correlation = CorrelationMatrix.ReadCorrelationMatrix(reader);
        base.Train(); // train baseline model
        this.correlation = new BinaryCosine(correlation);
    }
}
public TO_AssetFxModel ToTransportObject() =>
    new TO_AssetFxModel
    {
        AssetCurves = _assetCurves?.ToDictionary(x => x.Key, x => x.Value.GetTransportObject()),
        AssetVols = _assetVols?.ToDictionary(x => x.Key.GetTransportObject(), x => x.Value.GetTransportObject()),
        BuildDate = BuildDate,
        CorrelationMatrix = CorrelationMatrix?.GetTransportObject(),
        Fixings = _fixings?.ToDictionary(x => x.Key, x => x.Value.GetTransportObject()),
        FundingModel = _fundingModel.GetTransportObject(),
        Portfolio = _portfolio?.ToTransportObject(),
    };
public void CorrelMatrixFacts()
{
    var z = new CorrelationMatrix(); // default constructor
    z = new CorrelationMatrix(new[] { "x" }, new[] { "y" }, new[] { new[] { 0.9999 } });
    Assert.Throws<Exception>(() => z.GetCorrelation("x", "z"));
    Assert.False(z.TryGetCorrelation("x", "z", out var c));
    var zz = z.Clone();
    var bumped = zz.Bump(0.5);
    Assert.True(bumped.GetCorrelation("x", "y") < 1.0);
}
private static void SetSigma(PersonStatistics personStats, CorrelationMatrix correlationMatrix)
{
    var allBooleans = BooleanStatistic.GetAll(personStats);
    allBooleans = ExcludeNonvariantStatistcs(allBooleans);
    var sigma = new MultivariateBinaryGenerator();
    sigma.BuildCoverianceMatrix(allBooleans, correlationMatrix, RIntegration.GetSigma);
    personStats.BinaryGenerator = sigma;
}
private static void CreateThresholdTrainingData(ProgramArguments programArgs)
{
    CorrelationMatrix correlationMatrix = LoadCorrelationMatrix(programArgs);
    SimilarityAlgorithm sim = new SimilarityAlgorithm(correlationMatrix);
    while (true)
    {
        ScanTrainData(sim);
        Console.WriteLine("Press Enter to rescan");
        Console.ReadLine();
    }
}
public static object CreateCorrelationMatrix(
    [ExcelArgument(Description = "Object name")] string ObjectName,
    [ExcelArgument(Description = "Labels X")] object[] LabelsX,
    [ExcelArgument(Description = "Labels Y")] object[] LabelsY,
    [ExcelArgument(Description = "Correlations")] double[,] Correlations)
{
    return ExcelHelper.Execute(_logger, () =>
    {
        var matrix = new CorrelationMatrix(
            LabelsX.ObjectRangeToVector<string>(),
            LabelsY.ObjectRangeToVector<string>(),
            Correlations.SquareToJagged());
        return ExcelHelper.PushToCache<ICorrelationMatrix>(matrix, ObjectName);
    });
}
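From a worksheet, the UDF above would be invoked roughly like this (a sketch; the handle name and cell range are illustrative, assuming the function is registered under the same name):

    =CreateCorrelationMatrix("MyCorrels", {"x","y"}, {"x","y"}, A1:B2)

The returned handle ("MyCorrels") can then be passed to other functions that resolve ICorrelationMatrix objects from the cache.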
private static void CreateSimilarityReport(ProgramArguments programArgs)
{
    CorrelationMatrix correlationMatrix = LoadCorrelationMatrix(programArgs);
    SimilarityAlgorithm sim = new SimilarityAlgorithm(correlationMatrix);
    string wikipediaPath = @"C:\Users\haabu\Downloads\enwiki-latest-pages-articles.xml\enwiki-latest-pages-articles.xml";
    using (XmlReader sr = XmlReader.Create(new FileStream(wikipediaPath, FileMode.Open)))
    {
        // Skip first 100
        for (int i = 0; i < 100; i++)
        {
            bool elementFound = sr.ReadToFollowing("text");
            if (!elementFound)
                break;
        }
        string filename = "autorss_test_" + Guid.NewGuid().ToString() + ".csv";
        using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
        {
            StreamWriter sw = new StreamWriter(fs);
            Document prevDocument = null;
            for (int i = 0; i < 100; i++)
            {
                bool elementFound = sr.ReadToFollowing("text");
                if (elementFound)
                {
                    string pageContents;
                    //using (MonitoredScope scope = new MonitoredScope("Xml Read Element", TraceLevel.Medium))
                    {
                        sr.ReadStartElement();
                        pageContents = sr.ReadContentAsString();
                    }
                    Document document = ConstructDocument(pageContents);
                    //Console.WriteLine("Ratio: " + sim.CalculateOddsRatio(document, document) + "\r\nDocument Contents: " + pageContents);
                    if (null == prevDocument)
                        prevDocument = document;
                    sw.WriteLine(sim.CalculateOddsRatio(document, prevDocument));
                    prevDocument = document;
                }
            }
            sw.Flush();
        }
    }
}
private void CreateSimilarityMatrix(string typename)
{
    Type type = Type.GetType("MyMediaLite.Correlation." + typename, true);
    if (type.IsSubclassOf(typeof(CorrelationMatrix)))
        correlation = (CorrelationMatrix)type
            .GetConstructor(new Type[] { typeof(int) })
            .Invoke(new object[] { Entity == EntityType.USER ? MaxUserID + 1 : MaxItemID + 1 });
    else
        throw new Exception(typename + " is not a subclass of CorrelationMatrix");
}
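A hedged alternative sketch of the same factory using Activator.CreateInstance, which avoids the explicit constructor lookup; the type-name convention and dimension argument mirror the reflection call above, and the method name is hypothetical:

    private CorrelationMatrix CreateMatrix(string typename, int dim)
    {
        Type type = Type.GetType("MyMediaLite.Correlation." + typename, true);
        if (!type.IsSubclassOf(typeof(CorrelationMatrix)))
            throw new ArgumentException(typename + " is not a subclass of CorrelationMatrix");
        // resolves the (int) constructor at runtime
        return (CorrelationMatrix)Activator.CreateInstance(type, dim);
    }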
private static IEnumerable<DocumentClusterErrorScore> CategorizeLabeledNewsArticles(ProgramArguments programArgs)
{
    IEnumerable<DocumentCluster> originalClusters = GetSimilarNewsTopicFiles();
    IEnumerable<Document> documents = Flatten(originalClusters);
    CorrelationMatrix correlationMatrix = LoadCorrelationMatrix(programArgs);
    DocumentCategorizer categorizer = new DocumentCategorizer(correlationMatrix);
    IEnumerable<DocumentCluster> resultClusters = categorizer.Cluster(documents);
    OutputClusters(resultClusters);
    IEnumerable<DocumentClusterErrorScore> errorScores = CalculateErrorScore(originalClusters, resultClusters);
    return errorScores;
}
public CorrelationMatrix GetMatrix(Corpus corpus)
{
    var matrix = new CorrelationMatrix(corpus.UniqueLemmas.Count);
    _cycleProvider.Run(corpus.UniqueLemmas.Count, (i, j) =>
    {
        var cij = GetCorrelation(corpus, i, j);
        var cji = GetCorrelation(corpus, j, i);
        var max = Math.Max(cij, cji);
        var min = Math.Min(cij, cji);
        matrix[i, j] = max == 1 ? min : max;
    });
    return matrix;
}
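A worked example of the symmetrization rule in the lambda above: the cell keeps the stronger direction, unless the stronger direction is a degenerate 1.0, in which case it falls back to the weaker one.

    // cij = 0.6, cji = 0.4  ->  max = 0.6 != 1, so matrix[i, j] = 0.6
    // cij = 1.0, cji = 0.4  ->  max == 1,       so matrix[i, j] = 0.4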
public double GetCompositeVolForStrikeAndDate(string assetId, DateTime expiry, double strike, Currency ccy)
{
    var curve = GetPriceCurve(assetId);
    var fxId = $"{curve.Currency.Ccy}/{ccy.Ccy}";
    var fxPair = FundingModel.FxMatrix.GetFxPair(fxId);
    var fxSpotDate = fxPair.SpotDate(expiry);
    var fxFwd = FundingModel.GetFxRate(fxSpotDate, fxId);
    var fxVol = FundingModel.GetVolSurface(fxId).GetVolForDeltaStrike(0.5, expiry, fxFwd);
    var tExpC = BuildDate.CalculateYearFraction(expiry, DayCountBasis.Act365F);
    var correl = CorrelationMatrix.GetCorrelation(fxId, assetId, tExpC);
    var sigma = GetVolForStrikeAndDate(assetId, expiry, strike / fxFwd);
    sigma = System.Math.Sqrt(sigma * sigma + fxVol * fxVol + 2 * correl * fxVol * sigma);
    return sigma;
}
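The last line is the standard lognormal composite-vol identity, sigma_c^2 = sigma_a^2 + sigma_fx^2 + 2 * rho * sigma_a * sigma_fx. A quick numeric check with illustrative inputs:

    double sigmaAsset = 0.30, sigmaFx = 0.10, rho = 0.25;
    double sigmaComposite = Math.Sqrt(
        sigmaAsset * sigmaAsset + sigmaFx * sigmaFx + 2 * rho * sigmaAsset * sigmaFx);
    // 0.09 + 0.01 + 0.015 = 0.115, so sigmaComposite is about 0.339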
public void TestAddEntity()
{
    // create a test CorrelationMatrix
    var matrix = new CorrelationMatrix(4);
    float[] row1 = { 0.1f, 0.4f, 0.2f, 0.3f };
    float[] row2 = { 0.3f, 0.1f, 0.6f, 0.7f };
    float[] row3 = { 0.2f, 0.6f, 0.3f, 0.5f };
    float[] row4 = { 0.4f, 0.2f, 0.5f, 0.1f };
    matrix.SetRow(0, row1);
    matrix.SetRow(1, row2);
    matrix.SetRow(2, row3);
    matrix.SetRow(3, row4);

    // test
    matrix.AddEntity(4);
    Assert.AreEqual(5, matrix.dim1);
}
public void TestWrite()
{
    // create a test CorrelationMatrix
    var matrix = new CorrelationMatrix(3);
    float[] row1 = { 1f, 0.1f, 0.2f };
    float[] row2 = { 0.1f, 1f, 0.3f };
    float[] row3 = { 0.2f, 0.3f, 1f };
    matrix.SetRow(0, row1);
    matrix.SetRow(1, row2);
    matrix.SetRow(2, row3);

    // test
    string filename = "testCorrelationMatrixWriter.txt";
    var writer = new StreamWriter(filename);
    matrix.Write(writer);
    writer.Close();

    var reader1 = new StreamReader(filename);
    Assert.AreEqual("3", reader1.ReadLine().Trim());
    Assert.AreEqual("0 1 0.1", reader1.ReadLine().Trim());
    Assert.AreEqual("0 2 0.2", reader1.ReadLine().Trim());
    Assert.AreEqual("1 2 0.3", reader1.ReadLine().Trim());

    var reader2 = new StreamReader(filename);
    var corr_matrix = CorrelationMatrix.ReadCorrelationMatrix(reader2);
    Assert.AreEqual(1f, corr_matrix[0, 0]);
    Assert.AreEqual(0.1f, corr_matrix[0, 1]);
    Assert.AreEqual(0.2f, corr_matrix[0, 2]);
    Assert.AreEqual(0.1f, corr_matrix[1, 0]);
    Assert.AreEqual(1f, corr_matrix[1, 1]);
    Assert.AreEqual(0.3f, corr_matrix[1, 2]);
    Assert.AreEqual(0.2f, corr_matrix[2, 0]);
    Assert.AreEqual(0.3f, corr_matrix[2, 1]);
    Assert.AreEqual(1f, corr_matrix[2, 2]);

    // close streams and delete the text file
    reader1.Close();
    reader2.Close();
    //File.Delete(filename);
}
public void TestGetNearestNeighbors()
{
    // create a test CorrelationMatrix
    var matrix = new CorrelationMatrix(4);
    float[] row1 = { 0.1f, 0.4f, 0.2f, 0.3f };
    float[] row2 = { 0.3f, 0.1f, 0.6f, 0.7f };
    float[] row3 = { 0.2f, 0.6f, 0.3f, 0.5f };
    float[] row4 = { 0.4f, 0.2f, 0.5f, 0.1f };
    matrix.SetRow(0, row1);
    matrix.SetRow(1, row2);
    matrix.SetRow(2, row3);
    matrix.SetRow(3, row4);

    // test
    int[] nn_test = matrix.GetNearestNeighbors(2, 2);
    int[] nn_sol = { 1, 3 };
    Assert.AreEqual(nn_sol, nn_test);
}
public void TestSumUp()
{
    // NOTE: this body is identical to TestAddEntity above; it never exercises
    // a SumUp method, so the test name and its assertions are out of sync.
    var matrix = new CorrelationMatrix(4);
    float[] row1 = { 0.1f, 0.4f, 0.2f, 0.3f };
    float[] row2 = { 0.3f, 0.1f, 0.6f, 0.7f };
    float[] row3 = { 0.2f, 0.6f, 0.3f, 0.5f };
    float[] row4 = { 0.4f, 0.2f, 0.5f, 0.1f };
    matrix.SetRow(0, row1);
    matrix.SetRow(1, row2);
    matrix.SetRow(2, row3);
    matrix.SetRow(3, row4);

    // test
    matrix.AddEntity(4);
    Assert.AreEqual(5, matrix.dim1);
}
///
public override void LoadModel(string filename)
{
    using (StreamReader reader = Model.GetReader(filename, this.GetType()))
    {
        int num_users = int.Parse(reader.ReadLine());
        var nearest_neighbors = new int[num_users][];
        for (int u = 0; u < nearest_neighbors.Length; u++)
        {
            string[] numbers = reader.ReadLine().Split(' ');
            nearest_neighbors[u] = new int[numbers.Length];
            for (int i = 0; i < numbers.Length; i++)
                nearest_neighbors[u][i] = int.Parse(numbers[i]);
        }
        this.correlation = CorrelationMatrix.ReadCorrelationMatrix(reader);
        this.k = (uint)nearest_neighbors[0].Length;
        this.nearest_neighbors = nearest_neighbors;
    }
}
/// (variant of the method above that reads via Recommender.GetReader)
public override void LoadModel(string filename)
{
    using (StreamReader reader = Recommender.GetReader(filename, this.GetType()))
    {
        int num_users = int.Parse(reader.ReadLine());
        var nearest_neighbors = new int[num_users][];
        for (int u = 0; u < nearest_neighbors.Length; u++)
        {
            string[] numbers = reader.ReadLine().Split(' ');
            nearest_neighbors[u] = new int[numbers.Length];
            for (int i = 0; i < numbers.Length; i++)
            {
                nearest_neighbors[u][i] = int.Parse(numbers[i]);
            }
        }
        this.correlation = CorrelationMatrix.ReadCorrelationMatrix(reader);
        this.k = (uint)nearest_neighbors[0].Length;
        this.nearest_neighbors = nearest_neighbors;
    }
}
public void TestReadCorrelationMatrix()
{
    // create test object
    const string filename = "correlation_matrix.txt";
    var writer = new StreamWriter(filename);
    writer.WriteLine(3);
    writer.WriteLine("0 1 0.1");
    writer.WriteLine("0 2 0.2");
    writer.WriteLine("1 2 0.3");
    writer.Close();

    var reader = new StreamReader(filename);
    var corr_matrix = CorrelationMatrix.ReadCorrelationMatrix(reader);
    Assert.AreEqual(1f, corr_matrix[0, 0]);
    Assert.AreEqual(1f, corr_matrix[1, 1]);
    Assert.AreEqual(1f, corr_matrix[2, 2]);
    Assert.AreEqual(0.1f, corr_matrix[0, 1]);
    Assert.AreEqual(0.1f, corr_matrix[1, 0]);
    Assert.AreEqual(0.2f, corr_matrix[0, 2]);
    Assert.AreEqual(0.2f, corr_matrix[2, 0]);
    Assert.AreEqual(0.3f, corr_matrix[1, 2]);
    Assert.AreEqual(0.3f, corr_matrix[2, 1]);

    // TODO test Exception
    // test with wrong format

    // close streams and delete the text file
    reader.Close();
    //File.Delete(filename);
}
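The read and write tests above pin down the on-disk format: a dimension line, then one "i j value" line per upper-triangle entry, with an implicit unit diagonal and mirrored symmetry. A minimal stand-alone reader for that format (a hypothetical helper, not the library's ReadCorrelationMatrix; assumes System.IO and System.Globalization are imported):

    static float[,] ReadSparseSymmetric(TextReader reader)
    {
        int dim = int.Parse(reader.ReadLine());
        var m = new float[dim, dim];
        for (int d = 0; d < dim; d++)
            m[d, d] = 1f; // implicit unit diagonal
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string[] t = line.Split(' ');
            int i = int.Parse(t[0]);
            int j = int.Parse(t[1]);
            float v = float.Parse(t[2], CultureInfo.InvariantCulture);
            m[i, j] = v;
            m[j, i] = v; // mirror: the file stores only the upper triangle
        }
        return m;
    }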
/// <summary>Constructor</summary>
/// <param name="item_correlation">the similarity measure to use for diversification</param>
public SequentialDiversification(CorrelationMatrix item_correlation)
{
    ItemCorrelations = item_correlation;
}
public KNN()
{
    booleanRatings = null;
    corrMatrix = null;
}
///
public override void LoadModel(string filename)
{
    baseline_predictor.LoadModel(filename + "-global-effects");
    if (ratings != null)
        baseline_predictor.Ratings = ratings;
    using (StreamReader reader = Model.GetReader(filename, this.GetType()))
    {
        CorrelationMatrix correlation = CorrelationMatrix.ReadCorrelationMatrix(reader);
        this.correlation = correlation;
    }
}
/// <summary>
/// Parses the data and ensures the parameters are correct.
/// </summary>
/// <param name='p_Context'>
/// The underlying project.
/// </param>
/// <returns>
/// False if there were no parse errors.
/// </returns>
public bool Parse(IProject p_Context)
{
    this.context = p_Context as Project;
    bool errors = false;
    BoolHelper.AddBool(errors, this._a1.Parse(p_Context));
    BoolHelper.AddBool(errors, this._a2.Parse(p_Context));
    BoolHelper.AddBool(errors, this._s1.Parse(p_Context));
    BoolHelper.AddBool(errors, this._s2.Parse(p_Context));
    BoolHelper.AddBool(errors, this._rho.Parse(p_Context));
    BoolHelper.AddBool(errors, this.driftAdjustment.Parse(p_Context));
    if (this._zr.Expression.IndexOf("@") == -1)
    {
        p_Context.AddError(this._zr.Expression + " is not a reference to a zero rate curve");
    }

    // Checks the model constraint: alpha1 != alpha2.
    if (Math.Abs(this._a1.fV() - this._a2.fV()) < 10e-5)
    {
        p_Context.AddError("H&W2: alpha1 and alpha2 must be different");
    }

    object zr_reference = Engine.Parser.EvaluateAsReference(this._zr.Expression);
    if (!Engine.Parser.GetParserError())
    {
        this.zeroRateCurve = zr_reference as Function;
        if (this.zeroRateCurve == null)
        {
            errors = true;
            p_Context.AddError("Cannot find the Zero Rate Curve! " + this._zr.Expression);
        }
    }
    else
    {
        errors = true;
    }

    if (!errors)
    {
        base.alpha1 = this._a1.fV();
        base.sigma1 = this._s1.fV();
        this.sigma1Pow2 = System.Math.Pow(this._s1.fV(), 2);
    }

    CorrelationMatrix R = (p_Context as ProjectProcess).Processes.r;
    int index = (p_Context as ProjectProcess).Processes.GetProcessCorrelationIndex(this);

    // Index is -1 when the process is not yet in the process list.
    if (index != -1)
    {
        // Updates the correlation in the global correlation matrix.
        R.Set(index, index + 1, this._rho);
    }
    return errors;
}
public DocumentCategorizer(CorrelationMatrix matrix)
{
    _similarity = new SimilarityAlgorithm(matrix);
}
public CorrelationMatrix UpdateCorrelationMatrix(CorrelationMatrix existingMatrix, IEnumerable<string> documents)
{
    WordBreaker wordBreaker = new WordBreaker(); // unused here; words come from SentenceBreaker below
    StopWordRemover stopwordRemover = new StopWordRemover();
    SentenceBreaker sb = SentenceBreaker.Instance;
    int i = 0; // was 1, which made the first document log as number 2
    try
    {
        Parallel.ForEach(documents, (documentContents, loopState) =>
        {
            int documentNumber = Interlocked.Increment(ref i);
            using (new MonitoredScope("Learning from document No. " + documentNumber.ToString()))
            {
                SStemmer stemmer = new SStemmer();
                string[] words;
                //using (MonitoredScope scope = new MonitoredScope("Break Paragraph", TraceLevel.Medium))
                {
                    words = sb.BreakIntoWords(documentContents);
                }
                //using (MonitoredScope scope = new MonitoredScope("Stem Words", TraceLevel.Medium))
                {
                    words = stemmer.StemWords(words);
                }
                //using (MonitoredScope scope = new MonitoredScope("Remove Stop Words", TraceLevel.Medium))
                {
                    words = stopwordRemover.RemoveStopWords(words);
                }
                //using (MonitoredScope scope = new MonitoredScope("Calculate correlation", TraceLevel.Medium))
                {
                    existingMatrix.Add(words);
                }
            }
            Logger.Log("Finished document number: " + documentNumber.ToString());
            if (existingMatrix.Words.Count > 100000)
            {
                loopState.Break();
            }
        });
    }
    finally
    {
        Logger.Log("Unique words: " + existingMatrix.WordsMetadata.Count + ", Pairs: " + existingMatrix.Matrix.Count);
        string filename = "autorss_" + Guid.NewGuid().ToString();
        using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
        {
            new CorrelationMatrixBinarySerializer().Serialize(fs, existingMatrix);
        }
        Logger.Log("Correlation Matrix saved to file: " + filename);

        filename = "autorss_Scopes_" + Guid.NewGuid().ToString();
        using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
        {
            MonitoredScope.SerializeStatistics(fs);
        }
        Logger.Log("MonitoredScopes saved to file: " + filename);
    }
    return existingMatrix;
}
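One caveat worth noting: the loop above mutates existingMatrix from multiple threads, and nothing in this snippet shows CorrelationMatrix.Add being thread-safe. If it is not, a coarse lock keeps the parallel walk correct. A sketch, where Tokenize is a hypothetical stand-in for the break/stem/stop-word pipeline above:

    object gate = new object();
    Parallel.ForEach(documents, documentContents =>
    {
        string[] words = Tokenize(documentContents); // hypothetical helper
        lock (gate)
        {
            existingMatrix.Add(words); // serialize writes to the shared matrix
        }
    });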
public CorrelationMatrix CalculateCorrelationMatrix(IEnumerable<string> documents)
{
    CorrelationMatrix correlationMatrix = new CorrelationMatrix();
    return UpdateCorrelationMatrix(correlationMatrix, documents);
}
public void Training(RatingData trainingData)
{
    corrMatrix = new CorrelationMatrix(ratings.MaxItem);
    corrMatrix.Correlation = correlation;
    corrMatrix.Construct(trainingData, Globals.itemSim);
}
/// <summary>Copy constructor. Creates an object of type BinaryCosine from an existing correlation matrix</summary>
/// <param name="correlation_matrix">the correlation matrix to copy</param>
public BinaryCosine(CorrelationMatrix correlation_matrix)
    : base(correlation_matrix.NumberOfRows)
{
    this.data = correlation_matrix.data;
}
/// <summary>Copy constructor. Creates an object of type Jaccard from an existing correlation matrix</summary>
/// <param name="correlation_matrix">the correlation matrix to copy</param>
public Jaccard(CorrelationMatrix correlation_matrix)
    : base(correlation_matrix.NumberOfRows)
{
    this.data = correlation_matrix.data;
}
private static void CalculateCorrelationFromWikipediaDB(ProgramArguments programArgs)
{
    WordBreaker wordBreaker = new WordBreaker();
    StopWordRemover stopwordRemover = new StopWordRemover();
    SStemmer stemmer = new SStemmer();
    CorrelationMatrix correlationMatrix = new CorrelationMatrix();
    string wikipediaPath = @"C:\Users\haabu\Downloads\enwiki-latest-pages-articles.xml\enwiki-latest-pages-articles.xml";
    using (XmlReader sr = XmlReader.Create(new FileStream(wikipediaPath, FileMode.Open)))
    {
        // skip ahead to the configured start article
        for (int i = 0; i < programArgs.WikipediaStartArticle; i++)
        {
            bool elementFound = sr.ReadToFollowing("text");
            if (!elementFound)
                break;
        }
        for (int i = programArgs.WikipediaStartArticle; i < programArgs.WikipediaEndArticle; i++)
        {
            bool elementFound = sr.ReadToFollowing("text");
            if (elementFound)
            {
                string pageContents;
                //using (MonitoredScope scope = new MonitoredScope("Xml Read Element", TraceLevel.Medium))
                {
                    sr.ReadStartElement();
                    pageContents = sr.ReadContentAsString();
                }
                string[] words;
                //using (MonitoredScope scope = new MonitoredScope("Break Paragraph", TraceLevel.Medium))
                {
                    words = wordBreaker.BreakParagraph(pageContents);
                }
                //using (MonitoredScope scope = new MonitoredScope("Remove Stop Words", TraceLevel.Medium))
                {
                    words = stopwordRemover.RemoveStopWords(words);
                }
                //using (MonitoredScope scope = new MonitoredScope("Stem Words", TraceLevel.Medium))
                {
                    words = stemmer.StemWords(words);
                }
                //using (MonitoredScope scope = new MonitoredScope("Calculate correlation", TraceLevel.Medium))
                {
                    correlationMatrix.Add(words);
                }
                Logger.Log("Finished document number: " + (i + 1).ToString());
            }
        }
    }

    string filename = "autorss_" + Guid.NewGuid().ToString();
    using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
    {
        BinaryFormatter formatter = new BinaryFormatter();
        formatter.Serialize(fs, correlationMatrix);
    }
    Logger.Log("Saved to file: " + filename);

    filename = "autorss_Scopes_" + Guid.NewGuid().ToString();
    using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
    {
        MonitoredScope.SerializeStatistics(fs);
    }
    Logger.Log("Saved to file: " + filename);
}
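Note that BinaryFormatter is obsolete and disabled by default in recent .NET releases because of deserialization vulnerabilities. A hedged alternative for the save step above, assuming CorrelationMatrix round-trips cleanly through JSON:

    string json = System.Text.Json.JsonSerializer.Serialize(correlationMatrix);
    File.WriteAllText(filename + ".json", json);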