public void EmptyTargetArtifactsTest()
        {
            // Scenario: one source artifact, no target artifacts, and a dictionary
            // holding a single term. The component must still publish a similarity
            // matrix to the workspace, and that matrix must contain no entries.
            var sources = new TLArtifactsCollection();
            sources.Add(new TLArtifact("id", "text"));

            var targets = new TLArtifactsCollection();

            var index = new TLDictionaryIndex();
            index.AddTermEntry("term", 1, 1, 1);

            // Publish the inputs under the workspace keys this test uses.
            Workspace.Store("sourceArtifacts", sources);
            Workspace.Store("targetArtifacts", targets);
            Workspace.Store("dictionaryIndex", index);

            TracerConfig config = (TracerConfig)TestComponent.Configuration;
            config.SimilarityMetric = SimilarityMetricMethod.SimpleMatching;

            TestComponent.Compute();

            TLSimilarityMatrix result = (TLSimilarityMatrix)Workspace.Load("similarityMatrix");

            // The output has to exist AND be empty; anything else is a failure.
            bool createdAndEmpty = result != null && result.Count == 0;
            if (!createdAndEmpty)
            {
                Assert.Fail("Similarity Matrix should still be created but have nothing in it");
            }
        }
        public void TestTracingOfComponent()
        {
            // Placeholder end-to-end test: it feeds three source artifacts, four
            // target artifacts and a one-term dictionary through the component.
            // It currently always fails (see the TODOs) because no output checks
            // have been written yet.
            var sources = new TLArtifactsCollection();
            var targets = new TLArtifactsCollection();
            var index = new TLDictionaryIndex();

            // TODO: add inputs that matter
            sources.Add(new TLArtifact("id1", "first text"));
            sources.Add(new TLArtifact("id2", "words to do stuff with"));
            sources.Add(new TLArtifact("id3", "some more text"));

            targets.Add(new TLArtifact("id1", "hello world"));
            targets.Add(new TLArtifact("id2", "very very random yes indeed"));
            targets.Add(new TLArtifact("id3", "yep"));
            targets.Add(new TLArtifact("id4", "chickens in the coop"));

            index.AddTermEntry("term", 3, 3, 0.2);

            // Publish the inputs under the workspace keys this test uses.
            Workspace.Store("sourceArtifacts", sources);
            Workspace.Store("targetArtifacts", targets);
            Workspace.Store("dictionaryIndex", index);

            TracerConfig config = (TracerConfig)TestComponent.Configuration;
            config.SimilarityMetric = SimilarityMetricMethod.SimpleMatching;

            TestComponent.Compute();

            // Loaded (and cast) so that a missing or wrongly typed output surfaces here,
            // even though the value itself is not inspected yet.
            TLSimilarityMatrix result = (TLSimilarityMatrix)Workspace.Load("similarityMatrix");

            // TODO: add tests to make sure the output is correctly formatted
            Assert.Fail();
        }
        public void IncorrectInputSourceArtifactsType()
        {
            // Scenario: the "sourceArtifacts" workspace entry holds a plain string
            // instead of an artifacts collection, while the other inputs are valid.
            // NOTE(review): there is no assertion in this body; presumably the expected
            // failure is declared by a test attribute outside this view - confirm.
            var targets = new TLArtifactsCollection();
            targets.Add(new TLArtifact("id1", "text2"));

            var index = new TLDictionaryIndex();
            index.AddTermEntry("term", 1, 1, 1);

            // Deliberately store a value of the wrong type for the source artifacts.
            Workspace.Store("sourceArtifacts", "incorrect type");
            Workspace.Store("targetArtifacts", targets);
            Workspace.Store("dictionaryIndex", index);

            TracerConfig config = (TracerConfig)TestComponent.Configuration;
            config.SimilarityMetric = SimilarityMetricMethod.SimpleMatching;

            TestComponent.Compute();
        }
		/// <summary>
		/// Builds a dictionary index from a collection of already processed artifacts.
		/// Each artifact's text is split on single spaces; every resulting term gets a
		/// term entry, and every (term, artifact) pair gets a posting that counts how
		/// often the term occurs in that artifact. Afterwards the vector length of each
		/// artifact (square root of the sum of its squared term frequencies) is stored
		/// in the index through <c>SetDocumentVectorWeight</c>.
		/// </summary>
		/// <param name="setOfProcessedDocuments">Artifacts whose <c>Text</c> holds the processed, space-separated terms.</param>
		/// <param name="logger">Receives a warning for every artifact that yields no terms. May be null, in which case the warning is skipped.</param>
		/// <returns>The populated dictionary index.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="setOfProcessedDocuments"/> is null.</exception>
		public static TLDictionaryIndex build(TLArtifactsCollection setOfProcessedDocuments, ComponentLogger logger)
		{
			if (setOfProcessedDocuments == null)
			{
				throw new ArgumentNullException("setOfProcessedDocuments");
			}

			// Per artifact id: running sum of squared term frequencies. The square root
			// of that sum is the document vector length stored in the index at the end.
			Dictionary<string, double> documentVectorLength = new Dictionary<string, double>();

			// Creates the dictionary
			TLDictionaryIndex dict = new TLDictionaryIndex();

			// Pass 1: count term occurrences per artifact and across artifacts.
			foreach (TLArtifact artifact in setOfProcessedDocuments.Values)
			{
				// A missing text is handled like an empty one instead of crashing on Split.
				string text = artifact.Text ?? String.Empty;
				string[] terms = text.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

				if (terms.Length == 0 && logger != null)
				{
					logger.Warn(String.Format("Artifact Id {0} is empty.", artifact.Id));
				}

				foreach (string term in terms)
				{
					if (!dict.ContainsTermEntry(term))
					{
						// First time this term is seen anywhere: create entry and posting.
						TLTermEntry newEntry = dict.AddTermEntry(term, 1, 1, 1.0);
						newEntry.AddPosting(artifact.Id, 1, 1.0);
						continue;
					}

					// Term already known: one more occurrence overall.
					TLTermEntry termEntry = dict.GetTermEntry(term);
					termEntry.TotalFrequencyAcrossArtifacts += 1;
					termEntry.Weight = 1.0;

					if (!termEntry.ContainsPosting(artifact.Id))
					{
						// First occurrence of the term in this artifact.
						termEntry.NumberOfArtifactsContainingTerm += 1;
						termEntry.AddPosting(artifact.Id, 1, 1.0);
					}
					else
					{
						// Repeated occurrence within the same artifact.
						TLPosting posting = termEntry.GetPosting(artifact.Id);
						posting.Frequency += 1;
						posting.Weight += 1.0;
					}
				}
			}

			// Pass 2: now that all the counts are in, accumulate the squared frequencies
			// of every posting under its artifact id.
			foreach (TLTermEntry entry in dict.TermEntries)
			{
				foreach (TLPosting posting in entry.Postings)
				{
					string artifactId = posting.ArtifactId;

					// TryGetValue leaves the sum at 0.0 when the artifact has no total yet.
					double sumOfSquares;
					documentVectorLength.TryGetValue(artifactId, out sumOfSquares);
					documentVectorLength[artifactId] = sumOfSquares + Math.Pow(posting.Frequency, 2);
				}
			}

			// Pass 3: take the square root of each sum and store it as the document
			// vector length in the dictionary. Artifacts without any term have no sum
			// and are skipped.
			foreach (TLArtifact artifact in setOfProcessedDocuments.Values)
			{
				double sumOfSquares;
				if (documentVectorLength.TryGetValue(artifact.Id, out sumOfSquares))
				{
					dict.SetDocumentVectorWeight(artifact.Id, Math.Sqrt(sumOfSquares));
				}
			}

			return dict;
		}