/// <summary>
/// Restores this instance from its raw binary form (the custom serialization path used for better performance in TraceLab).
/// </summary>
/// <param name="reader">The binary reader to consume the serialized data from.</param>
/// <exception cref="InvalidOperationException">The version number read from the stream differs from <c>version</c>.</exception>
public void ReadData(System.IO.BinaryReader reader)
{
    // The payload starts with a version stamp; refuse anything that does not match.
    int storedVersion = reader.ReadInt32();
    if (storedVersion != version)
    {
        throw new InvalidOperationException("Binary reader did not correct version data. Data corrupted. Potentially IRawSerializable not implemented correctly");
    }

    // Term section: an entry count followed by that many serialized term entries.
    int termsLeft = reader.ReadInt32();

    // Start from fresh collections so previously held terms are discarded.
    m_terms = new Dictionary<string, TLTermEntry>();
    m_termEntries = new TermEntryCollection();
    while (termsLeft > 0)
    {
        TLTermEntry entry = new TLTermEntry();
        entry.ReadData(reader);
        AddTermEntry(entry);
        termsLeft--;
    }

    // Vector-length section: a pair count followed by (artifact id, length) pairs.
    int pairsLeft = reader.ReadInt32();
    m_vectorLengthsOfArtifacts = new Dictionary<string, double>();
    while (pairsLeft > 0)
    {
        // Arguments are evaluated left to right, so the id is read before the value.
        m_vectorLengthsOfArtifacts.Add(reader.ReadString(), reader.ReadDouble());
        pairsLeft--;
    }
}
/// <summary>
/// Populates this instance from its XML representation.
/// </summary>
/// <param name="reader">The XmlReader stream from which the object is deserialized.</param>
public void ReadXml(System.Xml.XmlReader reader)
{
    const System.Xml.XmlNodeType sectionEnd = System.Xml.XmlNodeType.EndElement;

    // An empty element carries no content to load.
    if (reader.IsEmptyElement)
    {
        return;
    }

    // NOTE(review): int.Parse/double.Parse below use the current culture; confirm the writer does the same.
    version = int.Parse(reader.GetAttribute("Version"));

    // Advance two nodes before the term entries start
    // (presumably past the root element and the term list's start tag; TODO confirm against the writer).
    reader.Read();
    reader.Read();

    // Each term entry deserializes itself; the loop relies on TLTermEntry.ReadXml advancing the reader.
    while (reader.NodeType != sectionEnd)
    {
        TLTermEntry entry = new TLTermEntry();
        entry.ReadXml(reader);
        AddTermEntry(entry);
    }

    // Advance two nodes again to reach the vector-length records
    // (presumably past the end tag of the terms and the next start tag; TODO confirm).
    reader.Read();
    reader.Read();

    while (reader.NodeType != sectionEnd)
    {
        string id = reader.GetAttribute("artifactId");
        string lengthText = reader.GetAttribute("vectorLength");
        SetDocumentVectorWeight(id, double.Parse(lengthText));
        reader.Read();
    }
}
/// <summary>
/// Registers an already constructed term entry in the dictionary index, keyed by its term.
/// </summary>
/// <param name="termEntry">term entry to be added</param>
/// <exception cref="ArgumentException">An entry with the same term is already present.</exception>
private void AddTermEntry(TLTermEntry termEntry)
{
    if (!Terms.ContainsKey(termEntry.Term))
    {
        // Keep the keyed lookup and the entry collection in step.
        Terms.Add(termEntry.Term, termEntry);
        m_termEntries.Add(termEntry);
        return;
    }

    throw new ArgumentException("The dictionary already contains that term");
}
/// <summary>
/// Creates a term entry from the supplied values and registers it in the dictionary index.
/// </summary>
/// <param name="term">term word</param>
/// <param name="numberOfArtifactsContainingTerm">number of artifacts whose text contains the given term</param>
/// <param name="totalFrequencyAcrossAllArtifacts">total frequency across all artifacts</param>
/// <param name="weight">weight for the given term</param>
/// <returns>the newly created term entry</returns>
/// <exception cref="ArgumentException">An entry with the same term is already present.</exception>
public TLTermEntry AddTermEntry(string term, int numberOfArtifactsContainingTerm, int totalFrequencyAcrossAllArtifacts, double weight)
{
    // Integrity check first: nothing is constructed when the term is already indexed.
    if (!Terms.ContainsKey(term))
    {
        TLTermEntry created = new TLTermEntry(term, numberOfArtifactsContainingTerm, totalFrequencyAcrossAllArtifacts, weight);

        // Keep the keyed lookup and the entry collection in step.
        Terms.Add(term, created);
        m_termEntries.Add(created);
        return created;
    }

    throw new ArgumentException("The dictionary already contains that term");
}
/// <summary>
/// Loads this instance from raw binary data (the faster custom serialization used for better performance in TraceLab).
/// </summary>
/// <param name="reader">The binary reader supplying the serialized data.</param>
/// <exception cref="InvalidOperationException">The version number in the stream is not equal to <c>version</c>.</exception>
public void ReadData(System.IO.BinaryReader reader)
{
    // Guard: the first value in the stream must be the expected version number.
    if (reader.ReadInt32() != version)
    {
        throw new InvalidOperationException("Binary reader did not correct version data. Data corrupted. Potentially IRawSerializable not implemented correctly");
    }

    int entryTotal = reader.ReadInt32();

    // Replace both term collections before filling them from the stream.
    m_terms = new Dictionary<string, TLTermEntry>();
    m_termEntries = new TermEntryCollection();
    for (int pending = entryTotal; pending > 0; --pending)
    {
        TLTermEntry loaded = new TLTermEntry();
        loaded.ReadData(reader);
        AddTermEntry(loaded);
    }

    int lengthTotal = reader.ReadInt32();
    m_vectorLengthsOfArtifacts = new Dictionary<string, double>();
    for (int pending = lengthTotal; pending > 0; --pending)
    {
        // Each record is the artifact id followed by its vector length.
        string id = reader.ReadString();
        double length = reader.ReadDouble();
        m_vectorLengthsOfArtifacts.Add(id, length);
    }
}
/// <summary>
/// Rebuilds this object from its XML representation.
/// </summary>
/// <param name="reader">The XmlReader stream from which the object is deserialized.</param>
public void ReadXml(System.Xml.XmlReader reader)
{
    // Nothing to read when the element has no content.
    if (reader.IsEmptyElement)
    {
        return;
    }

    // NOTE(review): numeric parsing here is culture-sensitive; verify it matches how the values are written.
    string versionText = reader.GetAttribute("Version");
    version = int.Parse(versionText);

    // Two reads position the reader on the first term entry
    // (presumably skipping the root start tag and the term container start tag; TODO confirm).
    reader.Read();
    reader.Read();
    while (reader.NodeType != System.Xml.XmlNodeType.EndElement)
    {
        // The entry reads itself; the loop depends on that call moving the reader forward.
        TLTermEntry parsedEntry = new TLTermEntry();
        parsedEntry.ReadXml(reader);
        AddTermEntry(parsedEntry);
    }

    // Two more reads position the reader on the first vector-length record
    // (presumably skipping the container end tag and the next start tag; TODO confirm).
    reader.Read();
    reader.Read();
    while (reader.NodeType != System.Xml.XmlNodeType.EndElement)
    {
        string artifactKey = reader.GetAttribute("artifactId");
        string rawLength = reader.GetAttribute("vectorLength");
        double parsedLength = double.Parse(rawLength);
        SetDocumentVectorWeight(artifactKey, parsedLength);

        // Move on to the next record (or to the closing tag that ends the loop).
        reader.Read();
    }
}
/// <summary>
/// Adds an existing term entry to the dictionary index under the entry's own term.
/// </summary>
/// <param name="termEntry">term entry to be added</param>
/// <exception cref="ArgumentException">The index already holds an entry for that term.</exception>
private void AddTermEntry(TLTermEntry termEntry)
{
    bool alreadyIndexed = Terms.ContainsKey(termEntry.Term);
    if (alreadyIndexed)
    {
        throw new ArgumentException("The dictionary already contains that term");
    }

    // Record the entry in both the keyed lookup and the entry collection.
    Terms.Add(termEntry.Term, termEntry);
    m_termEntries.Add(termEntry);
}
/// <summary>
/// Builds a new term entry from the given values and adds it to the dictionary index.
/// </summary>
/// <param name="term">term word</param>
/// <param name="numberOfArtifactsContainingTerm">number of artifacts whose text contains the given term</param>
/// <param name="totalFrequencyAcrossAllArtifacts">total frequency across all artifacts</param>
/// <param name="weight">weight for the given term</param>
/// <returns>the term entry that was just created</returns>
/// <exception cref="ArgumentException">The index already holds an entry for that term.</exception>
public TLTermEntry AddTermEntry(string term, int numberOfArtifactsContainingTerm, int totalFrequencyAcrossAllArtifacts, double weight)
{
    // Integrity check: a term may only be indexed once.
    bool duplicateTerm = Terms.ContainsKey(term);
    if (duplicateTerm)
    {
        throw new ArgumentException("The dictionary already contains that term");
    }

    TLTermEntry newEntry = new TLTermEntry(
        term,
        numberOfArtifactsContainingTerm,
        totalFrequencyAcrossAllArtifacts,
        weight);

    // Record the entry in both the keyed lookup and the entry collection.
    Terms.Add(term, newEntry);
    m_termEntries.Add(newEntry);

    return newEntry;
}
/// <summary>
/// Orders two TLTermEntry objects by their terms, ignoring case.
/// </summary>
/// <param name="x">The first TLTermEntry to compare.</param>
/// <param name="y">The second TLTermEntry to compare.</param>
/// <returns>A 32-bit signed integer that indicates the relative order of the two entries' terms.</returns>
public int Compare(TLTermEntry x, TLTermEntry y)
{
    // A fresh comparer is created per call, exactly as before.
    CaseInsensitiveComparer termComparer = new CaseInsensitiveComparer();
    return termComparer.Compare(x.Term, y.Term);
}
/// <summary>
/// Determines the relative sort position of two TLTermEntry objects from a case-insensitive comparison of their terms.
/// </summary>
/// <param name="x">The first TLTermEntry to compare.</param>
/// <param name="y">The second TLTermEntry to compare.</param>
/// <returns>A 32-bit signed integer that indicates the relative order of the two entries' terms.</returns>
public int Compare(TLTermEntry x, TLTermEntry y)
{
    // The comparer is constructed first, then the two terms are read in order.
    var ignoreCase = new CaseInsensitiveComparer();
    int ordering = ignoreCase.Compare(x.Term, y.Term);
    return ordering;
}