Ejemplo n.º 1
0
        /// <summary>
        /// Restores this object from a binary stream (the faster custom serialization path used for performance in TraceLab).
        /// </summary>
        /// <remarks>
        /// Data is consumed in this order: an Int32 version stamp, an Int32 term count followed by that many
        /// term entries, then an Int32 count followed by that many (artifact id string, vector length double) pairs.
        /// </remarks>
        /// <param name="reader">The binary reader to pull the serialized data from.</param>
        /// <exception cref="InvalidOperationException">The version stamp read from the stream differs from <c>version</c>.</exception>
        public void ReadData(System.IO.BinaryReader reader)
        {
            int storedVersion = reader.ReadInt32();

            // Guard clause: do not interpret a payload that was written with a different version.
            if (storedVersion != version)
            {
                throw new InvalidOperationException("Binary reader did not correct version data. Data corrupted. Potentially IRawSerializable not implemented correctly");
            }

            int expectedTerms = reader.ReadInt32();

            // Start from fresh, empty collections; whatever they held before is dropped.
            m_terms       = new Dictionary<string, TLTermEntry>();
            m_termEntries = new TermEntryCollection();

            for (int remaining = expectedTerms; remaining > 0; --remaining)
            {
                // Each entry deserializes itself from the same reader.
                TLTermEntry entry = new TLTermEntry();
                entry.ReadData(reader);
                AddTermEntry(entry);
            }

            int expectedVectorLengths = reader.ReadInt32();

            m_vectorLengthsOfArtifacts = new Dictionary<string, double>();
            for (int remaining = expectedVectorLengths; remaining > 0; --remaining)
            {
                string id     = reader.ReadString();
                double length = reader.ReadDouble();
                m_vectorLengthsOfArtifacts.Add(id, length);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Rebuilds this object from its XML representation.
        /// </summary>
        /// <remarks>
        /// Returns immediately when the current element is empty. Otherwise the <c>Version</c> attribute is parsed
        /// into <c>version</c>, term entries are read until an end element is reached, and then
        /// <c>artifactId</c>/<c>vectorLength</c> attribute pairs are read until the next end element.
        /// </remarks>
        /// <param name="reader">The XmlReader stream from which the object is deserialized.</param>
        public void ReadXml(System.Xml.XmlReader reader)
        {
            // Nothing to restore from an empty element.
            if (reader.IsEmptyElement)
            {
                return;
            }

            // NOTE(review): int.Parse/double.Parse below use the current culture; confirm that matches how the values were written.
            string versionText = reader.GetAttribute("Version");
            version = int.Parse(versionText);

            // Two reads before the term entries; presumably this steps past the wrapper element onto the
            // first entry (depends on the layout produced by the writer; confirm against WriteXml).
            reader.Read();
            reader.Read();
            while (reader.NodeType != System.Xml.XmlNodeType.EndElement)
            {
                // The entry is expected to advance the reader itself while deserializing.
                TLTermEntry entry = new TLTermEntry();
                entry.ReadXml(reader);
                AddTermEntry(entry);
            }

            // Same two-step advance before the vector length elements.
            reader.Read();
            reader.Read();
            for (; reader.NodeType != System.Xml.XmlNodeType.EndElement; reader.Read())
            {
                string id     = reader.GetAttribute("artifactId");
                double length = double.Parse(reader.GetAttribute("vectorLength"));
                SetDocumentVectorWeight(id, length);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Registers an already constructed term entry in the dictionary index.
        /// </summary>
        /// <param name="termEntry">The term entry to add; it is keyed by its <c>Term</c>.</param>
        /// <exception cref="ArgumentException"><c>Terms</c> already contains the entry's term.</exception>
        private void AddTermEntry(TLTermEntry termEntry)
        {
            // Reject duplicates before touching either collection.
            bool alreadyIndexed = Terms.ContainsKey(termEntry.Term);
            if (alreadyIndexed)
            {
                throw new ArgumentException("The dictionary already contains that term");
            }

            // The entry goes into the keyed lookup and into the entry collection.
            Terms.Add(termEntry.Term, termEntry);
            m_termEntries.Add(termEntry);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Creates a term entry from the supplied values and adds it to the dictionary index.
        /// </summary>
        /// <param name="term">The term word; used as the key in <c>Terms</c>.</param>
        /// <param name="numberOfArtifactsContainingTerm">Number of artifacts that contain the given term.</param>
        /// <param name="totalFrequencyAcrossAllArtifacts">Total frequency of the term across all artifacts.</param>
        /// <param name="weight">Weight for the given term.</param>
        /// <returns>The term entry that was just created and added.</returns>
        /// <exception cref="ArgumentException"><c>Terms</c> already contains <paramref name="term"/>.</exception>
        public TLTermEntry AddTermEntry(string term, int numberOfArtifactsContainingTerm, int totalFrequencyAcrossAllArtifacts, double weight)
        {
            // Integrity check: a term may only be registered once.
            bool alreadyIndexed = Terms.ContainsKey(term);
            if (alreadyIndexed)
            {
                throw new ArgumentException("The dictionary already contains that term");
            }

            TLTermEntry created = new TLTermEntry(
                term,
                numberOfArtifactsContainingTerm,
                totalFrequencyAcrossAllArtifacts,
                weight);

            // The entry goes into the keyed lookup and into the entry collection.
            Terms.Add(term, created);
            m_termEntries.Add(created);

            return created;
        }
        /// <summary>
        /// Loads this object's state from a binary reader (faster custom serialization for better performance in TraceLab).
        /// </summary>
        /// <remarks>
        /// Reads, in order: an Int32 version, an Int32 number of terms and that many term entries, then an Int32
        /// number of vector lengths and that many (string artifact id, double value) pairs.
        /// </remarks>
        /// <param name="reader">The reader supplying the serialized data.</param>
        /// <exception cref="InvalidOperationException">The version read from <paramref name="reader"/> is not equal to <c>version</c>.</exception>
        public void ReadData(System.IO.BinaryReader reader)
        {
            // A mismatching version stamp means the rest of the data cannot be trusted.
            if (reader.ReadInt32() != version)
            {
                throw new InvalidOperationException("Binary reader did not correct version data. Data corrupted. Potentially IRawSerializable not implemented correctly");
            }

            int termTotal = reader.ReadInt32();

            // Replace the existing collections with new, empty ones before repopulating.
            m_terms = new Dictionary<string, TLTermEntry>();
            m_termEntries = new TermEntryCollection();

            int termsRead = 0;
            while (termsRead < termTotal)
            {
                TLTermEntry loaded = new TLTermEntry();
                loaded.ReadData(reader);
                AddTermEntry(loaded);
                ++termsRead;
            }

            int vectorLengthTotal = reader.ReadInt32();

            m_vectorLengthsOfArtifacts = new Dictionary<string, double>();
            int pairsRead = 0;
            while (pairsRead < vectorLengthTotal)
            {
                // Key first, then value: the read order must match the stream.
                string key = reader.ReadString();
                double value = reader.ReadDouble();
                m_vectorLengthsOfArtifacts.Add(key, value);
                ++pairsRead;
            }
        }
        /// <summary>
        /// Populates this object from its XML representation.
        /// </summary>
        /// <remarks>
        /// Does nothing for an empty element. Otherwise parses the <c>Version</c> attribute into <c>version</c>,
        /// reads term entries up to the next end element, then reads <c>artifactId</c>/<c>vectorLength</c>
        /// attribute pairs up to the following end element.
        /// </remarks>
        /// <param name="reader">The XmlReader stream from which the object is deserialized.</param>
        public void ReadXml(System.Xml.XmlReader reader)
        {
            bool hasContent = !reader.IsEmptyElement;
            if (!hasContent)
            {
                return;
            }

            // NOTE(review): parsing here is current-culture; verify it agrees with the formatting used when writing.
            version = int.Parse(reader.GetAttribute("Version"));

            // Advance two nodes to reach the term entries (presumably past a wrapper element; confirm against the writer).
            reader.Read();
            reader.Read();
            while (reader.NodeType != System.Xml.XmlNodeType.EndElement)
            {
                // Reading the entry is what moves the reader forward in this loop.
                TLTermEntry loaded = new TLTermEntry();
                loaded.ReadXml(reader);
                AddTermEntry(loaded);
            }

            // Advance two nodes again to reach the vector length elements.
            reader.Read();
            reader.Read();
            while (reader.NodeType != System.Xml.XmlNodeType.EndElement)
            {
                SetDocumentVectorWeight(
                    reader.GetAttribute("artifactId"),
                    double.Parse(reader.GetAttribute("vectorLength")));
                reader.Read();
            }
        }
        /// <summary>
        /// Adds an existing term entry to the dictionary index, keyed by its <c>Term</c>.
        /// </summary>
        /// <param name="termEntry">The term entry to be added.</param>
        /// <exception cref="ArgumentException">An entry with the same term is already present in <c>Terms</c>.</exception>
        private void AddTermEntry(TLTermEntry termEntry)
        {
            if (!Terms.ContainsKey(termEntry.Term))
            {
                // New term: record it in the keyed lookup and in the entry collection.
                Terms.Add(termEntry.Term, termEntry);
                m_termEntries.Add(termEntry);
                return;
            }

            // Only reached when the term is already indexed.
            throw new ArgumentException("The dictionary already contains that term");
        }
        /// <summary>
        /// Builds a new term entry from the given values and adds it to the dictionary index.
        /// </summary>
        /// <param name="term">The term word, used as the lookup key.</param>
        /// <param name="numberOfArtifactsContainingTerm">Number of artifacts containing the given term.</param>
        /// <param name="totalFrequencyAcrossAllArtifacts">Total frequency across all artifacts.</param>
        /// <param name="weight">Weight for the given term.</param>
        /// <returns>The newly created term entry.</returns>
        /// <exception cref="ArgumentException">An entry for <paramref name="term"/> is already present in <c>Terms</c>.</exception>
        public TLTermEntry AddTermEntry(string term, int numberOfArtifactsContainingTerm, int totalFrequencyAcrossAllArtifacts, double weight)
        {
            if (!Terms.ContainsKey(term))
            {
                TLTermEntry fresh = new TLTermEntry(term, numberOfArtifactsContainingTerm, totalFrequencyAcrossAllArtifacts, weight);

                // Record the entry in the keyed lookup and in the entry collection.
                Terms.Add(term, fresh);
                m_termEntries.Add(fresh);

                return fresh;
            }

            // Integrity check failed: the term is already indexed.
            throw new ArgumentException("The dictionary already contains that term");
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Compares two TLTermEntry objects by their Term, ignoring case and using the current culture's comparison rules,
 /// and returns an integer that indicates their relative position in the sort order.
 /// </summary>
 /// <param name="x">The first TLTermEntry to compare.</param>
 /// <param name="y">The second TLTermEntry to compare.</param>
 /// <returns>
 /// A negative value when x.Term sorts before y.Term, zero when the terms are equal ignoring case,
 /// and a positive value when x.Term sorts after y.Term.
 /// </returns>
 public int Compare(TLTermEntry x, TLTermEntry y)
 {
     // string.Compare with CurrentCultureIgnoreCase performs the same current-culture, case-insensitive
     // comparison as a default-constructed CaseInsensitiveComparer, but without allocating a comparer
     // object on every call (this method runs once per comparison during a sort).
     return string.Compare(x.Term, y.Term, System.StringComparison.CurrentCultureIgnoreCase);
 }
Ejemplo n.º 10
0
 /// <summary>
 /// Determines the relative sort order of two TLTermEntry objects from a case-insensitive,
 /// current-culture comparison of their Term values.
 /// </summary>
 /// <param name="x">The first TLTermEntry to compare.</param>
 /// <param name="y">The second TLTermEntry to compare.</param>
 /// <returns>
 /// A 32-bit signed integer: less than zero if x.Term precedes y.Term, zero if the terms match
 /// ignoring case, greater than zero if x.Term follows y.Term.
 /// </returns>
 public int Compare(TLTermEntry x, TLTermEntry y)
 {
     // Avoids constructing a CaseInsensitiveComparer for each comparison; the static string.Compare
     // overload with CurrentCultureIgnoreCase applies the same culture-aware, ignore-case ordering.
     return string.Compare(x.Term, y.Term, System.StringComparison.CurrentCultureIgnoreCase);
 }