Exemplo n.º 1
0
        /// <summary>
        /// Builds an artifacts collection from two line-aligned text files: each line of
        /// <paramref name="idPath"/> supplies an artifact id and the line at the same
        /// position in <paramref name="docPath"/> supplies its text. Both values are trimmed.
        /// </summary>
        /// <param name="idPath">Path of the file holding one artifact id per line.</param>
        /// <param name="docPath">Path of the file holding one artifact text per line.</param>
        /// <returns>The collection of imported artifacts.</returns>
        /// <exception cref="InvalidDataException">
        /// The document file has fewer lines than the id file.
        /// </exception>
        public static TLArtifactsCollection Import(String idPath, String docPath)
        {
            TLArtifactsCollection artifacts = new TLArtifactsCollection();

            // Both readers are disposed on every exit path, including exceptions.
            using (StreamReader idFile = new StreamReader(idPath))
            using (StreamReader docFile = new StreamReader(docPath))
            {
                String origid;

                while ((origid = idFile.ReadLine()) != null)
                {
                    // read doc
                    String docLine = docFile.ReadLine();
                    if (docLine == null)
                    {
                        // The id file still has entries but the document file ran out of lines.
                        throw new InvalidDataException(String.Format(
                            "The document file '{0}' has fewer lines than the id file '{1}'.", docPath, idPath));
                    }

                    String doc = docLine.Trim();

                    // set vars
                    String baseId = origid.Trim();
                    String id     = baseId;
                    int    num    = 0;

                    // A repeated id is kept by appending "_1", "_2", ... until the key is unused.
                    while (artifacts.ContainsKey(id))
                    {
                        num++;
                        id = baseId + "_" + num.ToString();
                    }

                    artifacts.Add(new TLArtifact(id, doc));
                }
            }

            return artifacts;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Reads artifact ids and artifact texts from two parallel files (one entry per
        /// line, matched by line position) and returns them as an artifacts collection.
        /// Ids and texts are trimmed.
        /// </summary>
        /// <param name="idPath">Path of the file holding one artifact id per line.</param>
        /// <param name="docPath">Path of the file holding one artifact text per line.</param>
        /// <returns>The collection of imported artifacts.</returns>
        /// <exception cref="InvalidDataException">
        /// The document file has fewer lines than the id file.
        /// </exception>
        public static TLArtifactsCollection Import(String idPath, String docPath)
        {
            TLArtifactsCollection artifacts = new TLArtifactsCollection();

            // The using blocks close both files even when reading or adding throws.
            using (StreamReader idFile = new StreamReader(idPath))
            using (StreamReader docFile = new StreamReader(docPath))
            {
                String origid;

                while ((origid = idFile.ReadLine()) != null)
                {
                    // read doc
                    String docLine = docFile.ReadLine();
                    if (docLine == null)
                    {
                        // More ids than documents: report it instead of failing on a null reference.
                        throw new InvalidDataException(String.Format(
                            "The document file '{0}' has fewer lines than the id file '{1}'.", docPath, idPath));
                    }

                    String doc = docLine.Trim();

                    // set vars
                    String baseId = origid.Trim();
                    String id = baseId;
                    int num = 0;

                    // Make a repeated id unique by appending "_<n>" with the first free n.
                    while (artifacts.ContainsKey(id))
                    {
                        num++;
                        id = baseId + "_" + num.ToString();
                    }

                    artifacts.Add(new TLArtifact(id, doc));
                }
            }

            return artifacts;
        }
        /// <summary>
        /// Loads every <c>/artifacts/artifact</c> element of the given XML file into an
        /// artifacts collection. The artifact text is the element's title followed by a
        /// space and its content.
        /// </summary>
        /// <param name="filepath">Path of the XML file to read.</param>
        /// <param name="trimValues">When true, id, title and content are trimmed before use.</param>
        /// <returns>The artifacts read from the file; a repeated id is logged and skipped.</returns>
        public static TLArtifactsCollection ReadXMLFile(string filepath, bool trimValues)
        {
            TLArtifactsCollection result = new TLArtifactsCollection();
            XPathNavigator root = new XPathDocument(filepath).CreateNavigator();

            for (XPathNodeIterator nodes = root.Select("/artifacts/artifact"); nodes.MoveNext(); )
            {
                XPathNavigator node = nodes.Current;

                // Only the art_id, art_title and art_content tags are read.
                string id = ReadSingleItem(filepath, node, "art_id");
                string title = ReadSingleItem(filepath, node, "art_title");
                string body = ReadSingleItem(filepath, node, "art_content");

                if (trimValues)
                {
                    id = id.Trim();
                    title = title.Trim();
                    body = body.Trim();
                }

                // The first occurrence of an id wins; later ones are only reported.
                if (result.ContainsKey(id))
                {
                    PoirotFormatArtifactsReader.Logger.Warn(
                        String.Format("Repeated artifact ID '{0}' found in file '{1}'.", id, filepath));
                    continue;
                }

                result.Add(id, new TLArtifact(id, title + " " + body));
            }

            return result;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reads the <c>/artifacts/artifact</c> elements of an XML file and returns them
        /// as an artifacts collection, using "title content" as each artifact's text.
        /// </summary>
        /// <param name="filepath">Path of the XML file to read.</param>
        /// <param name="trimValues">When true, id, title and content are trimmed before use.</param>
        /// <returns>The artifacts read from the file; a repeated id is logged and skipped.</returns>
        public static TLArtifactsCollection ReadXMLFile(string filepath, bool trimValues)
        {
            TLArtifactsCollection collection = new TLArtifactsCollection();

            XPathDocument     document  = new XPathDocument(filepath);
            XPathNodeIterator artifactNodes = document.CreateNavigator().Select("/artifacts/artifact");

            while (artifactNodes.MoveNext())
            {
                // Only reading xml tags: art_id, art_title, art_content
                string artifactId      = ReadSingleItem(filepath, artifactNodes.Current, "art_id");
                string artifactTitle   = ReadSingleItem(filepath, artifactNodes.Current, "art_title");
                string artifactContent = ReadSingleItem(filepath, artifactNodes.Current, "art_content");

                if (trimValues)
                {
                    artifactId      = artifactId.Trim();
                    artifactTitle   = artifactTitle.Trim();
                    artifactContent = artifactContent.Trim();
                }

                bool alreadyPresent = collection.ContainsKey(artifactId);
                if (alreadyPresent)
                {
                    // Keep the artifact stored first and report the repeated id.
                    PoirotFormatArtifactsReader.Logger.Warn(
                        String.Format("Repeated artifact ID '{0}' found in file '{1}'.", artifactId, filepath)
                        );
                }
                else
                {
                    string artifactText = artifactTitle + " " + artifactContent;
                    collection.Add(artifactId, new TLArtifact(artifactId, artifactText));
                }
            }

            return collection;
        }
Exemplo n.º 5
0
 /// <summary>
 /// Imports every file of a directory as an artifact. The artifact id is the file
 /// name with all non-digit characters removed; files that map to the same id have
 /// their text appended to the existing artifact, separated by a single space.
 /// </summary>
 /// <param name="directory">Directory whose files are imported.</param>
 /// <returns>The collection of imported artifacts.</returns>
 public static TLArtifactsCollection Import(string directory)
 {
     TLArtifactsCollection artifacts = new TLArtifactsCollection();
     foreach (string filename in Directory.EnumerateFiles(directory))
     {
         // Path.GetFileName uses the platform's directory separators, so digits in
         // directory names never leak into the id (splitting on '\\' only worked on Windows).
         // NOTE: a file name without digits yields the empty id "".
         string id = Regex.Replace(Path.GetFileName(filename), "[^0-9]", "");

         // File.ReadAllText closes the file itself, even when reading fails.
         string text = File.ReadAllText(filename);

         if (artifacts.ContainsKey(id))
         {
             artifacts[id].Text += " " + text;
         }
         else
         {
             artifacts.Add(new TLArtifact(id, text));
         }
     }
     return artifacts;
 }
Exemplo n.º 6
0
        /// <summary>
        /// Imports all files found in a directory. Each file becomes an artifact whose id
        /// is formed by the digits of the file name; when several files share an id their
        /// texts are joined with a single space in enumeration order.
        /// </summary>
        /// <param name="directory">Directory whose files are imported.</param>
        /// <returns>The collection of imported artifacts.</returns>
        public static TLArtifactsCollection Import(string directory)
        {
            TLArtifactsCollection artifacts = new TLArtifactsCollection();

            foreach (string filename in Directory.EnumerateFiles(directory))
            {
                // Take the file name with Path.GetFileName rather than splitting on '\\':
                // the split only isolated the name on Windows, and elsewhere digits from
                // directory names ended up in the id.
                // NOTE: a file name without digits yields the empty id "".
                string id = Regex.Replace(Path.GetFileName(filename), "[^0-9]", "");

                // File.ReadAllText releases the file handle even if the read throws.
                string text = File.ReadAllText(filename);

                if (artifacts.ContainsKey(id))
                {
                    artifacts[id].Text += " " + text;
                }
                else
                {
                    artifacts.Add(new TLArtifact(id, text));
                }
            }

            return artifacts;
        }
Exemplo n.º 7
0
 /// <summary>
 /// Imports an artifacts collection from an XML file whose root element is
 /// <c>artifacts_collection</c>: the collection info (id, name, version, description)
 /// and every <c>artifacts/artifact</c> entry.
 /// </summary>
 /// <param name="filepath">Input file path</param>
 /// <param name="trimValues">When true, collection info, artifact ids and contents are trimmed.</param>
 /// <returns>Artifacts collection; an artifact whose id was already read is skipped.</returns>
 public static TLArtifactsCollection ImportXMLFile(string filepath, bool trimValues)
 {
     TLArtifactsCollection result = new TLArtifactsCollection();
     XPathNavigator root = new XPathDocument(filepath).CreateNavigator();

     // Collection-level metadata.
     result.CollectionId = ReadSingleXMLNode(filepath, root, "/artifacts_collection/collection_info/id");
     result.CollectionName = ReadSingleXMLNode(filepath, root, "/artifacts_collection/collection_info/name");
     result.CollectionVersion = ReadSingleXMLNode(filepath, root, "/artifacts_collection/collection_info/version");
     result.CollectionDescription = ReadSingleXMLNode(filepath, root, "/artifacts_collection/collection_info/description");

     if (trimValues)
     {
         result.CollectionId = result.CollectionId.Trim();
         result.CollectionName = result.CollectionName.Trim();
         result.CollectionVersion = result.CollectionVersion.Trim();
         result.CollectionDescription = result.CollectionDescription.Trim();
     }

     // The content location defaults to "internal" when the file does not specify one.
     XPathNavigator locationNode = root.SelectSingleNode("/artifacts_collection/collection_info/content_location");
     string contentLocation = (locationNode != null) ? locationNode.Value : "internal";

     // External content is addressed relative to the directory of the XML file.
     string baseDir = System.IO.Path.GetDirectoryName(filepath);

     XPathNodeIterator entries = root.Select("/artifacts_collection/artifacts/artifact");
     while (entries.MoveNext())
     {
         string artifactId = entries.Current.SelectSingleNode("id").InnerXml;
         XPathNavigator contentNode = entries.Current.SelectSingleNode("content");

         // "external": the content element holds a file path; otherwise it holds the text itself.
         string content = contentLocation.Equals("external")
             ? System.IO.File.ReadAllText(System.IO.Path.Combine(baseDir, contentNode.InnerXml.Trim()))
             : contentNode.InnerXml;

         if (trimValues)
         {
             artifactId = artifactId.Trim();
             content = content.Trim();
         }

         // A repeated id is skipped without any report; the first occurrence is kept.
         if (result.ContainsKey(artifactId))
         {
             continue;
         }

         result.Add(artifactId, new TLArtifact(artifactId, content));
     }

     return result;
 }
        /// <summary>
        /// Imports an answer set (links between source and target artifacts) from an XML
        /// file whose root element is <c>answer_set</c>.
        /// </summary>
        /// <param name="filepath">Path of the answer set XML file.</param>
        /// <param name="sourceArtifacts">The source artifacts; its collection id must match the one named in the file.</param>
        /// <param name="sourceArtifactsFilePath">The source artifacts file path; only its file name is used, in warning messages.</param>
        /// <param name="targetArtifacts">The target artifacts; its collection id must match the one named in the file.</param>
        /// <param name="targetArtifactsFilePath">The target artifacts file path; only its file name is used, in warning messages.</param>
        /// <param name="logger">The logger that receives warnings about unknown artifact ids.</param>
        /// <param name="trimValues">if set to <c>true</c>, artifact ids and confidence scores are trimmed before use.</param>
        /// <returns>A similarity matrix with one link per <c>link</c> element; a missing confidence score defaults to 1.0.</returns>
        /// <exception cref="ArgumentException">The file refers to a different source or target collection id than the supplied collections.</exception>
        /// <exception cref="XmlException">A required element is missing or a confidence score is not a valid number.</exception>
        public static TLSimilarityMatrix ImportAnswerSet(string filepath, TLArtifactsCollection sourceArtifacts, string sourceArtifactsFilePath, TLArtifactsCollection targetArtifacts, string targetArtifactsFilePath, ComponentLogger logger, bool trimValues)
        {
            string friendlyAnswerSetFilename = System.IO.Path.GetFileName(filepath);
            string friendlySourceArtifactsFilename = System.IO.Path.GetFileName(sourceArtifactsFilePath);
            string friendlyTargetArtifactsFilename = System.IO.Path.GetFileName(targetArtifactsFilePath);

            TLSimilarityMatrix answerSet = new TLSimilarityMatrix();

            XPathDocument doc = new XPathDocument(filepath);
            XPathNavigator nav = doc.CreateNavigator();

            //read collection info
            XPathNavigator iter = nav.SelectSingleNode("/answer_set/answer_info/source_artifacts_collection");
            if (iter == null)
            {
                // Report a missing element the same way missing link elements are reported below.
                throw new XmlException(String.Format("The source_artifacts_collection has not been provided for the answer set. File location: {0}", filepath));
            }

            string source_artifacts_collection_id = iter.Value;
            if (!source_artifacts_collection_id.Equals(sourceArtifacts.CollectionId))
            {
                throw new ArgumentException(String.Format("The answer set refers to source artifact collection with id '{0}', while loaded artifacts collection has different id '{1}'. Importing answer set from {2}",
                                                    source_artifacts_collection_id, sourceArtifacts.CollectionId, filepath));
            }

            iter = nav.SelectSingleNode("/answer_set/answer_info/target_artifacts_collection");
            if (iter == null)
            {
                throw new XmlException(String.Format("The target_artifacts_collection has not been provided for the answer set. File location: {0}", filepath));
            }

            string target_artifacts_collection_id = iter.Value;
            if (!target_artifacts_collection_id.Equals(targetArtifacts.CollectionId))
            {
                throw new ArgumentException(String.Format("The answer set refers to target artifact collection with id '{0}', while loaded artifacts collection has different id '{1}'. Importing answer set from {2}",
                                                    target_artifacts_collection_id, targetArtifacts.CollectionId, filepath));
            }

            XPathNodeIterator linksIterator = nav.Select("/answer_set/links/link");

            string source_artifact_id;
            string target_artifact_id;
            double confidence_score;
            while (linksIterator.MoveNext())
            {
                // Parse Source Artifact Id
                iter = linksIterator.Current.SelectSingleNode("source_artifact_id");
                if (iter == null)
                {
                    throw new XmlException(String.Format("The source_artifact_id has not been provided for the link. File location: {0}", filepath));
                }

                source_artifact_id = iter.Value;
                if (trimValues)
                {
                    source_artifact_id = source_artifact_id.Trim();
                }

                // NOTE(review): the warning says the link "has been removed", but this method
                // still adds it below - confirm whether removal is expected to happen elsewhere.
                if (!sourceArtifacts.ContainsKey(source_artifact_id))
                {
                    logger.Warn(String.Format("The source artifact id '{0}' referenced in the answer set {1} has not been found in the source artifacts {2}. Therefore, this link has been removed in this experiment.", source_artifact_id, friendlyAnswerSetFilename, friendlySourceArtifactsFilename));
                }

                // Parse Target Artifact Id
                iter = linksIterator.Current.SelectSingleNode("target_artifact_id");
                if (iter == null)
                {
                    throw new XmlException(String.Format("The target_artifact_id has not been provided for the link. File location: {0}", filepath));
                }

                target_artifact_id = iter.Value;
                if (trimValues)
                {
                    target_artifact_id = target_artifact_id.Trim();
                }

                if (!targetArtifacts.ContainsKey(target_artifact_id))
                {
                    logger.Warn(String.Format("The target artifact id '{0}' referenced in the answer set {1} has not been found in the target artifacts {2}. Therefore, this link has been removed in this experiment.", target_artifact_id, friendlyAnswerSetFilename, friendlyTargetArtifactsFilename));
                }

                //Parse confidence score
                iter = linksIterator.Current.SelectSingleNode("confidence_score");
                if (iter == null)
                {
                    //if confidence score is not provided set it to default value 1
                    confidence_score = 1.0;
                }
                else
                {
                    string tmpValue = iter.Value;
                    if (trimValues)
                    {
                        tmpValue = tmpValue.Trim();
                    }

                    // The file is machine-readable data, so parse with the invariant culture:
                    // with the current culture "0.5" is misread on machines whose decimal
                    // separator is a comma. The number styles are those of the default overload.
                    bool parsed = double.TryParse(
                        tmpValue,
                        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out confidence_score);

                    if (!parsed)
                    {
                        throw new XmlException(String.Format("The confidence score '{2}' provided for link from source artifact {0} to target artifact {1} is in incorrect format. File location: {3}", source_artifact_id, target_artifact_id, tmpValue, filepath));
                    }
                }

                answerSet.AddLink(source_artifact_id, target_artifact_id, confidence_score);
            }

            return answerSet;
        }
Exemplo n.º 9
0
        /// <summary>
        /// Imports an artifacts collection from an XML file whose root element is
        /// <c>artifacts_collection</c>: the collection info (id, name, version, description)
        /// and every <c>artifacts/artifact</c> entry.
        /// </summary>
        /// <param name="filepath">Path of the XML file to read.</param>
        /// <param name="trimValues">When true, collection info, artifact ids and contents are trimmed.</param>
        /// <returns>The imported collection; an artifact whose id was already read is logged and skipped.</returns>
        public static TLArtifactsCollection ImportArtifacts(string filepath, bool trimValues)
        {
            TLArtifactsCollection collection = new TLArtifactsCollection();
            XPathNavigator root = new XPathDocument(filepath).CreateNavigator();

            // Collection-level metadata.
            collection.CollectionId = ReadSingleNode(filepath, root, "/artifacts_collection/collection_info/id");
            collection.CollectionName = ReadSingleNode(filepath, root, "/artifacts_collection/collection_info/name");
            collection.CollectionVersion = ReadSingleNode(filepath, root, "/artifacts_collection/collection_info/version");
            collection.CollectionDescription = ReadSingleNode(filepath, root, "/artifacts_collection/collection_info/description");

            if (trimValues)
            {
                collection.CollectionId = collection.CollectionId.Trim();
                collection.CollectionName = collection.CollectionName.Trim();
                collection.CollectionVersion = collection.CollectionVersion.Trim();
                collection.CollectionDescription = collection.CollectionDescription.Trim();
            }

            // Content is treated as "internal" unless the file names another location type.
            string locationType = "internal";
            XPathNavigator locationNode = root.SelectSingleNode("/artifacts_collection/collection_info/content_location");
            if (locationNode != null)
            {
                locationType = locationNode.Value;
            }

            // Paths of external content files are resolved against the XML file's directory.
            string baseDirectory = System.IO.Path.GetDirectoryName(filepath);

            for (XPathNodeIterator entries = root.Select("/artifacts_collection/artifacts/artifact"); entries.MoveNext(); )
            {
                XPathNavigator idNode = entries.Current.SelectSingleNode("id");
                string artifactId = idNode.InnerXml;

                XPathNavigator contentNode = entries.Current.SelectSingleNode("content");

                string content;
                if (locationType.Equals("external"))
                {
                    // The content element holds a file path; the artifact text is that file's content.
                    string contentPath = System.IO.Path.Combine(baseDirectory, contentNode.InnerXml.Trim());
                    content = System.IO.File.ReadAllText(contentPath);
                }
                else
                {
                    content = contentNode.InnerXml;
                }

                if (trimValues)
                {
                    artifactId = artifactId.Trim();
                    content = content.Trim();
                }

                // The first artifact stored under an id is kept; repeats are only reported.
                if (collection.ContainsKey(artifactId))
                {
                    CoestDatasetImporterHelper.Logger.Warn(
                        String.Format("Repeated artifact ID '{0}' found in file '{1}'.", artifactId, filepath));
                    continue;
                }

                collection.Add(artifactId, new TLArtifact(artifactId, content));
            }

            return collection;
        }
Exemplo n.º 10
0
        /// <summary>
        /// Imports an answer set (links between source and target artifacts) from an XML
        /// file whose root element is <c>answer_set</c>.
        /// </summary>
        /// <param name="filepath">Path of the answer set XML file.</param>
        /// <param name="sourceArtifacts">The source artifacts; its collection id must match the one named in the file.</param>
        /// <param name="sourceArtifactsFilePath">The source artifacts file path; only its file name is used, in warning messages.</param>
        /// <param name="targetArtifacts">The target artifacts; its collection id must match the one named in the file.</param>
        /// <param name="targetArtifactsFilePath">The target artifacts file path; only its file name is used, in warning messages.</param>
        /// <param name="logger">The logger that receives warnings about unknown artifact ids.</param>
        /// <param name="trimValues">if set to <c>true</c>, artifact ids and confidence scores are trimmed before use.</param>
        /// <returns>A similarity matrix with one link per <c>link</c> element; a missing confidence score defaults to 1.0.</returns>
        /// <exception cref="ArgumentException">The file refers to a different source or target collection id than the supplied collections.</exception>
        /// <exception cref="XmlException">A required element is missing or a confidence score is not a valid number.</exception>
        public static TLSimilarityMatrix ImportAnswerSet(string filepath, TLArtifactsCollection sourceArtifacts, string sourceArtifactsFilePath, TLArtifactsCollection targetArtifacts, string targetArtifactsFilePath, ComponentLogger logger, bool trimValues)
        {
            string friendlyAnswerSetFilename       = System.IO.Path.GetFileName(filepath);
            string friendlySourceArtifactsFilename = System.IO.Path.GetFileName(sourceArtifactsFilePath);
            string friendlyTargetArtifactsFilename = System.IO.Path.GetFileName(targetArtifactsFilePath);

            TLSimilarityMatrix answerSet = new TLSimilarityMatrix();

            XPathDocument  doc = new XPathDocument(filepath);
            XPathNavigator nav = doc.CreateNavigator();

            //read collection info
            XPathNavigator iter = nav.SelectSingleNode("/answer_set/answer_info/source_artifacts_collection");
            if (iter == null)
            {
                // A missing element is reported like the missing link elements further down,
                // instead of surfacing as a null reference failure.
                throw new XmlException(String.Format("The source_artifacts_collection has not been provided for the answer set. File location: {0}", filepath));
            }

            string source_artifacts_collection_id = iter.Value;

            if (!source_artifacts_collection_id.Equals(sourceArtifacts.CollectionId))
            {
                throw new ArgumentException(String.Format("The answer set refers to source artifact collection with id '{0}', while loaded artifacts collection has different id '{1}'. Importing answer set from {2}",
                                                          source_artifacts_collection_id, sourceArtifacts.CollectionId, filepath));
            }

            iter = nav.SelectSingleNode("/answer_set/answer_info/target_artifacts_collection");
            if (iter == null)
            {
                throw new XmlException(String.Format("The target_artifacts_collection has not been provided for the answer set. File location: {0}", filepath));
            }

            string target_artifacts_collection_id = iter.Value;

            if (!target_artifacts_collection_id.Equals(targetArtifacts.CollectionId))
            {
                throw new ArgumentException(String.Format("The answer set refers to target artifact collection with id '{0}', while loaded artifacts collection has different id '{1}'. Importing answer set from {2}",
                                                          target_artifacts_collection_id, targetArtifacts.CollectionId, filepath));
            }

            XPathNodeIterator linksIterator = nav.Select("/answer_set/links/link");

            string source_artifact_id;
            string target_artifact_id;
            double confidence_score;

            while (linksIterator.MoveNext())
            {
                // Parse Source Artifact Id
                iter = linksIterator.Current.SelectSingleNode("source_artifact_id");
                if (iter == null)
                {
                    throw new XmlException(String.Format("The source_artifact_id has not been provided for the link. File location: {0}", filepath));
                }

                source_artifact_id = iter.Value;
                if (trimValues)
                {
                    source_artifact_id = source_artifact_id.Trim();
                }

                // NOTE(review): the warning says the link "has been removed", but this method
                // still adds it below - confirm whether removal is expected to happen elsewhere.
                if (!sourceArtifacts.ContainsKey(source_artifact_id))
                {
                    logger.Warn(String.Format("The source artifact id '{0}' referenced in the answer set {1} has not been found in the source artifacts {2}. Therefore, this link has been removed in this experiment.", source_artifact_id, friendlyAnswerSetFilename, friendlySourceArtifactsFilename));
                }

                // Parse Target Artifact Id
                iter = linksIterator.Current.SelectSingleNode("target_artifact_id");
                if (iter == null)
                {
                    throw new XmlException(String.Format("The target_artifact_id has not been provided for the link. File location: {0}", filepath));
                }

                target_artifact_id = iter.Value;
                if (trimValues)
                {
                    target_artifact_id = target_artifact_id.Trim();
                }

                if (!targetArtifacts.ContainsKey(target_artifact_id))
                {
                    logger.Warn(String.Format("The target artifact id '{0}' referenced in the answer set {1} has not been found in the target artifacts {2}. Therefore, this link has been removed in this experiment.", target_artifact_id, friendlyAnswerSetFilename, friendlyTargetArtifactsFilename));
                }

                //Parse confidence score
                iter = linksIterator.Current.SelectSingleNode("confidence_score");
                if (iter == null)
                {
                    //if confidence score is not provided set it to default value 1
                    confidence_score = 1.0;
                }
                else
                {
                    string tmpValue = iter.Value;
                    if (trimValues)
                    {
                        tmpValue = tmpValue.Trim();
                    }

                    // The file is machine-readable data, so parse with the invariant culture:
                    // with the current culture "0.5" is misread on machines whose decimal
                    // separator is a comma. The number styles are those of the default overload.
                    bool parsed = double.TryParse(
                        tmpValue,
                        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out confidence_score);

                    if (!parsed)
                    {
                        throw new XmlException(String.Format("The confidence score '{2}' provided for link from source artifact {0} to target artifact {1} is in incorrect format. File location: {3}", source_artifact_id, target_artifact_id, tmpValue, filepath));
                    }
                }

                answerSet.AddLink(source_artifact_id, target_artifact_id, confidence_score);
            }

            return answerSet;
        }