コード例 #1
0
ファイル: Corpus.cs プロジェクト: mgrcar/Detextive
        /// <summary>
        /// Replaces the tagged words of this corpus with the words tokenized from the
        /// <c>p</c> (paragraph) elements of the given XML file. The current tagged words
        /// and TEI header are discarded before the file is opened. If the file contains a
        /// <c>teiHeader</c> element, <c>ReadTeiHeader</c> is invoked on the same file
        /// after the reader has been closed.
        /// </summary>
        /// <param name="fileName">Path of the XML file to read. Must not be null and must
        /// pass <c>Utils.VerifyFileNameOpen</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="fileName"/> is null
        /// (raised through <c>Utils.ThrowException</c>).</exception>
        /// <exception cref="ArgumentValueException"><c>Utils.VerifyFileNameOpen</c>
        /// rejects <paramref name="fileName"/>.</exception>
        /// <remarks>
        /// Any exception raised while reading propagates to the caller; the reader and the
        /// underlying stream are closed first. The corpus has already been cleared at that
        /// point and may hold only the paragraphs read so far.
        /// </remarks>
        public void LoadFromGigaFidaFile(string fileName)
        {
            Utils.ThrowException(fileName == null ? new ArgumentNullException("fileName") : null);
            Utils.ThrowException(!Utils.VerifyFileNameOpen(fileName) ? new ArgumentValueException("fileName") : null);

            bool hasHeader = false;
            mTaggedWords.Clear();
            mTeiHeader = null;

            // Open for reading only: the two-argument FileStream constructor asks for
            // read/write access, which fails on read-only files even though nothing is written.
            // The using blocks close the reader and the stream on both the normal and the
            // exceptional path.
            using (FileStream stream = new FileStream(fileName, FileMode.Open, FileAccess.Read))
            using (XmlTextReader xmlReader = new XmlTextReader(stream))
            {
                while (xmlReader.Read())
                {
                    if (xmlReader.NodeType != XmlNodeType.Element)
                    {
                        continue;
                    }

                    if (xmlReader.Name == "teiHeader") // header
                    {
                        hasHeader = true;
                        Utils.XmlSkip(xmlReader, "teiHeader");
                    }
                    else if (xmlReader.Name == "p") // paragraph
                    {
                        ThreadHandler.AbortCheckpoint(); // TODO: do this at various appropriate places

                        // An empty <p/> has no content node; advancing the reader here would
                        // swallow whatever follows it (possibly the next paragraph's start tag).
                        if (xmlReader.IsEmptyElement)
                        {
                            continue;
                        }

                        // Move from the <p> start tag to its first child node and tokenize that
                        // node's value.
                        xmlReader.Read();
                        Corpus aux = new Corpus();
                        aux.LoadFromTextSsjTokenizer(xmlReader.Value);
                        if (aux.TaggedWords.Count > 0)
                        {
                            // Drop the end-of-paragraph flags set by the tokenizer run, then mark
                            // only the last word of this paragraph.
                            foreach (TaggedWord word in aux.TaggedWords)
                            {
                                word.MoreInfo.RemoveEndOfParagraphFlag();
                                mTaggedWords.Add(word);
                            }
                            aux.TaggedWords.Last.MoreInfo.SetEndOfParagraphFlag();
                        }
                    }
                }
            }

            // The reader is closed at this point, so the header pass can reopen the file.
            if (hasHeader)
            {
                ReadTeiHeader(fileName);
            }
        }