/// <summary>
/// Creates a text with a single paragraph containing <paramref name="contents"/> in the default
/// vernacular writing system, parses that paragraph, and then replaces every wordform in the
/// first segment with the first entry of MeaningsOC from the wordform's first analysis.
/// </summary>
/// <param name="contents">Baseline string for the paragraph.</param>
/// <returns>The newly created text.</returns>
private FDO.IText MakeText(string contents)
        {
            // Build the ownership chain: text -> StText -> paragraph.
            var services = Cache.ServiceLocator;
            var newText  = services.GetInstance <ITextFactory>().Create();
            var body     = services.GetInstance <IStTextFactory>().Create();

            newText.ContentsOA = body;
            var paragraph = services.GetInstance <IStTxtParaFactory>().Create();

            body.ParagraphsOS.Add(paragraph);
            paragraph.Contents = Cache.TsStrFactory.MakeString(contents, Cache.DefaultVernWs);

            using (var parser = new ParagraphParser(Cache))
            {
                parser.Parse(paragraph);
            }

            // Only the first segment of the parsed paragraph is adjusted.
            ISegment firstSegment = paragraph.SegmentsOS.First();

            for (int index = 0; index < firstSegment.AnalysesRS.Count; index++)
            {
                var wordform = firstSegment.AnalysesRS[index] as IWfiWordform;
                if (wordform == null)
                {
                    // Anything that is not a wordform is left as the parser produced it.
                    continue;
                }
                firstSegment.AnalysesRS[index] = wordform.AnalysesOC.First().MeaningsOC.First();
            }
            return(newText);
        }
Example #2
0
 /// <summary>
 /// Parses <paramref name="para"/> with a ParagraphParser created for that same paragraph;
 /// the parser is disposed when parsing is done.
 /// </summary>
 /// <param name="para">The paragraph to parse.</param>
 private void ReparseParagraph(IStTxtPara para)
 {
     using (var pp = new ParagraphParser(para))
     {
         pp.Parse(para);
     }
 }
Example #3
0
        /// <summary>
        /// Collects segment annotations for a string containing an embedded ORC and verifies that
        /// three segments are produced, breaking around the ORC.
        /// </summary>
        public void OrcIsLabel()
        {
            ITsStrFactory   factory = TsStrFactoryClass.Create();
            ParagraphParser parser  = new ParagraphParser(m_para);

            // String with embedded ORC.
            string beforeOrc = "This is a simple sentence";
            string orc       = "\xfffc";
            string afterOrc  = " with a footnote.";

            // Character offsets of the ORC and of the end of the whole string.
            int ichOrc    = beforeOrc.Length;
            int ichOrcLim = ichOrc + orc.Length;
            int ichEnd    = ichOrcLim + afterOrc.Length;

            // To be recognized an ORC must have unique properties.
            ITsStrBldr builder = factory.MakeString(beforeOrc + orc + afterOrc, 1).GetBldr();

            builder.SetStrPropValue(ichOrc, ichOrcLim, (int)FwTextPropType.ktptObjData, "nonsence");
            ITsString contents = builder.GetString();

            m_para.Contents.UnderlyingTsString = contents;
            List <int> breaks;
            List <int> segments = parser.CollectSegmentAnnotations(contents, out breaks);

            VerifyBreaks(new int[] { ichOrc, ichOrcLim + 1, ichEnd - 1 },
                         breaks, "multi-sentence string with ORC");
            Assert.AreEqual(3, segments.Count);

            // The segments break around the ORC.
            VerifySegment(segments[0], 0, ichOrc, m_para.Hvo, "first seg of multi-sentence w. ORC");
            VerifySegment(segments[1], ichOrc, ichOrcLim + 1, m_para.Hvo, "second seg of multi-sentence w. ORC");
            VerifySegment(segments[2], ichOrcLim + 1, ichEnd, m_para.Hvo, "third seg of multi-sentence w. ORC");
        }
Example #4
0
        /// <summary>
        /// Verifies segment breaks for a sentence that begins with a question mark, first on its
        /// own and then when it follows a verse number.
        /// </summary>
        public void LeadingPunctuation()
        {
            ITsStrFactory   factory  = TsStrFactoryClass.Create();
            ParagraphParser parser   = new ParagraphParser(m_para);
            string          question = "?This is a question with special punctuation?";
            ITsString       plain    = factory.MakeString(question, 1);
            List <int>      breaks;

            // Part 1: the bare question is expected to form a single segment.
            m_para.Contents.UnderlyingTsString = plain;
            List <int> segments = parser.CollectSegmentAnnotations(plain, out breaks);

            VerifyBreaks(new int[] { question.Length - 1 }, breaks, "leading QM");
            Assert.AreEqual(1, segments.Count);
            VerifySegment(segments[0], 0, question.Length, m_para.Hvo, "leading QM");

            // Part 2: now try leading punctuation following a verse number.
            string     verse   = "5 ";
            ITsStrBldr builder = plain.GetBldr();

            builder.Replace(0, 0, verse, null);
            // Only the digit (first character) gets the verse-number style.
            builder.SetStrPropValue(0, 1, (int)FwTextPropType.ktptNamedStyle,
                                    ScrStyleNames.VerseNumber);
            ITsString withVerse = builder.GetString();

            m_para.Contents.UnderlyingTsString = withVerse;
            segments = parser.CollectSegmentAnnotations(withVerse, out breaks);

            VerifyBreaks(new int[] { verse.Length, withVerse.Length - 1 },
                         breaks, "leading verse and QM");
            Assert.AreEqual(2, segments.Count);
            VerifySegment(segments[0], 0, verse.Length, m_para.Hvo, "first seg of leading verse and QM");
            VerifySegment(segments[1], verse.Length, withVerse.Length, m_para.Hvo, "second seg of leading verse and QM");
        }
        /// <summary>
        /// Builds a paragraph in which two sentences precede the first verse number and two
        /// sentences follow each verse number, then checks that eight segments are collected and
        /// that each pair of sentences is labelled "a" and "b".
        /// </summary>
        public void TwoSegsPerVerse()
        {
            string pc1    = "Das Buch ist rot. ";
            string pc2    = "Das Maedchen ist schoen.";
            string verse1 = "9";
            string pc3    = "Der Herr ist gross.";
            string pc4    = "Ich spreche nicht viel Deutsch.";
            string verse2 = "10";
            string pc5    = "Was ist das?";
            string pc6    = "Wie gehts?";

            // Offsets of the two verse numbers within the concatenated string.
            int ichV1    = pc1.Length + pc2.Length;
            int ichV1Lim = ichV1 + verse1.Length;
            int ichV2    = ichV1Lim + pc3.Length + pc4.Length;
            int ichV2Lim = ichV2 + verse2.Length;

            ITsStrBldr builder = m_tsf.MakeString(pc1 + pc2 + verse1 + pc3 + pc4 + verse2 + pc5 + pc6, m_wsVern).GetBldr();

            builder.SetStrPropValue(ichV1, ichV1Lim, (int)FwTextPropType.ktptNamedStyle,
                                    ScrStyleNames.VerseNumber);
            builder.SetStrPropValue(ichV2, ichV2Lim, (int)FwTextPropType.ktptNamedStyle,
                                    ScrStyleNames.VerseNumber);
            m_para.Contents = builder.GetString();

            using (ParagraphParser parser = new ParagraphParser(m_para))
            {
                List <int> eosIndexes;
                var        segments = parser.CollectSegments(m_para.Contents, out eosIndexes);

                Assert.AreEqual(8, segments.Count);

                // Segments 2 and 5 are skipped; each listed index starts an "a"/"b" pair.
                foreach (int firstOfPair in new int[] { 0, 3, 6 })
                {
                    Assert.AreEqual("a", ScriptureServices.VerseSegLabel(m_para, firstOfPair));
                    Assert.AreEqual("b", ScriptureServices.VerseSegLabel(m_para, firstOfPair + 1));
                }
            }
        }
Example #6
0
        /// <summary>
        /// Creates a second text with one short paragraph (stored in m_stText2 / m_para2), parses
        /// it, and fills m_expectedAnOcsPara2 with one AnalysisOccurrence per analysis of every
        /// segment in the text.
        /// </summary>
        private void Setup2ndText()
        {
            var textFactory   = Cache.ServiceLocator.GetInstance <ITextFactory>();
            var stTextFactory = Cache.ServiceLocator.GetInstance <IStTextFactory>();
            var secondText    = textFactory.Create();

            // Note: the new text is not added to Cache.LangProject.TextsOC.
            m_stText2             = stTextFactory.Create();
            secondText.ContentsOA = m_stText2;
            m_para2               = m_stText2.AddNewTextPara(null);
            m_para2.Contents      = TsStringUtils.MakeString("Small one segment paragraph.", Cache.DefaultVernWs);

            // One parser instance is shared by every paragraph of the text.
            using (ParagraphParser parser = new ParagraphParser(Cache))
            {
                foreach (IStTxtPara paragraph in m_stText2.ParagraphsOS)
                {
                    parser.Parse(paragraph);
                }
            }

            m_expectedAnOcsPara2 = new List <AnalysisOccurrence>();
            foreach (IStTxtPara paragraph in m_stText2.ParagraphsOS)
            {
                foreach (ISegment segment in paragraph.SegmentsOS)
                {
                    int analysisCount = 0;
                    while (analysisCount < segment.AnalysesRS.Count)
                    {
                        m_expectedAnOcsPara2.Add(new AnalysisOccurrence(segment, analysisCount));
                        analysisCount++;
                    }
                }
            }
        }
Example #7
0
        /// <summary>
        /// Appends two paragraphs with identical contents to m_text1, sets up default word glosses
        /// on the first, reparses the text so the second picks up default analyses, then exports
        /// to XML and validates the exported form of the second (guessed) paragraph.
        /// </summary>
        public void ExportGuesses()
        {
            //NOTE: The new test paragraphs need to have all new words w/o duplicates so we can predict the guesses
            //xxxcrayzee xxxyouneek xxxsintents.

            // Copy a text of first paragraph into a new paragraph to generate guesses.
            // Direct casts are used instead of 'as' so that an unexpected type fails right here
            // with an InvalidCastException rather than a NullReferenceException on first use.
            StTxtPara paraGlossed = (StTxtPara)m_text1.ContentsOA.ParagraphsOS.Append(new StTxtPara());
            StTxtPara paraGuessed = (StTxtPara)m_text1.ContentsOA.ParagraphsOS.Append(new StTxtPara());

            paraGlossed.Contents.UnderlyingTsString = StringUtils.MakeTss("xxxcrayzee xxxyouneek xxxsintents.", Cache.DefaultVernWs);
            paraGuessed.Contents.UnderlyingTsString = paraGlossed.Contents.UnderlyingTsString;

            // Set up the glosses in the first paragraph. The returned list was never read,
            // so it is no longer stored; the call itself is still made.
            ParagraphAnnotator paGlossed = new ParagraphAnnotator(paraGlossed);

            paGlossed.SetupDefaultWordGlosses();

            // Then verify we've created guesses for the new text.
            ParagraphAnnotator paGuessed = new ParagraphAnnotator(paraGuessed);
            bool fDidParse; // required by ParseText's out parameter; value not used here

            ParagraphParser.ParseText(m_text1.ContentsOA, new NullProgressState(), out fDidParse);
            paGuessed.LoadParaDefaultAnalyses();

            // Export the paragraph and test the Display results.
            m_choices.Add(InterlinLineChoices.kflidWord);
            m_choices.Add(InterlinLineChoices.kflidWordGloss);
            XmlDocument exportedDoc = ExportToXml();

            ValidateExportedParagraph(exportedDoc, m_choices, paraGuessed);
        }
Example #8
0
        /// <summary>
        /// Builds a paragraph in which two sentences precede the first verse number and two
        /// sentences follow each verse number, caches the collected segment annotations under
        /// ktagParaSegments, and checks that there are eight of them with each sentence pair
        /// labelled "a" and "b".
        /// </summary>
        public void TwoSegsPerVerse()
        {
            string pc1    = "Das buch ist rot. ";
            string pc2    = "Das Madchen ist shon.";
            string verse1 = "9";
            string pc3    = "Der Herr ist gross.";
            string pc4    = "Ich spreche nicht viel Deutsch.";
            string verse2 = "10";
            string pc5    = "Was is das?";
            string pc6    = "Wie gehts?";

            // Offsets of the two verse numbers within the concatenated string.
            int ichV1    = pc1.Length + pc2.Length;
            int ichV1Lim = ichV1 + verse1.Length;
            int ichV2    = ichV1Lim + pc3.Length + pc4.Length;
            int ichV2Lim = ichV2 + verse2.Length;

            ITsStrBldr builder = m_tsf.MakeString(pc1 + pc2 + verse1 + pc3 + pc4 + verse2 + pc5 + pc6, m_wsVern).GetBldr();

            builder.SetStrPropValue(ichV1, ichV1Lim, (int)FwTextPropType.ktptNamedStyle,
                                    ScrStyleNames.VerseNumber);
            builder.SetStrPropValue(ichV2, ichV2Lim, (int)FwTextPropType.ktptNamedStyle,
                                    ScrStyleNames.VerseNumber);
            m_para.Contents.UnderlyingTsString = builder.GetString();

            ParagraphParser parser = new ParagraphParser(m_para);
            List <int>      eosIndexes;
            List <int>      segments = parser.CollectSegmentAnnotations(m_para.Contents.UnderlyingTsString, out eosIndexes);

            // VerseSegLabel is given ktagParaSegments below, so the segments are cached under it first.
            Cache.VwCacheDaAccessor.CacheVecProp(m_para.Hvo, ktagParaSegments, segments.ToArray(), segments.Count);
            Assert.AreEqual(8, segments.Count);

            // Segments 2 and 5 are skipped; each listed index starts an "a"/"b" pair.
            foreach (int firstOfPair in new int[] { 0, 3, 6 })
            {
                Assert.AreEqual("a", AnnotationRefHandler.VerseSegLabel(m_para, firstOfPair, ktagParaSegments));
                Assert.AreEqual("b", AnnotationRefHandler.VerseSegLabel(m_para, firstOfPair + 1, ktagParaSegments));
            }
        }
        /// <summary>
        /// Builds a paragraph with a single sentence before, between and after two verse numbers,
        /// then checks that five segments are collected and that the three sentence segments
        /// (indexes 0, 2 and 4) have an empty label.
        /// </summary>
        public void OneSegPerVerse()
        {
            string pc1    = "Das Buch ist rot. ";
            string verse1 = "9";
            string pc2    = "Der Herr ist gross.";
            string verse2 = "10";
            string pc3    = "Ich spreche nicht viel Deutsch.";

            // Offsets of the two verse numbers within the concatenated string.
            int ichV1    = pc1.Length;
            int ichV1Lim = ichV1 + verse1.Length;
            int ichV2    = ichV1Lim + pc2.Length;
            int ichV2Lim = ichV2 + verse2.Length;

            ITsStrBldr builder = m_tsf.MakeString(pc1 + verse1 + pc2 + verse2 + pc3, m_wsVern).GetBldr();

            builder.SetStrPropValue(ichV1, ichV1Lim, (int)FwTextPropType.ktptNamedStyle,
                                    ScrStyleNames.VerseNumber);
            builder.SetStrPropValue(ichV2, ichV2Lim, (int)FwTextPropType.ktptNamedStyle,
                                    ScrStyleNames.VerseNumber);
            m_para.Contents = builder.GetString();

            using (ParagraphParser parser = new ParagraphParser(m_para))
            {
                List <int> eosIndexes;
                var        segments = parser.CollectSegments(m_para.Contents, out eosIndexes);

                Assert.AreEqual(5, segments.Count);
                foreach (int iSegment in new int[] { 0, 2, 4 })
                {
                    Assert.AreEqual("", ScriptureServices.VerseSegLabel(m_para, iSegment));
                }
            }
        }
Example #10
0
        /// <summary>
        /// Verifies segment breaks for strings containing ellipses, Scripture-style references,
        /// runs of two and four periods, and periods next to digits.
        /// </summary>
        public void EllipsesAndRefs()
        {
            ITsStrFactory   factory = TsStrFactoryClass.Create();
            ParagraphParser parser  = new ParagraphParser(m_para);
            List <int>      breaks;
            List <int>      segments;

            // Case 1: ellipses and references inside one sentence -- a single segment is expected.
            string    sentence = "This is...not ... a simple sentence; it discusses Scripture (Gen 1.2 and Rom 1.2-4.5) and has ellipses.";
            ITsString contents = factory.MakeString(sentence, 1);

            m_para.Contents.UnderlyingTsString = contents;
            segments = parser.CollectSegmentAnnotations(contents, out breaks);
            VerifyBreaks(new int[] { sentence.Length - 1 }, breaks, "ellipses verse period string");
            Assert.AreEqual(1, segments.Count);
            VerifySegment(segments[0], 0, sentence.Length, m_para.Hvo, "ellipses verse period");

            // Case 2: two periods in the middle and again at the end -- two segments are expected.
            string head            = "Here we have";
            string twoDots         = "..";
            string tail            = "just two periods, and at the end, another two";
            int    ichAfterTwoDots = head.Length + twoDots.Length;

            contents = factory.MakeString(head + twoDots + tail + twoDots, 1);
            m_para.Contents.UnderlyingTsString = contents;
            segments = parser.CollectSegmentAnnotations(contents, out breaks);
            VerifyBreaks(new int[] { head.Length, ichAfterTwoDots + tail.Length }, breaks, "string with double dots");
            Assert.AreEqual(2, segments.Count);
            VerifySegment(segments[0], 0, ichAfterTwoDots, m_para.Hvo, "string with double dots(1)");
            VerifySegment(segments[1], ichAfterTwoDots, contents.Length, m_para.Hvo, "string with double dots(2)");

            // Case 3: a trailing ellipsis -- no breaks are expected, one segment.
            string endsWithEllipsis = "This sentence ends with an ellipsis...";

            contents = factory.MakeString(endsWithEllipsis, 1);
            m_para.Contents.UnderlyingTsString = contents;
            segments = parser.CollectSegmentAnnotations(contents, out breaks);
            VerifyBreaks(new int[] {  }, breaks, "string with final ellipsis");
            Assert.AreEqual(1, segments.Count);
            VerifySegment(segments[0], 0, endsWithEllipsis.Length, m_para.Hvo, "string with final ellipsis");

            // Case 4: same layout as case 2 but with runs of four periods.
            string fourDots         = "....";
            int    ichAfterFourDots = head.Length + fourDots.Length;

            contents = factory.MakeString(head + fourDots + tail + fourDots, 1);
            m_para.Contents.UnderlyingTsString = contents;
            segments = parser.CollectSegmentAnnotations(contents, out breaks);
            VerifyBreaks(new int[] { head.Length, ichAfterFourDots + tail.Length }, breaks, "string with four dots");
            Assert.AreEqual(2, segments.Count);
            VerifySegment(segments[0], 0, ichAfterFourDots, m_para.Hvo, "string with four dots(1)");
            VerifySegment(segments[1], ichAfterFourDots, contents.Length, m_para.Hvo, "string with four dots(2)");

            // Case 5: two periods with surrounding numbers.
            string numberHead = "Here is a number and two dots: 5";
            string numberTail = "2 and another number, and the final dot has a number before it: 2.";

            contents = factory.MakeString(numberHead + twoDots + numberTail, 1);
            m_para.Contents.UnderlyingTsString = contents;
            segments = parser.CollectSegmentAnnotations(contents, out breaks);
            VerifyBreaks(new int[] { numberHead.Length, numberHead.Length + twoDots.Length + numberTail.Length - 1 },
                         breaks, "string with numbers and double dots");
            Assert.AreEqual(2, segments.Count);

            // The first segment takes the two dots and also the digit and space that follow them.
            int ichFirstSegLim = numberHead.Length + twoDots.Length + 2;

            VerifySegment(segments[0], 0, ichFirstSegLim, m_para.Hvo, "string with numbers and double dots(1)");
            VerifySegment(segments[1], ichFirstSegLim, contents.Length, m_para.Hvo, "string with numbers and double dots(2)");
        }
 /// <summary>
 /// Return true if this occurrence is a phrase (and so can be broken down to wordforms).
 /// An occurrence without a wordform is never a phrase; otherwise the answer comes from
 /// ParagraphParser.IsPhrase applied to the baseline text.
 /// </summary>
 /// <returns>True when the occurrence has a wordform and its baseline text is a phrase.</returns>
 public bool CanBreakPhrase()
 {
     // Short-circuit: IsPhrase is only consulted when there is a wordform.
     return HasWordform && ParagraphParser.IsPhrase(Segment.Cache, BaselineText);
 }
        /// <summary>
        /// Dispose ParagraphParser after each test, clear the field holding it, and then run the
        /// base class tear-down.
        /// </summary>
        public override void TestTearDown()
        {
            if (m_pp != null)
            {
                m_pp.Dispose();
                m_pp = null;
            }

            base.TestTearDown();
        }
Example #13
0
        /// <summary>
        /// Opens the word-processing document at <paramref name="path"/> and sets up the paragraph
        /// and run parsers plus the list of processors.
        /// </summary>
        /// <param name="path">Path of the document to open.</param>
        /// <param name="processors">Processors appended to the parser's processor list.</param>
        /// <param name="isEditable">Forwarded to WordprocessingDocument.Open; defaults to false.</param>
        public DocxParser(string path, IEnumerable <IProcessor> processors, bool isEditable = false)
        {
            _wordDocument = WordprocessingDocument.Open(path, isEditable);

            var mainPart = _wordDocument.MainDocumentPart;

            _document = mainPart.Document;

            // NOTE(review): _numberingManager is not assigned in this constructor; presumably it
            // has a field initializer -- confirm against the class declaration.
            _paragraphParser = new ParagraphParser(mainPart, _numberingManager);
            _runParser       = new RunParser();

            _processors.AddRange(processors);
        }
        /// <summary>
        /// Parses three blank-line-separated blocks, of which only the first carries an explicit
        /// "p. " prefix, and expects the document to end up with two child elements.
        /// </summary>
        public void Parse()
        {
            const string input = "p. Paragraph one\r\n\r\nImplicit paragraph\r\n\r\nNot a paragraph";

            DomDocument         target = new DomDocument();
            IBlockElementParser parser = new ParagraphParser();

            parser.Parse(null, target, input);

            Assert.AreEqual(2, target.ChildElements.Count);
        }
 /// <summary>
 /// Stores the builder's string as the contents of <paramref name="para"/> and returns the
 /// segments a ParagraphParser collects for those contents.
 /// </summary>
 /// <param name="bldr">Builder holding the new paragraph contents.</param>
 /// <param name="para">Paragraph whose contents are replaced.</param>
 /// <returns>The segments returned by CollectSegments.</returns>
 private IList <ISegment> GetSegments(ITsStrBldr bldr, IScrTxtPara para)
 {
     para.Contents = bldr.GetString();

     using (ParagraphParser parser = new ParagraphParser(para))
     {
         List <int> ignoredEosIndexes; // required out argument; not used by callers of this helper
         return parser.CollectSegments(para.Contents, out ignoredEosIndexes);
     }
 }
Example #16
0
        /// <summary>
        /// Stores the builder's string as the contents of <paramref name="para"/>, collects the
        /// segment annotations for it, caches them on the paragraph under ktagParaSegments, and
        /// returns them.
        /// </summary>
        /// <param name="bldr">Builder holding the new paragraph contents.</param>
        /// <param name="para">Paragraph whose contents are replaced.</param>
        /// <returns>The list returned by CollectSegmentAnnotations.</returns>
        private List <int> GetSegments(ITsStrBldr bldr, ScrTxtPara para)
        {
            para.Contents.UnderlyingTsString = bldr.GetString();

            List <int>      ignoredEosIndexes; // required out argument; not used here
            ParagraphParser parser = new ParagraphParser(para);
            List <int>      result = parser.CollectSegmentAnnotations(para.Contents.UnderlyingTsString, out ignoredEosIndexes);

            Cache.VwCacheDaAccessor.CacheVecProp(para.Hvo, ktagParaSegments, result.ToArray(), result.Count);
            return result;
        }
        /// <summary>
        /// Input made up only of empty and whitespace-only lines is expected to yield no paragraphs.
        /// </summary>
        public void ParagraphParser_EmptyCase()
        {
            string[] blankLines = new string[] { "", " ", "" };

            List <Paragraph> parsed = ParagraphParser.ParseParagraphs(blankLines);

            Assert.Empty(parsed);
        }
Example #18
0
        /// <summary>
        /// Ensure that the segments property of the paragraph is consistent with its contents and consists of real
        /// database objects.
        /// </summary>
        /// <param name="para">Paragraph whose segments are collected and cached.</param>
        /// <param name="wsBt">Writing system passed on to LoadSegmentFreeTranslations.</param>
        /// <returns>The cache of <paramref name="para"/>.</returns>
        internal static FdoCache EnsureMainParaSegments(IStTxtPara para, int wsBt)
        {
            List <int> ignoredEosOffsets; // required out argument; not used here
            List <int> segments = new ParagraphParser(para).CollectSegmentAnnotationsOfPara(out ignoredEosOffsets);

            // Make sure the segments list is up to date.
            FdoCache paraCache = para.Cache;

            paraCache.VwCacheDaAccessor.CacheVecProp(para.Hvo, StTxtPara.SegmentsFlid(paraCache), segments.ToArray(), segments.Count);

            // This further makes sure all are real.
            StTxtPara.LoadSegmentFreeTranslations(new int[] { para.Hvo }, paraCache, wsBt);
            return paraCache;
        }
Example #19
0
        /// <summary>
        /// Appends two paragraphs, each holding the same two identical sentences, sets up default
        /// word glosses on the first, reparses the text, exports with the "elan" target, and then
        /// validates the export metadata, both paragraphs, and the number of distinct guids.
        /// </summary>
        public void ExportPhraseWordGuids()
        {
            // Create two paragraphs with two identical sentences.
            // Copy a text of first paragraph into a new paragraph to generate guesses.
            // Direct casts are used instead of 'as' so that an unexpected type fails right here
            // with an InvalidCastException rather than a NullReferenceException on first use.
            StTxtPara paraGlossed = (StTxtPara)m_text1.ContentsOA.ParagraphsOS.Append(new StTxtPara());
            StTxtPara paraGuessed = (StTxtPara)m_text1.ContentsOA.ParagraphsOS.Append(new StTxtPara());

            paraGlossed.Contents.UnderlyingTsString = StringUtils.MakeTss(
                "xxxwordone xxxwordtwo xxxwordthree. xxxwordone xxxwordtwo xxxwordthree.",
                Cache.DefaultVernWs);
            paraGuessed.Contents.UnderlyingTsString = paraGlossed.Contents.UnderlyingTsString;

            // Set up the glosses in the first paragraph. The returned list was never read,
            // so it is no longer stored; the call itself is still made.
            ParagraphAnnotator paGlossed = new ParagraphAnnotator(paraGlossed);

            paGlossed.SetupDefaultWordGlosses();

            // Then verify we've created guesses for the new text.
            ParagraphAnnotator paGuessed = new ParagraphAnnotator(paraGuessed);
            bool fDidParse; // required by ParseText's out parameter; value not used here

            ParagraphParser.ParseText(m_text1.ContentsOA, new NullProgressState(), out fDidParse);
            paGuessed.LoadParaDefaultAnalyses();

            // Export the paragraph and test the Display results.
            m_choices.Add(InterlinLineChoices.kflidWord);
            m_choices.Add(InterlinLineChoices.kflidWordGloss);
            m_choices.Add(InterlinLineChoices.kflidMorphemes);
            m_choices.Add(InterlinLineChoices.kflidLexEntries);
            m_choices.Add(InterlinLineChoices.kflidLexGloss);
            m_choices.Add(InterlinLineChoices.kflidLexPos);

            XmlDocument exportedDoc = ExportToXml("elan");

            // Validate that we included the expected metadata.
            string exportName = XmlUtils.GetOptionalAttributeValue(exportedDoc.DocumentElement, "exportTarget");

            Assert.AreEqual("elan", exportName);
            string version = XmlUtils.GetOptionalAttributeValue(exportedDoc.DocumentElement, "version");

            Assert.AreEqual("1", version);
            ExportedInterlinearReader         exportReader = new ExportedInterlinearReader(exportedDoc, m_choices);
            ExportedParagraphValidatorForELAN validator    = new ExportedParagraphValidatorForELAN(exportReader, paraGlossed);

            validator.ValidateParagraphs(paraGlossed, exportReader.GetParaNode(paraGlossed.IndexInOwner));
            validator.ValidateParagraphs(paraGuessed, exportReader.GetParaNode(paraGuessed.IndexInOwner));

            // Only expecting to collect a total of 2 paragraph guids,
            // each paragraph with 2 phrase guids (2*2)
            // and each phrase with 3 word guids (2*2*3).
            validator.ValidateNonrepeatingGuidCount(2 + 2 * 2 + 2 * 2 * 3);
        }
        /// <summary>
        /// Parses m_txtPara and fills m_occurrences with one AnalysisOccurrence for each analysis
        /// in the paragraph's first segment.
        /// </summary>
        private void ParseText()
        {
            using (var parser = new ParagraphParser(Cache))
            {
                parser.Parse(m_txtPara);
            }

            var firstSegment  = m_txtPara.SegmentsOS[0];
            int analysisCount = firstSegment.AnalysesRS.ToArray().Length;

            m_occurrences = new AnalysisOccurrence[analysisCount];
            for (int index = 0; index < m_occurrences.Length; index++)
            {
                m_occurrences[index] = new AnalysisOccurrence(firstSegment, index);
            }
        }
Example #21
0
        /// <summary>
        /// non-undoable task: creates a text with two paragraphs (m_para0, m_para1), parses every
        /// paragraph whose parse is not current, and records the expected analysis occurrences for
        /// the whole text (m_expectedOccurrences) and for the first paragraph alone
        /// (m_expectedOccurrencesPara0).
        /// </summary>
        private void DoSetupFixture()
        {
            var textFactory   = Cache.ServiceLocator.GetInstance <ITextFactory>();
            var stTextFactory = Cache.ServiceLocator.GetInstance <IStTextFactory>();

            // Note: the new text is not added to Cache.LangProject.TextsOC.
            m_text            = textFactory.Create();
            m_stText          = stTextFactory.Create();
            m_text.ContentsOA = m_stText;

            m_para0          = m_stText.AddNewTextPara(null);
            m_para0.Contents = TsStringUtils.MakeString("Xxxhope xxxthis xxxwill xxxdo. xxxI xxxhope.", Cache.DefaultVernWs);
            m_para1          = m_stText.AddNewTextPara(null);
            m_para1.Contents = TsStringUtils.MakeString("Xxxcertain xxxto xxxcatch xxxa xxxfrog. xxxCertainly xxxcan xxxon xxxLake xxxMonroe.", Cache.DefaultVernWs);

            using (ParagraphParser parser = new ParagraphParser(Cache))
            {
                foreach (IStTxtPara paragraph in m_stText.ParagraphsOS)
                {
                    // Paragraphs that are already up to date are left alone.
                    if (!paragraph.ParseIsCurrent)
                    {
                        parser.Parse(paragraph);
                    }
                }
            }

            // Every analysis of every segment of every paragraph, in document order.
            m_expectedOccurrences = new List <AnalysisOccurrence>();
            foreach (IStTxtPara paragraph in m_stText.ParagraphsOS)
            {
                foreach (var segment in paragraph.SegmentsOS)
                {
                    for (int index = 0; index < segment.AnalysesRS.Count; index++)
                    {
                        m_expectedOccurrences.Add(new AnalysisOccurrence(segment, index));
                    }
                }
            }

            // The same, restricted to the first paragraph.
            m_expectedOccurrencesPara0 = new List <AnalysisOccurrence>();
            foreach (var segment in m_para0.SegmentsOS)
            {
                for (int index = 0; index < segment.AnalysesRS.Count; index++)
                {
                    m_expectedOccurrencesPara0.Add(new AnalysisOccurrence(segment, index));
                }
            }
        }
        /// <summary>
        /// Creates m_text1 with a single paragraph of test words, parses it, and creates the
        /// sandbox under test (m_sandbox) with the default line choices for the
        /// GlossAddWordsToLexicon mode.
        /// </summary>
        public override void Initialize()
        {
            CheckDisposed();
            base.Initialize();
            m_text1            = Cache.LangProject.TextsOC.Add(new Text());
            m_text1.ContentsOA = new StText();
            m_text1.ContentsOA.ParagraphsOS.Append(new StTxtPara());

            // Direct cast instead of 'as': if the first paragraph is ever not an StTxtPara this
            // fails with an InvalidCastException here rather than a NullReferenceException below.
            StTxtPara firstPara = (StTxtPara)m_text1.ContentsOA.ParagraphsOS[0];

            firstPara.Contents.UnderlyingTsString =
                StringUtils.MakeTss("xxxa xxxb xxxc xxxd xxxe, xxxa xxxb.", Cache.DefaultVernWs);
            bool fDidParse; // required by ParseText's out parameter; value not used here

            ParagraphParser.ParseText(m_text1.ContentsOA, new NullProgressState(), out fDidParse);
            InterlinLineChoices lineChoices = InterlinLineChoices.DefaultChoices(0, Cache.DefaultAnalWs, Cache.LangProject,
                                                                                 InterlinLineChoices.InterlinMode.GlossAddWordsToLexicon);

            m_sandbox = new SandboxForTests(Cache, lineChoices);
        }
Example #23
0
        /// <summary>
        /// Parses every paragraph of the concordance decorator's interesting texts whose parse is
        /// not current. All the parsing runs inside one NonUndoableUnitOfWorkHelper.DoSomehow
        /// call; nothing is done when no paragraph needs parsing.
        /// </summary>
        private void ParseUnparsedParagraphs()
        {
            ConcDecorator decorator = ConcDecorator;

            // Materialize the list up front so the set of paragraphs is fixed before parsing starts.
            IStTxtPara[] staleParagraphs = decorator.InterestingTexts
                                           .SelectMany(text => text.ParagraphsOS)
                                           .Cast <IStTxtPara>()
                                           .Where(paragraph => !paragraph.ParseIsCurrent)
                                           .ToArray();

            if (staleParagraphs.Length == 0)
            {
                return;
            }

            NonUndoableUnitOfWorkHelper.DoSomehow(m_cache.ActionHandlerAccessor,
                                                  () =>
            {
                foreach (IStTxtPara paragraph in staleParagraphs)
                {
                    ParagraphParser.ParseParagraph(paragraph);
                }
            });
        }
 /// <summary>
 /// Break an occurrence that is a phrase into its constituent wordforms. When the parser
 /// finds more than one form in this occurrence's range, the single analysis is replaced by
 /// those forms and the old wordform is deleted if possible; otherwise nothing changes.
 /// </summary>
 public void BreakPhrase()
 {
     using (var parser = new ParagraphParser(Paragraph))
     {
         // This is a new paragraph parser, and we haven't set up any pre-existing analyses, so it doesn't matter
         // what we pass for cWfAnalysisPrev.
         IList <IAnalysis> pieces = parser.CollectSegmentForms(GetMyBeginOffsetInPara(), GetMyEndOffsetInPara(), 0, false);

         if (pieces.Count <= 1)
         {
             return; // nothing to split
         }

         // Remember the phrase wordform before the analysis at Index is replaced.
         var phraseWordform = Analysis.Wordform;

         Segment.AnalysesRS.Replace(Index, 1, pieces.Cast <ICmObject>());
         // Enhance JohnT: for this sort of automatic deletion, I wonder whether we should make
         // stronger checks, such as that it has no analysis or glosses?
         DeleteWordformIfPossible(phraseWordform);
     }
 }
Example #25
0
        /// <summary>
        /// Handles the FileOk event of openFileDialog1: reads the chosen file, feeds its text to a
        /// ParagraphParser, and adds one node per parsed item to treeView1. The node is labelled
        /// with the item's key and has one child node per entry in the item's value.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void openFileDialog1_FileOk(object sender, CancelEventArgs e)
        {
            string wallOfText;

            // Dispose the reader as soon as the text is read so the file handle is released,
            // even if ReadToEnd throws. The original never closed it.
            using (StreamReader reader = new StreamReader(openFileDialog1.FileName))
            {
                wallOfText = reader.ReadToEnd();
            }

            var pp = new ParagraphParser(wallOfText);

            foreach (var item in pp)
            {
                List <TreeNode> children = new List <TreeNode>();
                foreach (var childText in item.Value)
                {
                    children.Add(new TreeNode(childText));
                }
                treeView1.Nodes.Add(new TreeNode(item.Key, children.ToArray()));
            }
        }
        /// <summary>
        /// Parses m_para1 and fills m_occurrences with eight AnalysisOccurrences addressed by
        /// (segment index, analysis index) pairs.
        /// </summary>
        private void ParseTestText()
        {
            // Layout of the test text (segment number above, analysis index within the segment below):
            // Seg:   0                                1                                  2
            // Index: 0      1         2             3 0              1      2        3 0         1             2
            //        xxxpus xxxyalola xxxnihimbilira. xxxnihimbilira xxxpus xxxyalola. xxxhesyla xxxnihimbilira.
            using (var parser = new ParagraphParser(Cache))
            {
                parser.Parse(m_para1);
            }

            // Each row is { segment index, analysis index }; the last index of each segment is not listed.
            int[,] coords =
            {
                { 0, 0 }, { 0, 1 }, { 0, 2 },
                { 1, 0 }, { 1, 1 }, { 1, 2 },
                { 2, 0 }, { 2, 1 }
            };
            int occurrenceCount = coords.GetLength(0);

            m_occurrences = new AnalysisOccurrence[occurrenceCount];
            for (int row = 0; row < occurrenceCount; row++)
            {
                var segment = m_para1.SegmentsOS[coords[row, 0]];

                m_occurrences[row] = new AnalysisOccurrence(segment, coords[row, 1]);
            }
        }
Example #27
0
        /// <summary>
        /// Creates a text with the given guid containing one paragraph whose contents are
        /// <paramref name="para1Content"/> in the "en" writing system, then parses the text.
        /// </summary>
        /// <param name="guid">String form of the guid for the new text.</param>
        /// <param name="para1Content">Contents of the single paragraph.</param>
        /// <returns>The newly created text.</returns>
        private LCModel.IText MakeText(string guid, string para1Content)
        {
            var services  = Cache.ServiceLocator;
            var wsFactory = Cache.WritingSystemFactory;

            var newText = services.GetInstance <ITextFactory>().Create(Cache, new Guid(guid));
            var body    = services.GetInstance <IStTextFactory>().Create();

            newText.ContentsOA = body;

            var paragraph = services.GetInstance <IStTxtParaFactory>().Create();

            body.ParagraphsOS.Add(paragraph);

            int englishWs = wsFactory.get_Engine("en").Handle;

            paragraph.Contents = TsStringUtils.MakeString(para1Content, englishWs);
            ParagraphParser.ParseText(body);
            return newText;
        }
Example #28
0
        /// <summary>
        /// Parses and glosses <paramref name="paraToParse"/>, then stores in m_allOccurrences (keyed
        /// by the paragraph) one AnalysisOccurrence for every analysis of every segment.
        /// </summary>
        /// <param name="paraToParse">The paragraph to parse, gloss and index.</param>
        private void ParseTestParagraphWithSpecificContent(IStTxtPara paraToParse)
        {
            using (var parser = new ParagraphParser(Cache))
            {
                parser.Parse(paraToParse);
            }
            GlossParagraph(paraToParse);

            var occurrences = new List <AnalysisOccurrence>();

            foreach (var segment in paraToParse.SegmentsOS)
            {
                // The count is read once per segment, before any occurrence is created.
                int analysisCount = segment.AnalysesRS.Count;
                int index         = 0;

                while (index < analysisCount)
                {
                    occurrences.Add(new AnalysisOccurrence(segment, index));
                    index++;
                }
            }
            m_allOccurrences[paraToParse] = occurrences.ToArray();
        }
        /// <summary>
        /// Parses three paragraphs separated by blank lines (one sentence per line) and checks the
        /// paragraph count, each paragraph's weight, and each paragraph's text.
        /// </summary>
        /// <param name="trailingLines">When true, two empty lines are appended to the input.</param>
        public void ParagraphParser_BasicTest(bool trailingLines)
        {
            // The sentences of each paragraph; reused below to build the expected paragraph text.
            string[] first =
            {
                "This is the first sentence of the first paragraph.",
                "Each sentence is on one line.",
                "There is a blank line between paragraphs."
            };
            string[] second =
            {
                "Some paragraphs will be longer than others.",
                "Paragraphs with more sentences will receive higher weight scores."
            };
            string[] third =
            {
                "There could be trailing lines at the end of the file, but they should be ignored."
            };

            List <string> input = new List <string>();

            input.AddRange(first);
            input.Add("");
            input.AddRange(second);
            input.Add("");
            input.AddRange(third);

            if (trailingLines)
            {
                input.Add("");
                input.Add("");
            }

            List <Paragraph> paragraphs = ParagraphParser.ParseParagraphs(input.ToArray());

            Assert.Equal(3, paragraphs.Count);
            Assert.Equal(3, paragraphs[0].Weight);
            Assert.Equal(2, paragraphs[1].Weight);
            Assert.Equal(1, paragraphs[2].Weight);

            // The expected text is the sentences joined with no separator.
            Assert.Equal(string.Concat(first), paragraphs[0].ParagraphText);
            Assert.Equal(string.Concat(second), paragraphs[1].ParagraphText);
            Assert.Equal(string.Concat(third), paragraphs[2].ParagraphText);
        }
Example #30
0
        /// <summary>
        /// Creates a text with a single two-sentence paragraph in the default vernacular writing
        /// system, parses every paragraph of the text, and returns that paragraph.
        /// </summary>
        /// <returns>The paragraph that was created and parsed.</returns>
        private IStTxtPara MakeSimpleParsedText()
        {
            var textFactory   = Cache.ServiceLocator.GetInstance <ITextFactory>();
            var stTextFactory = Cache.ServiceLocator.GetInstance <IStTextFactory>();

            // Note: the new text is not added to Cache.LangProject.TextsOC.
            var newText = textFactory.Create();
            var body    = stTextFactory.Create();

            newText.ContentsOA = body;

            var paragraph = body.AddNewTextPara(null);

            paragraph.Contents = TsStringUtils.MakeString(
                "the book is red. the pages in the book are the color of the paper.",
                Cache.DefaultVernWs);

            using (ParagraphParser parser = new ParagraphParser(Cache))
            {
                foreach (IStTxtPara each in body.ParagraphsOS)
                {
                    parser.Parse(each);
                }
            }
            return paragraph;
        }
Example #31
0
		/// <summary>
		/// Parses <paramref name="para"/> with a ParagraphParser created for that same paragraph;
		/// the parser is disposed when parsing is done.
		/// </summary>
		/// <param name="para">The paragraph to parse.</param>
		private void ReparseParagraph(IStTxtPara para)
		{
			using (var pp = new ParagraphParser(para))
			{
				pp.Parse(para);
			}
		}
		/// <summary>
		/// Parses m_para1 and fills m_occurrences with eight AnalysisOccurrences addressed by
		/// (segment index, analysis index) pairs.
		/// </summary>
		private void ParseTestText()
		{
			// Layout of the test text (segment number above, analysis index within the segment below):
			// Seg:   0                                1                                  2
			// Index: 0      1         2             3 0              1      2        3 0         1             2
			//        xxxpus xxxyalola xxxnihimbilira. xxxnihimbilira xxxpus xxxyalola. xxxhesyla xxxnihimbilira.
			using (var parser = new ParagraphParser(Cache))
			{
				parser.Parse(m_para1);
			}

			// Each row is { segment index, analysis index }; the last index of each segment is not listed.
			int[,] coords =
			{
				{ 0, 0 }, { 0, 1 }, { 0, 2 },
				{ 1, 0 }, { 1, 1 }, { 1, 2 },
				{ 2, 0 }, { 2, 1 }
			};
			int occurrenceCount = coords.GetLength(0);

			m_occurrences = new AnalysisOccurrence[occurrenceCount];
			for (int row = 0; row < occurrenceCount; row++)
			{
				var segment = m_para1.SegmentsOS[coords[row, 0]];

				m_occurrences[row] = new AnalysisOccurrence(segment, coords[row, 1]);
			}
		}