/// <summary>
/// Builds a text with a single paragraph containing <paramref name="contents"/> (default vernacular
/// writing system), parses that paragraph, and then replaces every bare wordform in the first
/// segment with the first meaning of that wordform's first analysis.
/// </summary>
/// <param name="contents">Baseline string for the paragraph.</param>
/// <returns>The newly created text.</returns>
private FDO.IText MakeText(string contents)
{
    var newText = Cache.ServiceLocator.GetInstance<ITextFactory>().Create();
    var body = Cache.ServiceLocator.GetInstance<IStTextFactory>().Create();
    newText.ContentsOA = body;

    var paragraph = Cache.ServiceLocator.GetInstance<IStTxtParaFactory>().Create();
    body.ParagraphsOS.Add(paragraph);
    paragraph.Contents = Cache.TsStrFactory.MakeString(contents, Cache.DefaultVernWs);

    using (var parser = new ParagraphParser(Cache))
    {
        parser.Parse(paragraph);
    }

    ISegment firstSegment = paragraph.SegmentsOS.First();
    for (int index = 0; index < firstSegment.AnalysesRS.Count; index++)
    {
        var wordform = firstSegment.AnalysesRS[index] as IWfiWordform;
        if (wordform == null)
        {
            // Anything that is not a plain wordform is left untouched.
            continue;
        }

        firstSegment.AnalysesRS[index] = wordform.AnalysesOC.First().MeaningsOC.First();
    }

    return newText;
}
/// <summary>
/// Parses <paramref name="para"/> with a parser created for that paragraph; the parser is
/// disposed as soon as the parse completes.
/// </summary>
/// <param name="para">Paragraph to parse.</param>
private void ReparseParagraph(IStTxtPara para)
{
    using (var paragraphParser = new ParagraphParser(para))
    {
        paragraphParser.Parse(para);
    }
}
public void OrcIsLabel()
{
    // Verifies the break offsets and the three segments reported by CollectSegmentAnnotations
    // for a string that has an object replacement character (ORC) in the middle.
    ITsStrFactory factory = TsStrFactoryClass.Create();
    ParagraphParser parser = new ParagraphParser(m_para);

    // String with embedded ORC.
    string leadText = "This is a simple sentence";
    string orc = "\xfffc";
    string tailText = " with a footnote.";
    int ichOrc = leadText.Length;
    int ichThirdSeg = leadText.Length + orc.Length + 1;
    int ichEnd = leadText.Length + orc.Length + tailText.Length;

    ITsString tss = factory.MakeString(leadText + orc + tailText, 1);

    // To be recognized an ORC must have unique properties.
    ITsStrBldr bldr = tss.GetBldr();
    bldr.SetStrPropValue(ichOrc, ichOrc + orc.Length, (int)FwTextPropType.ktptObjData, "nonsence");
    tss = bldr.GetString();
    m_para.Contents.UnderlyingTsString = tss;

    List<int> breaks;
    List<int> segments = parser.CollectSegmentAnnotations(tss, out breaks);

    int[] expectedBreaks = new int[] { ichOrc, ichThirdSeg, ichEnd - 1 };
    VerifyBreaks(expectedBreaks, breaks, "multi-sentence string with ORC");
    Assert.AreEqual(3, segments.Count);

    // The segments break around the ORC.
    VerifySegment(segments[0], 0, ichOrc, m_para.Hvo, "first seg of multi-sentence w. ORC");
    VerifySegment(segments[1], ichOrc, ichThirdSeg, m_para.Hvo, "second seg of multi-sentence w. ORC");
    VerifySegment(segments[2], ichThirdSeg, ichEnd, m_para.Hvo, "third seg of multi-sentence w. ORC");
}
public void LeadingPunctuation()
{
    // Case 1: a sentence that both starts and ends with a question mark is one segment.
    ITsStrFactory factory = TsStrFactoryClass.Create();
    ParagraphParser parser = new ParagraphParser(m_para);
    string question = "?This is a question with special punctuation?";
    ITsString tssQuestion = factory.MakeString(question, 1);
    m_para.Contents.UnderlyingTsString = tssQuestion;

    List<int> breaks;
    List<int> segments = parser.CollectSegmentAnnotations(tssQuestion, out breaks);

    VerifyBreaks(new int[] { question.Length - 1 }, breaks, "leading QM");
    Assert.AreEqual(1, segments.Count);
    VerifySegment(segments[0], 0, question.Length, m_para.Hvo, "leading QM");

    // Case 2: now try leading punctuation following a verse number.
    string verse = "5 ";
    int cchVerse = verse.Length;
    ITsStrBldr bldr = tssQuestion.GetBldr();
    bldr.Replace(0, 0, verse, null);
    bldr.SetStrPropValue(0, 1, (int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);
    ITsString tssWithVerse = bldr.GetString();
    m_para.Contents.UnderlyingTsString = tssWithVerse;

    segments = parser.CollectSegmentAnnotations(tssWithVerse, out breaks);

    VerifyBreaks(new int[] { cchVerse, tssWithVerse.Length - 1 }, breaks, "leading verse and QM");
    Assert.AreEqual(2, segments.Count);
    VerifySegment(segments[0], 0, cchVerse, m_para.Hvo, "first seg of leading verse and QM");
    VerifySegment(segments[1], cchVerse, tssWithVerse.Length, m_para.Hvo, "second seg of leading verse and QM");
}
public void TwoSegsPerVerse()
{
    // Three runs of two sentences each, separated by verse numbers "9" and "10". Expects eight
    // segments and labels "a"/"b" on the two sentence segments of each run.
    string sentence1 = "Das Buch ist rot. ";
    string sentence2 = "Das Maedchen ist schoen.";
    string verse1 = "9";
    string sentence3 = "Der Herr ist gross.";
    string sentence4 = "Ich spreche nicht viel Deutsch.";
    string verse2 = "10";
    string sentence5 = "Was ist das?";
    string sentence6 = "Wie gehts?";

    string baseline = sentence1 + sentence2 + verse1 + sentence3 + sentence4 + verse2 + sentence5 + sentence6;
    ITsStrBldr bldr = m_tsf.MakeString(baseline, m_wsVern).GetBldr();

    // Mark both verse numbers with the verse-number style.
    int ichVerse1 = sentence1.Length + sentence2.Length;
    bldr.SetStrPropValue(ichVerse1, ichVerse1 + verse1.Length,
        (int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);
    int ichVerse2 = ichVerse1 + verse1.Length + sentence3.Length + sentence4.Length;
    bldr.SetStrPropValue(ichVerse2, ichVerse2 + verse2.Length,
        (int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);
    m_para.Contents = bldr.GetString();

    using (ParagraphParser parser = new ParagraphParser(m_para))
    {
        List<int> eosOffsets;
        var segments = parser.CollectSegments(m_para.Contents, out eosOffsets);
        Assert.AreEqual(8, segments.Count);

        // Segments 2 and 5 are not checked here.
        int[] labelledSegments = new int[] { 0, 1, 3, 4, 6, 7 };
        string[] expectedLabels = new string[] { "a", "b", "a", "b", "a", "b" };
        for (int n = 0; n < labelledSegments.Length; n++)
        {
            Assert.AreEqual(expectedLabels[n], ScriptureServices.VerseSegLabel(m_para, labelledSegments[n]));
        }
    }
}
/// <summary>
/// Creates a second text (stored in m_stText2 / m_para2) with a one-sentence paragraph, parses
/// every paragraph in it, and records one AnalysisOccurrence per analysis in m_expectedAnOcsPara2.
/// </summary>
private void Setup2ndText()
{
    var secondText = Cache.ServiceLocator.GetInstance<ITextFactory>().Create();
    // The text is not added to Cache.LangProject.TextsOC (that call was disabled in the original).
    m_stText2 = Cache.ServiceLocator.GetInstance<IStTextFactory>().Create();
    secondText.ContentsOA = m_stText2;

    m_para2 = m_stText2.AddNewTextPara(null);
    m_para2.Contents = TsStringUtils.MakeString("Small one segment paragraph.", Cache.DefaultVernWs);

    using (ParagraphParser parser = new ParagraphParser(Cache))
    {
        foreach (IStTxtPara paragraph in m_stText2.ParagraphsOS)
        {
            parser.Parse(paragraph);
        }
    }

    m_expectedAnOcsPara2 = new List<AnalysisOccurrence>();
    foreach (IStTxtPara paragraph in m_stText2.ParagraphsOS)
    {
        foreach (ISegment segment in paragraph.SegmentsOS)
        {
            for (int index = 0; index < segment.AnalysesRS.Count; index++)
            {
                m_expectedAnOcsPara2.Add(new AnalysisOccurrence(segment, index));
            }
        }
    }
}
public void ExportGuesses()
{
    // NOTE: The new test paragraphs need to have all new words w/o duplicates so we can predict
    // the guesses: xxxcrayzee xxxyouneek xxxsintents.

    // Append two paragraphs with identical contents: one gets glossed, the other receives guesses.
    StTxtPara glossedPara = m_text1.ContentsOA.ParagraphsOS.Append(new StTxtPara()) as StTxtPara;
    StTxtPara guessedPara = m_text1.ContentsOA.ParagraphsOS.Append(new StTxtPara()) as StTxtPara;
    glossedPara.Contents.UnderlyingTsString =
        StringUtils.MakeTss("xxxcrayzee xxxyouneek xxxsintents.", Cache.DefaultVernWs);
    guessedPara.Contents.UnderlyingTsString = glossedPara.Contents.UnderlyingTsString;

    // Set up default word glosses on the first of the new paragraphs. The returned list is not
    // inspected below; the call itself is what matters here.
    ParagraphAnnotator glossedAnnotator = new ParagraphAnnotator(glossedPara);
    List<int> expectedGuesses = glossedAnnotator.SetupDefaultWordGlosses();

    // Parse the whole text, then load the default analyses of the second new paragraph.
    ParagraphAnnotator guessedAnnotator = new ParagraphAnnotator(guessedPara);
    bool fDidParse;
    ParagraphParser.ParseText(m_text1.ContentsOA, new NullProgressState(), out fDidParse);
    guessedAnnotator.LoadParaDefaultAnalyses();

    // Export with the word and word-gloss lines and validate the guessed paragraph.
    m_choices.Add(InterlinLineChoices.kflidWord);
    m_choices.Add(InterlinLineChoices.kflidWordGloss);
    XmlDocument exported = ExportToXml();
    ValidateExportedParagraph(exported, m_choices, guessedPara);
}
public void TwoSegsPerVerse()
{
    // Three runs of two sentences each, separated by verse numbers "9" and "10". Expects eight
    // segment annotations and labels "a"/"b" on the two sentence segments of each run.
    string sentence1 = "Das buch ist rot. ";
    string sentence2 = "Das Madchen ist shon.";
    string verse1 = "9";
    string sentence3 = "Der Herr ist gross.";
    string sentence4 = "Ich spreche nicht viel Deutsch.";
    string verse2 = "10";
    string sentence5 = "Was is das?";
    string sentence6 = "Wie gehts?";

    string baseline = sentence1 + sentence2 + verse1 + sentence3 + sentence4 + verse2 + sentence5 + sentence6;
    ITsStrBldr bldr = m_tsf.MakeString(baseline, m_wsVern).GetBldr();

    // Mark both verse numbers with the verse-number style.
    int ichVerse1 = sentence1.Length + sentence2.Length;
    bldr.SetStrPropValue(ichVerse1, ichVerse1 + verse1.Length,
        (int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);
    int ichVerse2 = ichVerse1 + verse1.Length + sentence3.Length + sentence4.Length;
    bldr.SetStrPropValue(ichVerse2, ichVerse2 + verse2.Length,
        (int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);
    m_para.Contents.UnderlyingTsString = bldr.GetString();

    ParagraphParser parser = new ParagraphParser(m_para);
    List<int> eosOffsets;
    List<int> segments = parser.CollectSegmentAnnotations(m_para.Contents.UnderlyingTsString, out eosOffsets);
    // Cache the collected segments under ktagParaSegments, the tag passed to VerseSegLabel below.
    Cache.VwCacheDaAccessor.CacheVecProp(m_para.Hvo, ktagParaSegments, segments.ToArray(), segments.Count);
    Assert.AreEqual(8, segments.Count);

    // Segments 2 and 5 are not checked here.
    int[] labelledSegments = new int[] { 0, 1, 3, 4, 6, 7 };
    string[] expectedLabels = new string[] { "a", "b", "a", "b", "a", "b" };
    for (int n = 0; n < labelledSegments.Length; n++)
    {
        Assert.AreEqual(expectedLabels[n],
            AnnotationRefHandler.VerseSegLabel(m_para, labelledSegments[n], ktagParaSegments));
    }
}
public void OneSegPerVerse()
{
    // One sentence per run, separated by verse numbers "9" and "10". Expects five segments and
    // an empty label on segments 0, 2 and 4.
    string sentence1 = "Das Buch ist rot. ";
    string verse1 = "9";
    string sentence2 = "Der Herr ist gross.";
    string verse2 = "10";
    string sentence3 = "Ich spreche nicht viel Deutsch.";

    string baseline = sentence1 + verse1 + sentence2 + verse2 + sentence3;
    ITsStrBldr bldr = m_tsf.MakeString(baseline, m_wsVern).GetBldr();

    // Mark both verse numbers with the verse-number style.
    int ichVerse1 = sentence1.Length;
    bldr.SetStrPropValue(ichVerse1, ichVerse1 + verse1.Length,
        (int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);
    int ichVerse2 = sentence1.Length + verse1.Length + sentence2.Length;
    bldr.SetStrPropValue(ichVerse2, ichVerse2 + verse2.Length,
        (int)FwTextPropType.ktptNamedStyle, ScrStyleNames.VerseNumber);
    m_para.Contents = bldr.GetString();

    using (ParagraphParser parser = new ParagraphParser(m_para))
    {
        List<int> eosOffsets;
        var segments = parser.CollectSegments(m_para.Contents, out eosOffsets);
        Assert.AreEqual(5, segments.Count);

        foreach (int segmentIndex in new int[] { 0, 2, 4 })
        {
            Assert.AreEqual("", ScriptureServices.VerseSegLabel(m_para, segmentIndex));
        }
    }
}
public void EllipsesAndRefs()
{
    // Exercises CollectSegmentAnnotations on strings containing runs of periods (two, three and
    // four dots) and periods that sit between digits.
    ITsStrFactory factory = TsStrFactoryClass.Create();
    ParagraphParser parser = new ParagraphParser(m_para);
    List<int> breaks;
    List<int> segments;

    // Case 1: ellipses and chapter.verse references inside a single sentence.
    string refsSentence = "This is...not ... a simple sentence; it discusses Scripture (Gen 1.2 and Rom 1.2-4.5) and has ellipses.";
    ITsString tss = factory.MakeString(refsSentence, 1);
    m_para.Contents.UnderlyingTsString = tss;
    segments = parser.CollectSegmentAnnotations(tss, out breaks);
    VerifyBreaks(new int[] { refsSentence.Length - 1 }, breaks, "ellipses verse period string");
    Assert.AreEqual(1, segments.Count);
    VerifySegment(segments[0], 0, refsSentence.Length, m_para.Hvo, "ellipses verse period");

    // Case 2: two dots in the middle and again at the end.
    string head = "Here we have";
    string twoDots = "..";
    string tail = "just two periods, and at the end, another two";
    tss = factory.MakeString(head + twoDots + tail + twoDots, 1);
    m_para.Contents.UnderlyingTsString = tss;
    segments = parser.CollectSegmentAnnotations(tss, out breaks);
    int ichAfterTwoDots = head.Length + 2;
    VerifyBreaks(new int[] { head.Length, ichAfterTwoDots + tail.Length }, breaks, "string with double dots");
    Assert.AreEqual(2, segments.Count);
    VerifySegment(segments[0], 0, ichAfterTwoDots, m_para.Hvo, "string with double dots(1)");
    VerifySegment(segments[1], ichAfterTwoDots, tss.Length, m_para.Hvo, "string with double dots(2)");

    // Case 3: a trailing three-dot ellipsis yields no break offsets.
    string endsWithEllipsis = "This sentence ends with an ellipsis...";
    tss = factory.MakeString(endsWithEllipsis, 1);
    m_para.Contents.UnderlyingTsString = tss;
    segments = parser.CollectSegmentAnnotations(tss, out breaks);
    VerifyBreaks(new int[] { }, breaks, "string with final ellipsis");
    Assert.AreEqual(1, segments.Count);
    VerifySegment(segments[0], 0, endsWithEllipsis.Length, m_para.Hvo, "string with final ellipsis");

    // Case 4: four dots in the middle and again at the end.
    string fourDots = "....";
    tss = factory.MakeString(head + fourDots + tail + fourDots, 1);
    m_para.Contents.UnderlyingTsString = tss;
    segments = parser.CollectSegmentAnnotations(tss, out breaks);
    int ichAfterFourDots = head.Length + 4;
    VerifyBreaks(new int[] { head.Length, ichAfterFourDots + tail.Length }, breaks, "string with four dots");
    Assert.AreEqual(2, segments.Count);
    VerifySegment(segments[0], 0, ichAfterFourDots, m_para.Hvo, "string with four dots(1)");
    VerifySegment(segments[1], ichAfterFourDots, tss.Length, m_para.Hvo, "string with four dots(2)");

    // Case 5: two periods with surrounding numbers.
    string numberHead = "Here is a number and two dots: 5";
    string numberTail = "2 and another number, and the final dot has a number before it: 2.";
    tss = factory.MakeString(numberHead + twoDots + numberTail, 1);
    m_para.Contents.UnderlyingTsString = tss;
    segments = parser.CollectSegmentAnnotations(tss, out breaks);
    VerifyBreaks(new int[] { numberHead.Length, numberHead.Length + 2 + numberTail.Length - 1 }, breaks,
        "string with numbers and double dots");
    Assert.AreEqual(2, segments.Count);
    // Two for the dots, plus two more because the following digit and space go in the
    // previous segment, too.
    int ichSecondSegment = numberHead.Length + 2 + 2;
    VerifySegment(segments[0], 0, ichSecondSegment, m_para.Hvo, "string with numbers and double dots(1)");
    VerifySegment(segments[1], ichSecondSegment, tss.Length, m_para.Hvo, "string with numbers and double dots(2)");
}
/// <summary>
/// Tells whether this occurrence is a phrase (and so can be broken down to wordforms).
/// </summary>
/// <returns>
/// False when the occurrence has no wordform; otherwise whatever ParagraphParser.IsPhrase reports
/// for the baseline text.
/// </returns>
public bool CanBreakPhrase()
{
    return HasWordform && ParagraphParser.IsPhrase(Segment.Cache, BaselineText);
}
/// <summary>
/// Disposes and clears the ParagraphParser held in m_pp (if any) after each test, then runs the
/// base class teardown.
/// </summary>
public override void TestTearDown()
{
    if (m_pp != null)
    {
        m_pp.Dispose();
        m_pp = null;
    }

    base.TestTearDown();
}
/// <summary>
/// Opens the Word document at <paramref name="path"/> and sets up the paragraph parser, run parser
/// and processor list used by this instance.
/// </summary>
/// <param name="path">Path of the document to open.</param>
/// <param name="processors">Processors appended to this parser's processor list.</param>
/// <param name="isEditable">Passed through to WordprocessingDocument.Open.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="processors"/> is null.</exception>
public DocxParser(string path, IEnumerable<IProcessor> processors, bool isEditable = false)
{
    // Validate before opening the package so a bad argument cannot leave an open document behind.
    if (processors == null)
    {
        throw new System.ArgumentNullException(nameof(processors));
    }

    _wordDocument = WordprocessingDocument.Open(path, isEditable);
    try
    {
        _document = _wordDocument.MainDocumentPart.Document;
        _paragraphParser = new ParagraphParser(_wordDocument.MainDocumentPart, _numberingManager);
        _runParser = new RunParser();
        _processors.AddRange(processors);
    }
    catch
    {
        // Construction failed after the package was opened: release it, since the caller never
        // receives an instance it could dispose.
        _wordDocument.Dispose();
        throw;
    }
}
public void Parse()
{
    // Feeds three blank-line-separated chunks of markup to ParagraphParser and expects the
    // document to end up with exactly two child elements.
    const string markup = "p. Paragraph one\r\n\r\nImplicit paragraph\r\n\r\nNot a paragraph";

    DomDocument dom = new DomDocument();
    IBlockElementParser parser = new ParagraphParser();

    parser.Parse(null, dom, markup);

    Assert.AreEqual(2, dom.ChildElements.Count);
}
/// <summary>
/// Stores the builder's string as the contents of <paramref name="para"/> and returns the segments
/// that a ParagraphParser collects for it.
/// </summary>
/// <param name="bldr">Builder supplying the paragraph contents.</param>
/// <param name="para">Paragraph whose contents are replaced and then segmented.</param>
/// <returns>The segments returned by CollectSegments; the end-of-sentence offsets are discarded.</returns>
private IList<ISegment> GetSegments(ITsStrBldr bldr, IScrTxtPara para)
{
    para.Contents = bldr.GetString();

    using (ParagraphParser parser = new ParagraphParser(para))
    {
        List<int> ignoredEosOffsets;
        return parser.CollectSegments(para.Contents, out ignoredEosOffsets);
    }
}
/// <summary>
/// Stores the builder's string as the contents of <paramref name="para"/>, collects its segment
/// annotations, and caches them on the paragraph under ktagParaSegments.
/// </summary>
/// <param name="bldr">Builder supplying the paragraph contents.</param>
/// <param name="para">Paragraph whose contents are replaced and then segmented.</param>
/// <returns>The list returned by CollectSegmentAnnotations; the end-of-sentence offsets are discarded.</returns>
private List<int> GetSegments(ITsStrBldr bldr, ScrTxtPara para)
{
    para.Contents.UnderlyingTsString = bldr.GetString();

    ParagraphParser parser = new ParagraphParser(para);
    List<int> ignoredEosOffsets;
    List<int> segmentList = parser.CollectSegmentAnnotations(para.Contents.UnderlyingTsString, out ignoredEosOffsets);

    Cache.VwCacheDaAccessor.CacheVecProp(para.Hvo, ktagParaSegments, segmentList.ToArray(), segmentList.Count);
    return segmentList;
}
public void ParagraphParser_EmptyCase()
{
    // Input consisting only of empty or whitespace lines must produce no paragraphs.
    string[] blankLines = new string[] { "", " ", "" };

    List<Paragraph> paragraphs = ParagraphParser.ParseParagraphs(blankLines);

    Assert.Empty(paragraphs);
}
/// <summary>
/// Brings the paragraph's segments property in line with its contents and makes sure the segments
/// are real database objects.
/// </summary>
/// <param name="para">Paragraph whose segments are refreshed.</param>
/// <param name="wsBt">Writing system handed to StTxtPara.LoadSegmentFreeTranslations.</param>
/// <returns>The cache of <paramref name="para"/>.</returns>
internal static FdoCache EnsureMainParaSegments(IStTxtPara para, int wsBt)
{
    ParagraphParser parser = new ParagraphParser(para);
    List<int> eosOffsets;
    List<int> segmentList = parser.CollectSegmentAnnotationsOfPara(out eosOffsets);

    // Make sure the segments list is up to date.
    FdoCache fdoCache = para.Cache;
    int segmentsFlid = StTxtPara.SegmentsFlid(fdoCache);
    fdoCache.VwCacheDaAccessor.CacheVecProp(para.Hvo, segmentsFlid, segmentList.ToArray(), segmentList.Count);

    // This further makes sure all are real.
    StTxtPara.LoadSegmentFreeTranslations(new int[] { para.Hvo }, fdoCache, wsBt);
    return fdoCache;
}
public void ExportPhraseWordGuids()
{
    // Create two paragraphs with two identical sentences each: one gets glossed, the other is a
    // copy of its text that receives guesses.
    StTxtPara glossedPara = m_text1.ContentsOA.ParagraphsOS.Append(new StTxtPara()) as StTxtPara;
    StTxtPara guessedPara = m_text1.ContentsOA.ParagraphsOS.Append(new StTxtPara()) as StTxtPara;
    glossedPara.Contents.UnderlyingTsString = StringUtils.MakeTss(
        "xxxwordone xxxwordtwo xxxwordthree. xxxwordone xxxwordtwo xxxwordthree.",
        Cache.DefaultVernWs);
    guessedPara.Contents.UnderlyingTsString = glossedPara.Contents.UnderlyingTsString;

    // Set up default word glosses on the first of the new paragraphs. The returned list is not
    // inspected below; the call itself is what matters here.
    ParagraphAnnotator glossedAnnotator = new ParagraphAnnotator(glossedPara);
    List<int> expectedGuesses = glossedAnnotator.SetupDefaultWordGlosses();

    // Parse the whole text, then load the default analyses of the second new paragraph.
    ParagraphAnnotator guessedAnnotator = new ParagraphAnnotator(guessedPara);
    bool fDidParse;
    ParagraphParser.ParseText(m_text1.ContentsOA, new NullProgressState(), out fDidParse);
    guessedAnnotator.LoadParaDefaultAnalyses();

    // Export with word, word-gloss, morpheme and lexical lines enabled, targeting "elan".
    m_choices.Add(InterlinLineChoices.kflidWord);
    m_choices.Add(InterlinLineChoices.kflidWordGloss);
    m_choices.Add(InterlinLineChoices.kflidMorphemes);
    m_choices.Add(InterlinLineChoices.kflidLexEntries);
    m_choices.Add(InterlinLineChoices.kflidLexGloss);
    m_choices.Add(InterlinLineChoices.kflidLexPos);
    XmlDocument exported = ExportToXml("elan");

    // Validate that we included the expected metadata.
    string exportTarget = XmlUtils.GetOptionalAttributeValue(exported.DocumentElement, "exportTarget");
    Assert.AreEqual("elan", exportTarget);
    string exportVersion = XmlUtils.GetOptionalAttributeValue(exported.DocumentElement, "version");
    Assert.AreEqual("1", exportVersion);

    ExportedInterlinearReader reader = new ExportedInterlinearReader(exported, m_choices);
    ExportedParagraphValidatorForELAN validator = new ExportedParagraphValidatorForELAN(reader, glossedPara);
    validator.ValidateParagraphs(glossedPara, reader.GetParaNode(glossedPara.IndexInOwner));
    validator.ValidateParagraphs(guessedPara, reader.GetParaNode(guessedPara.IndexInOwner));

    // Only expecting to collect a total of 2 paragraph guids, each paragraph with 2 phrase guids
    // (2*2), and each phrase with 3 word guids (2*2*3).
    const int paragraphGuids = 2;
    const int phraseGuids = 2 * 2;
    const int wordGuids = 2 * 2 * 3;
    validator.ValidateNonrepeatingGuidCount(paragraphGuids + phraseGuids + wordGuids);
}
/// <summary>
/// Parses m_txtPara and fills m_occurrences with one AnalysisOccurrence per analysis in the
/// paragraph's first segment.
/// </summary>
private void ParseText()
{
    using (var parser = new ParagraphParser(Cache))
    {
        parser.Parse(m_txtPara);
    }

    var firstSegment = m_txtPara.SegmentsOS[0];
    var analysisCount = firstSegment.AnalysesRS.ToArray().Length;

    m_occurrences = new AnalysisOccurrence[analysisCount];
    for (var index = 0; index < analysisCount; index++)
    {
        m_occurrences[index] = new AnalysisOccurrence(firstSegment, index);
    }
}
/// <summary>
/// Non-undoable task: creates the fixture text with two paragraphs, parses any paragraph whose
/// parse is not current, and records the expected occurrences for the whole text
/// (m_expectedOccurrences) and for the first paragraph alone (m_expectedOccurrencesPara0).
/// </summary>
private void DoSetupFixture()
{
    m_text = Cache.ServiceLocator.GetInstance<ITextFactory>().Create();
    // The text is not added to Cache.LangProject.TextsOC (that call was disabled in the original).
    m_stText = Cache.ServiceLocator.GetInstance<IStTextFactory>().Create();
    m_text.ContentsOA = m_stText;

    m_para0 = m_stText.AddNewTextPara(null);
    m_para0.Contents = TsStringUtils.MakeString(
        "Xxxhope xxxthis xxxwill xxxdo. xxxI xxxhope.",
        Cache.DefaultVernWs);
    m_para1 = m_stText.AddNewTextPara(null);
    m_para1.Contents = TsStringUtils.MakeString(
        "Xxxcertain xxxto xxxcatch xxxa xxxfrog. xxxCertainly xxxcan xxxon xxxLake xxxMonroe.",
        Cache.DefaultVernWs);

    using (ParagraphParser parser = new ParagraphParser(Cache))
    {
        foreach (IStTxtPara paragraph in m_stText.ParagraphsOS)
        {
            if (!paragraph.ParseIsCurrent)
            {
                parser.Parse(paragraph);
            }
        }
    }

    // Every analysis of every segment in the text, in paragraph/segment order.
    m_expectedOccurrences = new List<AnalysisOccurrence>();
    foreach (IStTxtPara paragraph in m_stText.ParagraphsOS)
    {
        foreach (var segment in paragraph.SegmentsOS)
        {
            for (int index = 0; index < segment.AnalysesRS.Count; index++)
            {
                m_expectedOccurrences.Add(new AnalysisOccurrence(segment, index));
            }
        }
    }

    // The same, restricted to the first paragraph.
    m_expectedOccurrencesPara0 = new List<AnalysisOccurrence>();
    foreach (var segment in m_para0.SegmentsOS)
    {
        for (int index = 0; index < segment.AnalysesRS.Count; index++)
        {
            m_expectedOccurrencesPara0.Add(new AnalysisOccurrence(segment, index));
        }
    }
}
/// <summary>
/// Creates m_text1 with one parsed paragraph and a SandboxForTests configured with the default
/// line choices for the GlossAddWordsToLexicon mode.
/// </summary>
public override void Initialize()
{
    CheckDisposed();
    base.Initialize();

    // Build a text with a single paragraph.
    m_text1 = Cache.LangProject.TextsOC.Add(new Text());
    m_text1.ContentsOA = new StText();
    m_text1.ContentsOA.ParagraphsOS.Append(new StTxtPara());
    StTxtPara firstPara = m_text1.ContentsOA.ParagraphsOS[0] as StTxtPara;
    firstPara.Contents.UnderlyingTsString =
        StringUtils.MakeTss("xxxa xxxb xxxc xxxd xxxe, xxxa xxxb.", Cache.DefaultVernWs);

    bool fDidParse;
    ParagraphParser.ParseText(m_text1.ContentsOA, new NullProgressState(), out fDidParse);

    InterlinLineChoices choices = InterlinLineChoices.DefaultChoices(
        0,
        Cache.DefaultAnalWs,
        Cache.LangProject,
        InterlinLineChoices.InterlinMode.GlossAddWordsToLexicon);
    m_sandbox = new SandboxForTests(Cache, choices);
}
/// <summary>
/// Parses every paragraph of the decorator's interesting texts whose parse is not current. All
/// parsing is passed to NonUndoableUnitOfWorkHelper.DoSomehow as one action; nothing is started
/// when no paragraph needs parsing.
/// </summary>
private void ParseUnparsedParagraphs()
{
    ConcDecorator decorator = ConcDecorator;
    IStTxtPara[] staleParagraphs = decorator.InterestingTexts
        .SelectMany(txt => txt.ParagraphsOS)
        .Cast<IStTxtPara>()
        .Where(para => !para.ParseIsCurrent)
        .ToArray();

    if (staleParagraphs.Length <= 0)
    {
        return;
    }

    NonUndoableUnitOfWorkHelper.DoSomehow(m_cache.ActionHandlerAccessor, () =>
    {
        foreach (IStTxtPara paragraph in staleParagraphs)
        {
            ParagraphParser.ParseParagraph(paragraph);
        }
    });
}
/// <summary>
/// Splits a phrase occurrence into its constituent wordforms. When the span of this occurrence
/// yields more than one form, the single analysis at Index is replaced by those forms and the old
/// wordform is handed to DeleteWordformIfPossible.
/// </summary>
public void BreakPhrase()
{
    using (var parser = new ParagraphParser(Paragraph))
    {
        // This is a new paragraph parser, and we haven't set up any pre-existing analyses, so it
        // doesn't matter what we pass for cWfAnalysisPrev.
        IList<IAnalysis> pieces = parser.CollectSegmentForms(
            GetMyBeginOffsetInPara(), GetMyEndOffsetInPara(), 0, false);
        if (pieces.Count <= 1)
        {
            // Nothing to break apart.
            return;
        }

        var phraseWordform = Analysis.Wordform;
        Segment.AnalysesRS.Replace(Index, 1, pieces.Cast<ICmObject>());
        // Enhance JohnT: for this sort of automatic deletion, I wonder whether we should make
        // stronger checks, such as that it has no analysis or glosses?
        DeleteWordformIfPossible(phraseWordform);
    }
}
/// <summary>
/// FileOk handler: reads the chosen file, runs it through ParagraphParser, and adds one tree node
/// per parsed entry (keyed by the entry's Key) with a child node for each item in its Value.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void openFileDialog1_FileOk(object sender, CancelEventArgs e)
{
    // Dispose the reader as soon as the text is loaded so the file handle is not left open.
    string wallOfText;
    using (StreamReader reader = new StreamReader(openFileDialog1.FileName))
    {
        wallOfText = reader.ReadToEnd();
    }

    var parser = new ParagraphParser(wallOfText);
    foreach (var entry in parser)
    {
        List<TreeNode> children = new List<TreeNode>();
        foreach (var childText in entry.Value)
        {
            children.Add(new TreeNode(childText));
        }

        treeView1.Nodes.Add(new TreeNode(entry.Key, children.ToArray()));
    }
}
/// <summary>
/// Parses m_para1 and fills m_occurrences with eight occurrences addressed by
/// (segment, index-in-segment) pairs.
/// </summary>
private void ParseTestText()
{
    // Layout noted by the original author (three segments):
    //   seg 0: xxxpus xxxyalola xxxnihimbilira.   (indices 0-3)
    //   seg 1: xxxnihimbilira xxxpus xxxyalola.   (indices 0-3)
    //   seg 2: xxxhesyla xxxnihimbilira.          (indices 0-2)
    // The last index of each segment is never used below (presumably the period).
    using (var parser = new ParagraphParser(Cache))
    {
        parser.Parse(m_para1);
    }

    int[,] segmentAndIndex =
    {
        { 0, 0 }, { 0, 1 }, { 0, 2 },
        { 1, 0 }, { 1, 1 }, { 1, 2 },
        { 2, 0 }, { 2, 1 }
    };
    int occurrenceCount = segmentAndIndex.GetLength(0);

    m_occurrences = new AnalysisOccurrence[occurrenceCount];
    for (int n = 0; n < occurrenceCount; n++)
    {
        var segment = m_para1.SegmentsOS[segmentAndIndex[n, 0]];
        m_occurrences[n] = new AnalysisOccurrence(segment, segmentAndIndex[n, 1]);
    }
}
/// <summary>
/// Creates a text with the given guid whose single paragraph holds <paramref name="para1Content"/>
/// in the writing system of the "en" engine, then parses the text.
/// </summary>
/// <param name="guid">String form of the guid for the new text.</param>
/// <param name="para1Content">Contents of the text's only paragraph.</param>
/// <returns>The newly created text.</returns>
private LCModel.IText MakeText(string guid, string para1Content)
{
    var locator = Cache.ServiceLocator;
    var wsFactory = Cache.WritingSystemFactory;

    var newText = locator.GetInstance<ITextFactory>().Create(Cache, new Guid(guid));
    var body = locator.GetInstance<IStTextFactory>().Create();
    newText.ContentsOA = body;

    var paragraph = locator.GetInstance<IStTxtParaFactory>().Create();
    body.ParagraphsOS.Add(paragraph);
    paragraph.Contents = TsStringUtils.MakeString(para1Content, wsFactory.get_Engine("en").Handle);

    ParagraphParser.ParseText(body);
    return newText;
}
/// <summary>
/// Parses and glosses <paramref name="paraToParse"/>, then stores an array with one
/// AnalysisOccurrence per analysis of each of its segments in m_allOccurrences under that paragraph.
/// </summary>
/// <param name="paraToParse">Paragraph to parse, gloss and index.</param>
private void ParseTestParagraphWithSpecificContent(IStTxtPara paraToParse)
{
    using (var parser = new ParagraphParser(Cache))
    {
        parser.Parse(paraToParse);
    }

    GlossParagraph(paraToParse);

    var occurrences = new List<AnalysisOccurrence>();
    foreach (var segment in paraToParse.SegmentsOS)
    {
        // The count is read once per segment, before any occurrence is created.
        var analysisCount = segment.AnalysesRS.Count;
        for (var index = 0; index < analysisCount; index++)
        {
            occurrences.Add(new AnalysisOccurrence(segment, index));
        }
    }

    m_allOccurrences[paraToParse] = occurrences.ToArray();
}
public void ParagraphParser_BasicTest(bool trailingLines)
{
    // Three paragraphs of 3, 2 and 1 lines, separated by blank lines. Optionally two extra blank
    // lines are appended, which must not change the result.
    const string p1s1 = "This is the first sentence of the first paragraph.";
    const string p1s2 = "Each sentence is on one line.";
    const string p1s3 = "There is a blank line between paragraphs.";
    const string p2s1 = "Some paragraphs will be longer than others.";
    const string p2s2 = "Paragraphs with more sentences will receive higher weight scores.";
    const string p3s1 = "There could be trailing lines at the end of the file, but they should be ignored.";

    List<string> input = new List<string>
    {
        p1s1, p1s2, p1s3,
        "",
        p2s1, p2s2,
        "",
        p3s1
    };
    if (trailingLines)
    {
        input.Add("");
        input.Add("");
    }

    List<Paragraph> paragraphs = ParagraphParser.ParseParagraphs(input.ToArray());

    Assert.Equal(3, paragraphs.Count);

    // Each expected weight equals the number of lines fed in for that paragraph.
    Assert.Equal(3, paragraphs[0].Weight);
    Assert.Equal(2, paragraphs[1].Weight);
    Assert.Equal(1, paragraphs[2].Weight);

    // The expected text is the paragraph's lines joined with no separator.
    Assert.Equal(p1s1 + p1s2 + p1s3, paragraphs[0].ParagraphText);
    Assert.Equal(p2s1 + p2s2, paragraphs[1].ParagraphText);
    Assert.Equal(p3s1, paragraphs[2].ParagraphText);
}
/// <summary>
/// Creates a text with one two-sentence paragraph in the default vernacular writing system and
/// parses every paragraph of that text.
/// </summary>
/// <returns>The paragraph that was created.</returns>
private IStTxtPara MakeSimpleParsedText()
{
    var newText = Cache.ServiceLocator.GetInstance<ITextFactory>().Create();
    // The text is not added to Cache.LangProject.TextsOC (that call was disabled in the original).
    var body = Cache.ServiceLocator.GetInstance<IStTextFactory>().Create();
    newText.ContentsOA = body;

    var firstPara = body.AddNewTextPara(null);
    firstPara.Contents = TsStringUtils.MakeString(
        "the book is red. the pages in the book are the color of the paper.",
        Cache.DefaultVernWs);

    using (ParagraphParser parser = new ParagraphParser(Cache))
    {
        foreach (IStTxtPara paragraph in body.ParagraphsOS)
        {
            parser.Parse(paragraph);
        }
    }

    return firstPara;
}
/// <summary>
/// Parses m_para1 and fills m_occurrences with eight occurrences addressed by
/// (segment, index-in-segment) pairs.
/// </summary>
private void ParseTestText()
{
    // Layout noted by the original author (three segments):
    //   seg 0: xxxpus xxxyalola xxxnihimbilira.   (indices 0-3)
    //   seg 1: xxxnihimbilira xxxpus xxxyalola.   (indices 0-3)
    //   seg 2: xxxhesyla xxxnihimbilira.          (indices 0-2)
    // The last index of each segment is never used below (presumably the period).
    using (var parser = new ParagraphParser(Cache))
    {
        parser.Parse(m_para1);
    }

    int[,] segmentAndIndex =
    {
        { 0, 0 }, { 0, 1 }, { 0, 2 },
        { 1, 0 }, { 1, 1 }, { 1, 2 },
        { 2, 0 }, { 2, 1 }
    };
    int occurrenceCount = segmentAndIndex.GetLength(0);

    m_occurrences = new AnalysisOccurrence[occurrenceCount];
    for (int n = 0; n < occurrenceCount; n++)
    {
        var segment = m_para1.SegmentsOS[segmentAndIndex[n, 0]];
        m_occurrences[n] = new AnalysisOccurrence(segment, segmentAndIndex[n, 1]);
    }
}