/// <summary>
/// Parse through all the given texts, even if they've been fully analyzed.
/// Collect occurrences of words and cache all paragraph and wordform related virtual properties.
/// </summary>
/// <param name="cache">cache used to load the paragraph data and to reach the wordform inventory</param>
/// <param name="hvosStText">list of hvos for StText objects</param>
/// <param name="progress">progress reporter; if null, a NullProgressState is used instead</param>
public static void ConcordTexts(FdoCache cache, int[] hvosStText, ProgressState progress)
{
    if (progress == null)
        progress = new NullProgressState();
    using (ParagraphParser pp = new ParagraphParser(cache))
    {
#if PROFILING
        long ticks = DateTime.Now.Ticks;
#endif
        // Ensure all info about paragraphs of texts and contents of paragraphs is in cache and current.
        // Enhance JohnT: possibly performance would be helped, especially in cases where we have a lot
        // of archived Scripture versions, by restricting this to just the texts in hvosStText.
        cache.LoadAllOfAnOwningVectorProp((int)StText.StTextTags.kflidParagraphs, "StText");
        cache.LoadAllOfAStringProp((int)StTxtPara.StTxtParaTags.kflidContents);
#if PROFILING
        Debug.WriteLine("Time to end of loading text data = " + (DateTime.Now.Ticks - ticks));
#endif
        pp.m_hvosStText = hvosStText;
        //// Get a list of all the paragraphs.
        //List<int> targetParagraphs = new List<int>();
        //foreach (IStText text in new FdoObjectSet<IStText>(cache, pp.m_hvosStText, true))
        //{
        //    targetParagraphs.AddRange(text.ParagraphsOS.HvoArray);
        //}
        pp.RebuildingConcordanceWordforms = true;
        // Direct cast (not 'as'): an unexpected inventory type fails right here with an
        // InvalidCastException instead of a NullReferenceException on the following line.
        WordformInventory wfi = (WordformInventory)cache.LangProject.WordformInventoryOA;
        wfi.ResetConcordanceWordformsAndOccurrences();
#if PROFILING
        Debug.WriteLine("Time to end of reset occurrenes = " + (DateTime.Now.Ticks - ticks));
#endif
        ParagraphParser.ResetParseSessionDependentStaticData();

        // Estimate the total number of milestones we'll set.
        // Enhance: we could construct a way to set percentage done based upon
        // number of texts and paragraphs in each text.
        // A single 'as' + null check replaces the former 'is' test followed by an 'as' cast.
        MilestoneProgressState mp = progress as MilestoneProgressState;
        if (mp != null)
        {
            // One batch of parse milestones is added per text to be parsed.
            for (int i = 0; i < pp.m_hvosStText.Length; ++i)
            {
                AddParseTextMilestones(mp);
            }
        }

        // Parse each text to load our paragraph and wordform segment annotations.
        using (SuppressSubTasks suppressor = new SuppressSubTasks(cache, true))
        {
            List<IStText> texts = new List<IStText>(new FdoObjectSet<IStText>(cache, pp.m_hvosStText, false));
            // Anything like this is currently redundant, we loaded the contents of ALL paragraphs above.
            //List<IStText> parsedTexts = texts.FindAll(HasLastParsedTimestamp);
            //if (parsedTexts.Count != 0)
            //{
            //    // We actually have parsed some texts before...yet we have to again. Possibly another program changed
            //    // the data. Reload it as efficiently as possible.
            //    int[] parsedHvos = new int[parsedTexts.Count];
            //    for (int i = 0; i < parsedHvos.Length; i++)
            //        parsedHvos[i] = parsedTexts[i].Hvo;
            //    int index = 0;
            //    string Hvos = DbOps.MakePartialIdList(ref index, parsedHvos);
            //    string whereClause = "";
            //    if (index == parsedHvos.Length)
            //    {
            //        // If we can make a single where clause we'll do it; otherwise do them all
            //        whereClause = " where Owner$ in (" + Hvos + ")";
            //    }
            //    string sql = "select Owner$, Id, UpdStmp, Contents, Contents_Fmt from StTxtPara_ " + whereClause + " order by owner$, OwnOrd$";
            //    IDbColSpec dcs = DbColSpecClass.Create();
            //    dcs.Push((int)DbColType.koctBaseId, 0, 0, 0);
            //    dcs.Push((int)DbColType.koctObjVecOwn, 1, (int)StText.StTextTags.kflidParagraphs, 0);
            //    dcs.Push((int)DbColType.koctTimeStamp, 2, 0, 0);
            //    dcs.Push((int)DbColType.koctString, 2, (int)StTxtPara.StTxtParaTags.kflidContents, 0);
            //    dcs.Push((int)DbColType.koctFmt, 2, (int)StTxtPara.StTxtParaTags.kflidContents, 0);
            //    cache.VwOleDbDaAccessor.Load(sql, dcs, 0, 0, null, false);
            //}

            // Need a separate loop for these, otherwise things get confused as we start to reuse
            // annotations in pp.Parse() and then re-encounter them in later attempts to salvage
            // Pfics and segments.
#if PROFILING
            Debug.WriteLine("Time to end of preliminaries = " + (DateTime.Now.Ticks - ticks));
#endif
            foreach (IStText text in texts)
                pp.SalvageDummyAnnotations(text);
#if PROFILING
            Debug.WriteLine("Time to start of main parse loop = " + (DateTime.Now.Ticks - ticks));
#endif
            foreach (IStText text in texts)
            {
                pp.Parse(text, progress);
            }
#if PROFILING
            Debug.WriteLine("Time to end of main parse loop = " + (DateTime.Now.Ticks - ticks));
#endif
            StText.RecordParseTimestamps(texts);
            pp.CleanupLeftoverAnnotations(progress);
        }
        //Debug.WriteLine("Time for whole ConcordTexts = " + (DateTime.Now.Ticks - ticks));
        progress.SetMilestone();
        progress.Breath();
#if PROFILING
        Debug.WriteLine("Parse required " + pp.m_cDummyAnnotations + " dummy annotations" + " but could only reuse " + pp.m_dummyAnnotationsToReuse.Count);
        Debug.WriteLine(" Parse created " + pp.m_cWficsMade + " Wfics, " + pp.m_cPficsMade + " Pfics, and " + pp.m_cSegmentsMade + " Segments");
        Debug.WriteLine(" So far we made a total of " + s_cTotalDummiesMade + "; this parse making dummies took " + pp.m_cTicksMakingDummies);
        Debug.WriteLine(" This parse we reset " + pp.m_cTotalDummiesReset + " in a time of " + pp.m_cTicksResettingDummies);
#endif
    }
}