/// <summary>
/// Recomputes the alignment between <c>Text1</c> and <c>Text2</c>.
/// Each text is first cut into sections at its manual alignment highlights; the two section
/// lists are then walked together and, for every pair, the start positions of the regex
/// highlights found in each section are matched with <c>algoArrayAlign.GetBestAlignment</c>.
/// </summary>
/// <remarks>
/// Work in progress: the alignment computed for each section pair is not stored or used yet.
/// </remarks>
public void RecomputeSentenceAlignment()
{
    // Side 1: manual highlights over the whole text, then the sections they delimit.
    var manualMarks1 = Utils.GetAlignManual(AlignData, Text1, 0, Text1.Length, take2: false);
    var wholeText1 = new DocSection(0, Text1.Length);
    var manualSections1 = Utils.SplitSection(wholeText1, manualMarks1);

    // Side 2: same steps, with take2 set.
    var manualMarks2 = Utils.GetAlignManual(AlignData, Text2, 0, Text2.Length, take2: true);
    var wholeText2 = new DocSection(0, Text2.Length);
    var manualSections2 = Utils.SplitSection(wholeText2, manualMarks2);

    // NOTE(review): EnumerateWith presumably pairs the two lists element by element and
    // passes the pair index as the third argument - confirm against its definition.
    manualSections1.EnumerateWith(manualSections2, (left, right, pairIndex) =>
    {
        // Regex highlights inside each section, sorted by where they start.
        var leftMatches =
            (from match in Utils.GetAlignRegexes(AlignData, Text1, left.StartIndex, left.Length, take2: false)
             orderby match.StartIndex
             select match).ToArray();
        var rightMatches =
            (from match in Utils.GetAlignRegexes(AlignData, Text2, right.StartIndex, right.Length, take2: true)
             orderby match.StartIndex
             select match).ToArray();

        // Only the start offsets take part in the alignment.
        var leftStarts = (from match in leftMatches select match.StartIndex).ToArray();
        var rightStarts = (from match in rightMatches select match.StartIndex).ToArray();

        var bestAlignment = algoArrayAlign.GetBestAlignment(leftStarts, rightStarts, Globals.CharThreshRegexAlign);

        // TODO: use this data to align them store all sections `d21f`
    });
}
/// <summary>
/// Splits <paramref name="sectionAll"/> into consecutive sub-sections, using the start index
/// of every manual highlight that lies inside the section as a boundary.
/// </summary>
/// <param name="sectionAll">The section to split.</param>
/// <param name="manualHighlights">
/// Candidate split points; only their <c>StartIndex</c> is read. Highlights that start before
/// the section or after its end are ignored. The sequence is enumerated exactly once.
/// </param>
/// <returns>
/// The sub-sections in ascending order, together spanning the section from its start to its
/// end. When no highlight produces a split the result is one section equal in extent to
/// <paramref name="sectionAll"/>; when the section's length is not positive the result is empty.
/// </returns>
public static IEnumerable<DocSection> SplitSection(DocSection sectionAll, IEnumerable<WordHighlight> manualHighlights)
{
    var sectionStart = sectionAll.StartIndex;
    var sectionEnd = sectionAll.StartIndex + sectionAll.Length;

    var withinHighlights = manualHighlights
        .Where(x => x.StartIndex >= sectionStart && x.StartIndex <= sectionEnd)
        .OrderBy(x => x.StartIndex);

    // No special case is needed for an empty highlight set: the loop below is simply skipped
    // and the trailing step emits the whole section. Probing the input with Any() first would
    // only enumerate a possibly lazy source a second time.
    var results = new List<DocSection>();
    var indexPrev = sectionStart;

    foreach (var highlight in withinHighlights)
    {
        var indexCur = highlight.StartIndex;

        // A highlight at (or duplicated on) the previous boundary would create an empty section.
        if (indexCur <= indexPrev)
        {
            continue;
        }

        results.Add(new DocSection(indexPrev, indexCur - indexPrev));
        indexPrev = indexCur;
    }

    // Whatever remains after the last boundary becomes the final section.
    if (indexPrev < sectionEnd)
    {
        results.Add(new DocSection(indexPrev, sectionEnd - indexPrev));
    }

    return results;
}
/// <summary>
/// Maps a position in <paramref name="sectionA"/> to the proportionally equivalent position in
/// this section (B), computed as <c>StartIndex + Length * posA / sectionA.Length</c>.
/// </summary>
/// <param name="sectionA">The source section A; only its <c>Length</c> is read.</param>
/// <param name="posA">
/// The position in A. NOTE(review): it is scaled as-is, without subtracting
/// <c>sectionA.StartIndex</c>, so it is presumably an offset from the start of A - confirm
/// against callers.
/// </param>
/// <returns>
/// posB. The product is evaluated in 64-bit arithmetic, the division truncates, and the sum is
/// narrowed back to <c>int</c>.
/// </returns>
/// <remarks>
/// A zero <c>sectionA.Length</c> is not guarded against; with integer lengths the division
/// throws <c>DivideByZeroException</c>.
/// </remarks>
public int GetRelativePosition(DocSection sectionA, int posA)
{
    // Widen before multiplying so Length * posA cannot overflow 32 bits.
    var scaledOffset = (long)Length * posA;
    var offsetInB = scaledOffset / sectionA.Length;

    return (int)(StartIndex + offsetInB);
}