/// <summary>
/// Runs the base analysis and then applies FindPassiveSubject to the produced
/// sub-sentences through ForAllHierarchy.
/// </summary>
/// <param name="subSentence">sub-sentence to analyze</param>
/// <returns>the sub-sentences returned by the base analysis</returns>
public override SubSentence[] AnalyzeSubSentence(SubSentence subSentence)
{
    SubSentence[] analyzed = base.AnalyzeSubSentence(subSentence);
    analyzed.ForAllHierarchy(FindPassiveSubject);
    return analyzed;
}
/// <summary>
/// Selects sub-sentences inside the given sub-sentence.
/// The text of consecutive non-separator units is accumulated; at every separator
/// the accumulated text is handed to AnalyzeText together with the segment bounds.
/// </summary>
/// <param name="subSentence">sub-sentence whose units are scanned</param>
/// <returns>list of selected sub-sentences (filled by AnalyzeText)</returns>
protected List<SubSentence> SelectSubSentencesByContent(SubSentence subSentence)
{
    var selected = new List<SubSentence>();
    var segmentText = new StringBuilder();
    UnitTextBase segmentStart = null;

    foreach (UnitTextBase current in subSentence.Units)
    {
        if (segmentStart == null)
        {
            segmentStart = current;
        }

        if (!IsSeparator(current))
        {
            segmentText.Append(current.Text);
        }
        else
        {
            // Close the segment at the separator; the separator itself opens the next one
            // and its text is not added to the accumulated text.
            AnalyzeText(segmentText.ToString(), selected, segmentStart, current.Next, false);
            segmentStart = current;
            segmentText.Clear();
        }
    }

    // The trailing segment has no end unit; the flag tells AnalyzeText whether
    // the segment starts at the very first unit (i.e. no separator was met).
    bool startsAtFirstUnit = segmentStart == subSentence.Units.First();
    AnalyzeText(segmentText.ToString(), selected, segmentStart, null, startsAtFirstUnit);
    return selected;
}
/// <summary>
/// Looks for a passive subject in the sub-sentence.
/// When a passive-voice verb is present, IsPassiveSubject is set to whether an
/// indirect-object candidate was also found; otherwise the flag is left untouched.
/// </summary>
/// <param name="subSentence">sub-sentence to inspect</param>
private void FindPassiveSubject(SubSentence subSentence)
{
    if (subSentence.Subject == null)
    {
        return;
    }

    // verb in passive voice (the last one seen wins)
    Entity passiveVerb = null;
    // indirect object (the last one seen wins)
    Entity indirectObjective = null;

    foreach (var unit in subSentence.Units)
    {
        bool isCandidate = unit.IsEntity && (unit != subSentence.Subject);
        if (!isCandidate)
        {
            continue;
        }

        Entity entity = (Entity)unit;

        bool isPassive = entity.IsType(EntityType.Verb) && IsPassiveVerb(entity);
        if (isPassive)
        {
            passiveVerb = entity;
            continue;
        }

        bool isIndirect = entity.IsType(EntityType.OrgName)
            || entity.IsType(EntityType.OrgNoInfo)
            || IsDativeOrInstrumentalAnimationEntity(entity);
        if (isIndirect)
        {
            indirectObjective = entity;
        }
    }

    if (passiveVerb != null)
    {
        subSentence.IsPassiveSubject = indirectObjective != null;
    }
}
/// <summary>
/// Distributes child sub-sentences among the given sub-sentences.
/// Children are grouped in run order: a run of children that belong to the current
/// parent is appended to it, then the search moves forward to the next matching parent.
/// Parents located after the last used one receive an empty child collection.
/// </summary>
/// <param name="collection">collection of child sub-sentences</param>
/// <param name="subSentences">collection of (parent) sub-sentences; must be non-empty, element 0 is read unconditionally</param>
public static void DecomposedSubSentences(this IEnumerable<SubSentence> collection, IList<SubSentence> subSentences)
{
    int parentIndex = 0;
    SubSentence parent = subSentences[parentIndex];
    var pending = new List<SubSentence>();

    foreach (SubSentence child in collection)
    {
        if (child.IsChildSubSentence(subSentences, parentIndex))
        {
            pending.Add(child);
            continue;
        }

        // The child belongs to a later parent: flush the current run first.
        parent.AppendSubSentences(pending);
        pending.Clear();
        pending.Add(child);
        parent = child.GetParentSubSentence(subSentences, ref parentIndex);
    }

    parent.AppendSubSentences(pending);

    // Remaining parents get an explicit empty set of children.
    ++parentIndex;
    while (parentIndex < subSentences.Count)
    {
        subSentences[parentIndex].AppendSubSentences(new SubSentence[0]);
        ++parentIndex;
    }
}
/// <summary>
/// Runs the base analysis and then applies SetRole to the produced
/// sub-sentences through ForAllHierarchy.
/// </summary>
/// <param name="subSentence">sub-sentence to analyze</param>
/// <returns>the sub-sentences returned by the base analysis</returns>
public override SubSentence[] AnalyzeSubSentence(SubSentence subSentence)
{
    SubSentence[] analyzed = base.AnalyzeSubSentence(subSentence);
    analyzed.ForAllHierarchy(SetRole);
    return analyzed;
}
/// <summary>
/// Gets the question word of a sub-sentence.
/// The first unit with non-empty text is examined; if it is a preposition (Pretext)
/// entity, the next non-empty unit is examined instead.
/// </summary>
/// <param name="subSentence">sub-sentence to inspect</param>
/// <returns>the question word entity, or null when there is none</returns>
/// <exception cref="ArgumentNullException">subSentence is null</exception>
private Entity GetQuestionWord(SubSentence subSentence)
{
    #region [.defense.]
    if (subSentence == null)
    {
        throw new ArgumentNullException("subSentence");
    }
    #endregion

    UnitTextBase unit = subSentence.Units.FirstOrDefault(_ => !_.IsEmptyText());
    if ((unit == null) || !unit.IsEntity)
    {
        return null;
    }

    Entity entity = (Entity)unit;
    if (entity.IsType(EntityType.Pretext))
    {
        // Skip the preposition. GetNonEmptyNext presumably returns null when nothing
        // follows it, so guard against null before touching the unit.
        unit = unit.GetNonEmptyNext();
        entity = ((unit != null) && unit.IsEntity) ? (Entity)unit : null;
    }

    return ((entity != null) && IsQuestionWord(entity)) ? entity : null;
}
/// <summary>
/// Selects number-range contexts from a sub-sentence and, recursively, from its children.
/// A sub-sentence that carries SubTextInfo is processed through the SubTextInfo overload only.
/// Ranges coming from a child are shifted by the child's start position plus the start
/// position of the child's ParentObject, and their contexts are extended with the text of
/// this sub-sentence's units located before / after the child's start.
/// </summary>
/// <param name="subSentence">sub-sentence to process</param>
/// <param name="language">text language</param>
/// <returns>contexts for number ranges</returns>
private TextRange[] SelectTextRangesFromHierarchy(SubSentence subSentence, string language)
{
    if (subSentence.SubTextInfo != null)
    {
        return SelectTextRanges(subSentence.SubTextInfo, language).ToArray();
    }

    List<TextRange> result = new List<TextRange>();
    result.AddRange(SelectTextRanges(subSentence, language));

    foreach (var child in subSentence.Children)
    {
        TextRange[] childRanges = SelectTextRangesFromHierarchy(child, language);
        if (childRanges.Length == 0)
        {
            // Nothing to adjust: skip building the contexts altogether.
            continue;
        }

        // Contexts and offset depend only on the child, so build them once per child
        // instead of once per range.
        string leftContext = subSentence.Units
            .Where(_ => _.PositionInfo.End <= child.StartPosition)
            .GetTextWithSubText();
        string rightContext = subSentence.Units
            .Where(_ => _.PositionInfo.Start > child.StartPosition)
            .GetTextWithSubText();
        if (child.SubTextInfo != null)
        {
            leftContext += child.SubTextInfo.OpenSymbol;
            rightContext = child.SubTextInfo.CloseSymbol + rightContext;
        }
        var offset = child.StartPosition + child.ParentObject.StartPosition;

        foreach (var range in childRanges)
        {
            range.Range.StartPosition += offset;
            range.Range.EndPosition += offset;
            range.UpdateLeftContext(leftContext);
            range.UpdateRightContext(rightContext);
            result.Add(range);
        }
    }

    return result.ToArray();
}
/// <summary>
/// Creates a sub-sentence of this object's SubSentenceType that starts at the start
/// position of <paramref name="begin"/> and takes ownership of the units in the interval.
/// </summary>
/// <param name="begin">start of the unit interval</param>
/// <param name="end">end of the unit interval (passed through to SetSubSentenceUnits)</param>
/// <returns>the created sub-sentence</returns>
private SubSentence CreateSubSentence(UnitTextBase begin, UnitTextBase end)
{
    var created = new SubSentence(SubSentenceType, begin.PositionInfo.Start);
    created.SetSubSentenceUnits(begin, end);
    return created;
}
/// <summary>
/// Selects child sub-sentences by content, fills the given sub-sentence with them
/// and then continues with the base analysis.
/// </summary>
/// <param name="subSentence">sub-sentence to analyze</param>
/// <returns>result of the base analysis</returns>
public override SubSentence[] AnalyzeSubSentence(SubSentence subSentence)
{
    FillSubSentence(subSentence, SelectSubSentencesByContent(subSentence));
    return base.AnalyzeSubSentence(subSentence);
}
/// <summary>
/// Resets the state: a new Default sub-sentence at the given position and
/// empty participle, verb and child collections.
/// </summary>
/// <param name="startPosition">start position of the new sub-sentence</param>
private void Initialize(int startPosition)
{
    SubSentence = new SubSentence(SubSentenceType.Default, startPosition);

    Participles = new List<Entity>();
    Verbs = new List<Entity>();
    Childs = new List<SubSentenceInfo>();
}
/// <summary>
/// Merges the given sub-sentence information into this one: participles, verbs,
/// child infos, units (set union) and child sub-sentences.
/// </summary>
/// <param name="subSentenceInfo">sub-sentence information to attach</param>
public void Append(SubSentenceInfo subSentenceInfo)
{
    Participles.AddRange(subSentenceInfo.Participles);
    Verbs.AddRange(subSentenceInfo.Verbs);
    AddChildrenSubSentences(subSentenceInfo.Childs);

    SubSentence attached = subSentenceInfo.SubSentence;
    // Union keeps each unit once, own units first.
    var mergedUnits = SubSentence.Units.Union(attached.Units);
    SubSentence.SetUnits(mergedUnits);
    SubSentence.AppendSubSentences(attached.Children);
}
/// <summary>
/// Assigns a new parent sub-sentence to the units of an interval.
/// Units are visited from <paramref name="begin"/> through Next links; the walk stops
/// when <paramref name="end"/> is reached, and <paramref name="end"/> itself is not modified.
/// </summary>
/// <param name="subSentence">parent sub-sentence</param>
/// <param name="begin">start of the unit interval</param>
/// <param name="end">end of the unit interval (exclusive)</param>
public static void SetSubSentenceUnits(this SubSentence subSentence, UnitTextBase begin, UnitTextBase end)
{
    // NOTE(review): assumes end is reachable from begin (or is the chain terminator) - confirm.
    for (UnitTextBase unit = begin; unit != end; unit = unit.Next)
    {
        unit.ParentObject = subSentence;
    }
}
/// <summary>
/// Gets the start position of the last unit of the sub-sentence.
/// </summary>
/// <param name="subSentence">sub-sentence</param>
/// <returns>
/// start position of the last unit, or the sub-sentence's own start position
/// when it has no units
/// </returns>
public static int GetLastUnitStartPosition(this SubSentence subSentence)
{
    if (!subSentence.Units.Any())
    {
        return subSentence.StartPosition;
    }

    return subSentence.Units.Last().PositionInfo.Start;
}
/// <summary>
/// Checks that the current sub-sentence is a child of the parent at the given index:
/// it must start at or after that parent and strictly before the next parent (if any).
/// </summary>
/// <param name="current">current sub-sentence</param>
/// <param name="parents">collection of potential parent sub-sentences</param>
/// <param name="index">index of the parent being tested</param>
/// <returns>true when the current sub-sentence falls into the parent's span</returns>
private static bool IsChildSubSentence(this SubSentence current, IList<SubSentence> parents, int index)
{
    if (parents[index].StartPosition <= current.StartPosition)
    {
        // The last parent has no successor, so its span is open-ended.
        SubSentence next = (index < parents.Count - 1) ? parents[index + 1] : null;
        return (next == null) || (current.StartPosition < next.StartPosition);
    }

    return false;
}
/// <summary>
/// Performs shallow syntactic parsing of a sub-sentence.
/// A sub-sentence carrying SubTextInfo has that nested text analyzed and is returned as is;
/// any other sub-sentence goes through AnalyzeCurrentSubSentence.
/// </summary>
/// <param name="subSentence">sub-sentence to parse</param>
/// <returns>sub-sentences formed from the given one</returns>
private SubSentence[] AnalyzeSubSentence(SubSentence subSentence)
{
    if (subSentence.SubTextInfo == null)
    {
        return AnalyzeCurrentSubSentence(subSentence);
    }

    Analyze(subSentence.SubTextInfo);
    return new[] { subSentence };
}
/// <summary>
/// Finds the parent sub-sentence of a child, scanning forward from the given index.
/// </summary>
/// <param name="child">child sub-sentence</param>
/// <param name="parents">collection of possible parents</param>
/// <param name="startIndex">
/// index to start from; on return it holds the index of the found parent
/// </param>
/// <returns>the parent sub-sentence</returns>
/// <exception cref="InvalidOperationException">no suitable parent exists</exception>
private static SubSentence GetParentSubSentence(this SubSentence child, IList<SubSentence> parents, ref int startIndex)
{
    while (startIndex < parents.Count)
    {
        if (child.IsChildSubSentence(parents, startIndex))
        {
            return parents[startIndex];
        }

        ++startIndex;
    }

    #region [.defense.]
    throw new InvalidOperationException("Can't find parent subSentence");
    #endregion
}
/// <summary>
/// Reacts to an introductory sub-sentence met at the given unit: if the hierarchy
/// currently has a Participial sub-sentence open, a new sub-sentence is started at the unit.
/// </summary>
/// <param name="introductory">introductory sub-sentence (only its type is validated)</param>
/// <param name="unit">unit at which the introductory sub-sentence begins</param>
/// <exception cref="ArgumentException">the sub-sentence is not Introductory</exception>
private void InsertIntroductory(SubSentence introductory, UnitTextBase unit)
{
    #region [.defense.]
    if (introductory.Type != SubSentenceType.Introductory)
    {
        throw new ArgumentException("subSentence isn't Introductory");
    }
    #endregion

    bool insideParticipial = (_hierarchy.CurrentSubSentence != null)
        && (_hierarchy.CurrentType == SubSentenceType.Participial);
    if (insideParticipial)
    {
        _hierarchy.CreateNewSubSentence(unit, true);
    }
}
/// <summary>
/// Marks the question word of the sub-sentence when the enclosing sentence is a question,
/// then continues with the base analysis.
/// The sentence counts as a question when the last non-empty unit of its last hierarchical
/// sub-sentence is plain text ending with "?" or "?!".
/// </summary>
/// <param name="subSentence">sub-sentence to analyze</param>
/// <returns>result of the base analysis</returns>
public override SubSentence[] AnalyzeSubSentence(SubSentence subSentence)
{
    var lastUnit = subSentence.ParentObject.SubsentsHierarchical.Last().Units.LastOrDefault(_ => !_.IsEmptyText());

    // Punctuation is matched ordinally: a culture-sensitive EndsWith may ignore
    // zero-width characters and depends on the current culture.
    bool isQuestion = (lastUnit != null)
        && !lastUnit.IsEntity
        && (lastUnit.Text.EndsWith("?", StringComparison.Ordinal)
            || lastUnit.Text.EndsWith("?!", StringComparison.Ordinal));

    if (isQuestion)
    {
        Entity questionWord = GetQuestionWord(subSentence);
        if (questionWord != null)
        {
            questionWord.Type.EntityType = EntityType.QuestionWord;
        }
    }

    return base.AnalyzeSubSentence(subSentence);
}
/// <summary>
/// Checks that the sub-sentence has one of the given types.
/// When the type collection is null, true is returned.
/// </summary>
/// <param name="subSentence">sub-sentence</param>
/// <param name="types">collection of types</param>
/// <returns>check result</returns>
private bool IsAnyType(SubSentence subSentence, params SubSentenceType[] types)
{
    if (types == null)
    {
        return true;
    }

    for (int i = 0; i < types.Length; ++i)
    {
        if (subSentence.Type == types[i])
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Fills the given sub-sentence: its units are decomposed among the child sub-sentences
/// and the sub-sentence itself, after which the children are appended to it.
/// </summary>
/// <param name="subSentence">parent sub-sentence</param>
/// <param name="childSubSentences">child sub-sentences</param>
protected void FillSubSentence(SubSentence subSentence, IEnumerable<SubSentence> childSubSentences)
{
    IEnumerable<UnitTextBase> unitsToDecompose = subSentence.Units;

    bool parentOwnsSomeUnit = subSentence.Units.Any(_ => _.ParentObject == subSentence);
    if (!parentOwnsSomeUnit)
    {
        // No unit is owned by the parent itself: add an empty text unit placed at the end
        // of the last unit and owned by the parent.
        var placeholder = new UnmarkedText("", subSentence.Units.Last().PositionInfo.End);
        placeholder.ParentObject = subSentence;
        unitsToDecompose = unitsToDecompose.Union(new UnitTextBase[] { placeholder });
    }

    var targets = childSubSentences.Union(new SubSentence[] { subSentence });
    unitsToDecompose.DecomposedUnits(targets);
    subSentence.AppendSubSentences(childSubSentences);
}
/// <summary>
/// Dispatches the analysis to the syntax analyzer registered for the language of the
/// sub-sentence's ParentObject. When the language is null or has no registered analyzer,
/// the sub-sentence is returned unchanged as a single-element array.
/// </summary>
/// <param name="subSentence">sub-sentence to analyze</param>
/// <returns>sub-sentences produced by the language-specific analyzer, or the input itself</returns>
public override SubSentence[] AnalyzeSubSentence(SubSentence subSentence)
{
    SyntaxAnalyzerBase analyzer;
    if ((subSentence.ParentObject.Language != null)
        && _syntaxAnalyzers.TryGetValue(subSentence.ParentObject.Language, out analyzer))
    {
        return analyzer.AnalyzeSubSentence(subSentence);
    }

    return new SubSentence[] { subSentence };
}
// The following two lines close a region and a type that open outside this block.
#endregion
}
/// <summary>
/// Sets roles in the given sub-sentence: sentence-member entities go to SetEntityRole,
/// compound-verb entities go to SetCompoundVerbRole, everything else is skipped.
/// </summary>
/// <param name="subSentence">sub-sentence</param>
private void SetRole(SubSentence subSentence)
{
    foreach (UnitTextBase unit in subSentence.Units)
    {
        if (!unit.IsEntity)
        {
            continue;
        }

        Entity entity = (Entity)unit;
        if (entity.IsSentenceMember())
        {
            SetEntityRole(entity);
            continue;
        }

        if (entity.IsCompoundVerb())
        {
            SetCompoundVerbRole(entity);
        }
    }
}
/// <summary>
/// Gets the text of the given sub-sentence.
/// With SubTextInfo present, the text is the concatenation of the texts of all hierarchical
/// sub-sentences of every nested sentence; otherwise it is built from the sub-sentence's units.
/// </summary>
/// <param name="subSentence">sub-sentence</param>
/// <returns>text</returns>
public static string GetText(this SubSentence subSentence)
{
    var text = new StringBuilder();

    if (subSentence.SubTextInfo == null)
    {
        text.Append(subSentence.Units.GetTextWithSubText());
        return text.ToString();
    }

    foreach (var sentence in subSentence.SubTextInfo.Sentences)
    {
        foreach (var nested in sentence.SubsentsHierarchical)
        {
            text.Append(nested.GetText());
        }
    }

    return text.ToString();
}
/// <summary>
/// Selects number-range contexts from the units of a single sub-sentence.
/// Number ranges found in the coded unit string are taken first; after that, standalone
/// number entities are added if their span has not been marked yet. Finally every range
/// is shifted by the start position of the sub-sentence's ParentObject.
/// </summary>
/// <param name="subSentence">sub-sentence</param>
/// <param name="language">text language</param>
/// <returns>contexts for number ranges</returns>
private TextRange[] SelectTextRanges(SubSentence subSentence, string language)
{
    var found = new List<TextRange>();
    // Tracks already claimed spans so that the same text is not reported twice.
    var marked = new MarkedTextMap();
    var units = RegroupUnits(SelectFloatNumeric(subSentence.Units, language));
    string coded = _coder.CodeUnits(units);

    #region [.range.]
    foreach (var numberRange in SelectNumberRanges(coded))
    {
        bool claimed = marked.TryMarkText(numberRange.StartPosition, numberRange.EndPosition);
        if (claimed)
        {
            found.Add(new TextRange(numberRange, units, ContextLength));
        }
    }
    #endregion

    #region [.numeric.]
    foreach (var unit in units)
    {
        if (unit.IsEntity)
        {
            Entity entity = (Entity)unit;
            if (entity.IsNumber() && marked.TryMarkText(entity.PositionInfo.Start, entity.PositionInfo.End))
            {
                var number = Number.Create(entity);
                if (number != null)
                {
                    found.Add(new TextRange(number, units, ContextLength));
                }
            }
        }
    }
    #endregion

    // Shift every range by the start position of the sub-sentence's ParentObject.
    for (int i = 0; i < found.Count; ++i)
    {
        TextRange textRange = found[i];
        textRange.Range.StartPosition += subSentence.ParentObject.StartPosition;
        textRange.Range.EndPosition += subSentence.ParentObject.StartPosition;
    }

    return found.ToArray();
}
/// <summary>
/// Resolves abbreviations in a collection of sub-sentences.
/// For an ApproximatingForceps sub-sentence with a known parent, the abbreviation taken from
/// it is resolved against the last parent unit before it that is neither whitespace nor a
/// quote. Processing then descends into the nested text or into the children.
/// </summary>
/// <param name="parentSubSentence">parent sub-sentence of the collection (may be null)</param>
/// <param name="subSentences">collection of sub-sentences</param>
private void Resolve(SubSentence parentSubSentence, IEnumerable<SubSentence> subSentences)
{
    foreach (var subSentence in subSentences)
    {
        bool isBracketedWithParent = (subSentence.Type == SubSentenceType.ApproximatingForceps)
            && (parentSubSentence != null);
        if (isBracketedWithParent)
        {
            var precedingUnits = parentSubSentence.Units
                .Where(_ => _.PositionInfo.Start < subSentence.StartPosition)
                .ToArray();
            // Candidate source of the abbreviation: nearest preceding meaningful unit.
            var candidateSource = precedingUnits.LastOrDefault(
                _ => !string.IsNullOrWhiteSpace(_.Text) && !IsQuote(_.Text.Trim()));
            Resolve(GetAbbreviationFromBrackets(subSentence), candidateSource);
        }

        if (subSentence.SubTextInfo == null)
        {
            Resolve(subSentence, subSentence.Children);
        }
        else
        {
            Resolve(subSentence.SubTextInfo);
        }
    }
}
/// <summary>
/// Checks that there is a verb between the given entity and the subject of its sub-sentence.
/// Units are scanned in order: after the first of the two endpoints (entity or subject) is met,
/// a Verb entity yields true, while meeting the second endpoint first yields false.
/// </summary>
/// <param name="entity">entity</param>
/// <returns>check result</returns>
private bool IsVerbBetweenEntityAndSubject(Entity entity)
{
    SubSentence owner = entity.ParentObject;
    UnitTextBase firstEndpoint = null;

    foreach (UnitTextBase unit in owner.Units)
    {
        if ((unit == owner.Subject) || (unit == entity))
        {
            if (firstEndpoint == null)
            {
                firstEndpoint = unit;
                continue;
            }

            // Second endpoint reached without a verb in between.
            return false;
        }

        if ((firstEndpoint != null) && unit.IsEntity)
        {
            Entity candidate = (Entity)unit;
            if (candidate.IsType(EntityType.Verb))
            {
                return true;
            }
        }
    }

    // NOTE(review): falling off the end (second endpoint never met) also yields true - confirm intended.
    return true;
}
/// <summary>
/// Builds a sub-sentence hierarchy for the given sub-sentence: every unit with non-empty text
/// is passed to AnalyzeUnit, and each Introductory child is handed to InsertIntroductory
/// at the unit whose end position equals the child's start position.
/// </summary>
/// <param name="subSentence">sub-sentence to analyze</param>
/// <returns>the hierarchy produced by the internal SubSentenceHierarchy</returns>
public override SubSentence[] AnalyzeSubSentence(SubSentence subSentence)
{
    var introductories = subSentence.Children
        .Where(_ => _.Type == SubSentenceType.Introductory)
        .ToArray();
    _hierarchy = new SubSentenceHierarchy(subSentence);

    // Index of the next introductory child that has not been inserted yet.
    int pending = 0;
    foreach (var unit in subSentence.Units)
    {
        if (!unit.IsEmptyText())
        {
            AnalyzeUnit(unit);
        }

        bool introductoryStartsHere = (pending < introductories.Length)
            && (unit.PositionInfo.End == introductories[pending].StartPosition);
        if (introductoryStartsHere)
        {
            InsertIntroductory(introductories[pending], unit);
            ++pending;
        }
    }

    return _hierarchy.GetHierarchy();
}
/// <summary>
/// Gets an abbreviation from a bracketed (ApproximatingForceps) sub-sentence.
/// </summary>
/// <param name="subSentence">sub-sentence</param>
/// <returns>
/// the abbreviation entity, or null when the brackets hold more than one flat sub-sentence,
/// contain other non-entity text, or hold no abbreviation
/// </returns>
/// <exception cref="InvalidOperationException">the sub-sentence is not ApproximatingForceps</exception>
private Entity GetAbbreviationFromBrackets(SubSentence subSentence)
{
    #region [.defense.]
    if (subSentence.Type != SubSentenceType.ApproximatingForceps)
    {
        throw new InvalidOperationException("subSentence not ApproximatingForceps");
    }
    #endregion

    var firstSentence = subSentence.SubTextInfo.Sentences.First();
    var onlySubSentence = firstSentence.SubsentsFlatten.First();

    // Only abbreviations that stand alone inside the brackets (optionally quoted) are resolved.
    bool isSingleFlatSubSentence = (subSentence.SubTextInfo.Sentences.Length == 1)
        && (firstSentence.SubsentsFlatten.Length == 1)
        && !onlySubSentence.Children.Any();
    if (!isSingleFlatSubSentence)
    {
        return null;
    }

    Entity abbreviation = null;
    foreach (var unit in onlySubSentence.Units)
    {
        if (unit.IsEntity)
        {
            Entity entity = (Entity)unit;
            if (IsAbbreviation(entity))
            {
                abbreviation = entity;
            }
            continue;
        }

        bool ignorable = string.IsNullOrWhiteSpace(unit.Text) || IsQuote(unit.Text.Trim());
        if (!ignorable)
        {
            // Any other plain text disqualifies the whole bracket content.
            return null;
        }
    }

    return abbreviation;
}
/// <summary>
/// Delegates the analysis to the wrapped component.
/// </summary>
/// <param name="subSentence">sub-sentence to analyze</param>
/// <returns>result of the wrapped component's analysis</returns>
public override SubSentence[] AnalyzeSubSentence(SubSentence subSentence)
{
    SubSentence[] delegated = _component.AnalyzeSubSentence(subSentence);
    return delegated;
}
/// <summary>
/// Replaces the sub-sentence's units with their transformed version and then
/// continues with the base analysis.
/// </summary>
/// <param name="subSentence">sub-sentence to analyze</param>
/// <returns>result of the base analysis</returns>
public override SubSentence[] AnalyzeSubSentence(SubSentence subSentence)
{
    var transformedUnits = _transformer.Transform(subSentence.Units);
    subSentence.SetUnits(transformedUnits);
    return base.AnalyzeSubSentence(subSentence);
}