/// <summary>
/// Runs the digest pipeline in-process on an already parsed linguistic RDF document:
/// preprocesses the RDF, runs opinion mining on it, then generates digest tuples
/// with tonality marking switched on.
/// </summary>
/// <param name="rdf">linguistic RDF document; must not be null</param>
/// <param name="coreferenceInfo">coreference information forwarded to opinion mining</param>
/// <param name="objectAllocateMethod">object allocation strategy used by both stages</param>
/// <returns>digest tuples together with the names of the rules reported by opinion mining</returns>
public static DigestOutputResult ExecuteDigestInprocWithLinguisticService(
    XDocument rdf, ICoreferenceInfo coreferenceInfo, ObjectAllocateMethod objectAllocateMethod)
{
    rdf.ThrowIfNull("rdf");

    // The RDF is preprocessed here, so opinion mining is told not to do it again (second argument).
    XDocument preparedRdf = Algorithms.PreprocessRdf(rdf);
    var miningOutput = OpinionMiningWcfService.ExecuteOpinionMiningInprocWithLinguisticService(
        preparedRdf, false, objectAllocateMethod, coreferenceInfo);

    // The text argument is a placeholder: the input is already linguistic XML.
    var digestParams = new DigestInputParams("[NEVER MIND]", InputTextFormat.LinguisticXml)
    {
        ExecuteTonalityMarking = true,
        ObjectAllocateMethod   = objectAllocateMethod,
    };

    var tuples = GenerateResult(miningOutput, digestParams.ExecuteTonalityMarking, digestParams);

    return new DigestOutputResult(tuples, miningOutput.OperateRulesNames);
}
/// <summary>
/// Tells whether any essence item of <paramref name="essence"/> is contained in the inquiry synonyms.
/// For a homogeneous essence the essence items are rebuilt (and stored on the essence) first.
/// </summary>
/// <param name="essence">essence to test; its essence items may be replaced as a side effect</param>
/// <param name="inquiriesSynonyms">synonym list; null means "no filtering" and yields true</param>
/// <param name="coreferenceInfo">coreference information used when rebuilding essence items</param>
/// <returns>true when no synonym list is given or at least one essence item matches</returns>
public static bool IsContainsInSynonyms(this Essence essence, List<string> inquiriesSynonyms, ICoreferenceInfo coreferenceInfo)
{
    if (inquiriesSynonyms == null)
    {
        return true;
    }

    if (Algorithms.IsHomogeneous(essence))
    {
        var entity  = essence.Entity;
        var members = Algorithms.GetObjectsFromHomogeneous(entity, ObjectAllocateMethod.PhysicalOrJuridicalPersons);

        // When the first strategy hands back the homogeneous entity itself, retry with the pronoun-based one.
        if (members.Contains(entity))
        {
            members = Algorithms.GetObjectsFromHomogeneous(entity, ObjectAllocateMethod.FirstEntityWithTypePronoun);
        }

        var rebuiltItems = Algorithms.ConstructEssenceItems4Homogeneous(members, coreferenceInfo);
        essence.SetEssenceItems4Homogeneous(rebuiltItems);
    }

    foreach (var item in essence.EssenceItems)
    {
        if (item.IsContainsInInquiriesSynonyms(inquiriesSynonyms))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Assembles a <c>LingvisticsResult</c> from an RDF element according to the requested option flags.
/// </summary>
/// <param name="rdfXml">RDF element to serialize and/or analyze</param>
/// <param name="coreferenceInfo">coreference information passed to the semantic-network builder</param>
/// <param name="options">flags selecting which parts of the result are produced</param>
/// <param name="generateAllSubThemes">forwarded to theme / semantic-network generation</param>
/// <returns>result whose RDF text, theme list and link list are null unless requested</returns>
private LingvisticsResult GetResultFromRDF(XElement rdfXml, ICoreferenceInfo coreferenceInfo, LingvisticsResultOptions options, bool generateAllSubThemes)
{
    bool wantsRdf    = (options & LingvisticsResultOptions.RDF) != LingvisticsResultOptions.None;
    bool wantsThemes = (options & LingvisticsResultOptions.ThemeList) == LingvisticsResultOptions.ThemeList;
    bool wantsSemNet = (options & LingvisticsResultOptions.SemNet) == LingvisticsResultOptions.SemNet;

    string      rdfText = null;
    ThemeItem[] themes  = null;
    LinkItem[]  links   = null;

    if (wantsRdf)
    {
        rdfText = rdfXml.ToString();
    }

    // Links are only produced together with themes: SemNet is looked at only when ThemeList is set.
    if (wantsThemes)
    {
        if (wantsSemNet)
        {
            var network = _LinguisticsKernelConroller.GetSemanticNetwork(rdfXml, coreferenceInfo, generateAllSubThemes);
            themes = GetThemeList(network.Item1);
            links  = GetLinkList(network.Item2);
        }
        else
        {
            themes = GetThemeList(rdfXml, generateAllSubThemes, _LinguisticsKernelConroller);
        }
    }

    return new LingvisticsResult(options, rdfText, themes, links);
}
/// <summary>
/// Rebuilds and stores the essence items of <paramref name="essence"/> when it is homogeneous;
/// does nothing otherwise.
/// </summary>
/// <param name="essence">essence whose items may be replaced</param>
/// <param name="coreferenceInfo">coreference information used while rebuilding the items</param>
private static void ReprocessEssenceItems4Homogeneous(Essence essence, ICoreferenceInfo coreferenceInfo /*, ObjectAllocateMethod objectAllocateMethod*/)
{
    if (!Algorithms.IsHomogeneous(essence))
    {
        return;
    }

    var rebuiltItems = ConstructEssenceItems4Homogeneous(essence.Entity, coreferenceInfo /*, objectAllocateMethod*/);
    essence.SetEssenceItems4Homogeneous(rebuiltItems);
}
/// <summary>
/// Looks up the coreference master of <c>Entity</c> and hands the lookup result to the
/// tuple-based overload. Does nothing when <c>Entity</c> is null.
/// </summary>
/// <param name="coreferenceInfo">
/// coreference information; may be null, in which case the lookup is skipped and the
/// overload receives null, exactly as when no master is found
/// </param>
internal void TrySetValueWithCoreferenceInfo(ICoreferenceInfo coreferenceInfo)
{
    if (Entity == null)
    {
        return;
    }

    // A missing ICoreferenceInfo is treated like "no master found" instead of failing with
    // a NullReferenceException.
    // NOTE(review): assumes the tuple overload accepts null, since TryGetMaster() results
    // are passed to it unchecked - confirm.
    var tuple = (coreferenceInfo != null) ? coreferenceInfo.TryGetMaster(Entity) : null;
    TrySetValueWithCoreferenceInfo(tuple);
}
/// <summary>
/// Wraps the RDF element in a new <c>XDocument</c> and runs the in-process digest
/// (opinion mining with tonality) service on it.
/// </summary>
/// <param name="rdf">root RDF element</param>
/// <param name="coreferenceInfo">coreference information forwarded to the service</param>
/// <param name="objectAllocateMethod">object allocation strategy forwarded to the service</param>
/// <returns>digest result produced by the service</returns>
private static DigestOutputResult CreateOpinionMiningWithTonalityResult(
    XElement rdf, ICoreferenceInfo coreferenceInfo, ObjectAllocateMethod objectAllocateMethod)
{
    return DigestWcfService.ExecuteDigestInprocWithLinguisticService(
        new XDocument(rdf),
        coreferenceInfo,
        objectAllocateMethod);
}
/// <summary>
/// Wraps the RDF element in a new <c>XDocument</c> and runs the in-process tonality-marking
/// service on it.
/// </summary>
/// <param name="rdf">root RDF element</param>
/// <param name="coreferenceInfo">coreference information forwarded to the service</param>
/// <param name="objectAllocateMethod">object allocation strategy forwarded to the service</param>
/// <param name="inputParams">in-process tonality-marking parameters forwarded to the service</param>
/// <returns>tonality-marking result produced by the service</returns>
private static TonalityMarkingOutputResult CreateTonalityResult(
    XElement rdf, ICoreferenceInfo coreferenceInfo, ObjectAllocateMethod objectAllocateMethod, TonalityMarkingInputParams4InProcess inputParams)
{
    return TonalityMarkingWcfService.ExecuteTonalityMarkingInprocWithLinguisticService(
        new XDocument(rdf),
        coreferenceInfo,
        objectAllocateMethod,
        inputParams);
}
/// <summary>
/// Re-processes the homogeneous essence items of a tuple: the subject (unless it is the author)
/// and, when present, the object.
/// </summary>
/// <param name="opinionMiningTuple">tuple whose subject/object essences are re-processed</param>
/// <param name="coreferenceInfo">coreference information used while rebuilding essence items</param>
public static void ReprocessEssenceItems4Homogeneous(OpinionMiningTuple opinionMiningTuple, ICoreferenceInfo coreferenceInfo /*, ObjectAllocateMethod objectAllocateMethod*/)
{
    var subject = opinionMiningTuple.Subject;
    if (!subject.IsAuthor)
    {
        ReprocessEssenceItems4Homogeneous(subject, coreferenceInfo /*, objectAllocateMethod*/);
    }

    if (!opinionMiningTuple.HasObject)
    {
        return;
    }

    ReprocessEssenceItems4Homogeneous(opinionMiningTuple.Object, coreferenceInfo /*, objectAllocateMethod*/);
}
/// <summary>
/// Checks whether the given entity qualifies as a theme.
/// </summary>
/// <param name="entity">entity element to test</param>
/// <param name="coreferenceInfo">coreference information; may be null</param>
/// <returns>true when the element is an ENTITY and passes every theme filter; false otherwise</returns>
public static bool IsTheme(XElement entity, ICoreferenceInfo coreferenceInfo)
{
    // Only ENTITY elements can be themes.
    if (!entity.Name.IsEqual(UnitTextType.ENTITY))
    {
        return false;
    }

    string type = entity.Attribute(BlockAttribute.TYPE);

    // Attribute- and type-based exclusions, evaluated left to right with short-circuiting.
    bool rejected =
        entity.IsContainAttribute(BlockAttribute.NOT_THEME)
        || !Enum.GetNames(typeof(EntityType)).Contains(type)
        || (type.IsEqual(EntityType.Pronoun) && !entity.IsContainAttribute(BlockAttribute.LINK))
        || type.IsEqual(EntityType.Adverb)
        || type.IsEqual(EntityType.Homogeneous)
        || (entity.Name.IsEqual(UnitTextType.CET) && !type.IsEqual(EntityType.Conjunction));

    string   masterName = string.Empty;
    XElement probe      = entity;

    if (!rejected)
    {
        // Prefer the coreference master (when there is one) for the attribute checks and the theme name.
        if (coreferenceInfo != null)
        {
            var master = coreferenceInfo.TryGetMaster(entity);
            if (master != null)
            {
                probe      = master.Item2;
                masterName = master.Item1;
            }
        }

        // A person name needs a last name plus first or middle name, or else a PROF / PRE attribute.
        if (type.IsEqual(EntityType.PersonName))
        {
            bool hasLastName   = probe.IsContainAttribute(BlockAttribute.LN);
            bool hasFirstName  = probe.IsContainAttribute(BlockAttribute.FN);
            bool hasMiddleName = probe.IsContainAttribute(BlockAttribute.MN);
            bool hasFullName   = hasLastName && (hasFirstName || hasMiddleName);

            rejected = !(hasFullName
                         || probe.IsContainAttribute(BlockAttribute.PROF)
                         || probe.IsContainAttribute(BlockAttribute.PRE));
        }
    }

    // The theme name is always computed, even when the entity has already been rejected.
    string theme         = GetThemeName(entity, masterName);
    bool   badThemeName  = theme.IsEmpty() || (theme.Length > 256);

    // A former Date filter (VALUE == "0000-00-00/0000-00-00") is disabled.
    bool   adjectiveLike = type.IsEqual(EntityType.Adjective)
                           || type.IsEqual(EntityType.ShortAdjective)
                           || type.IsEqual(EntityType.Comparative)
                           || type.IsEqual(EntityType.Superlative);

    return !(rejected || badThemeName || adjectiveLike);
}
/// <summary>
/// Builds the essence items for a homogeneous entity element.
/// </summary>
/// <param name="essenceElement">homogeneous entity element whose member objects are collected</param>
/// <param name="coreferenceInfo">coreference information forwarded to the item construction</param>
/// <returns>read-only collection of essence items built from the collected elements</returns>
/// <exception cref="ArgumentNullException">no objects could be extracted from <paramref name="essenceElement"/></exception>
private static ReadOnlyCollection<EssenceItem> ConstructEssenceItems4Homogeneous(XElement essenceElement, ICoreferenceInfo coreferenceInfo /*, ObjectAllocateMethod objectAllocateMethod*/)
{
    // Original author's note: somewhere in "OM" only [OrgName's] get extracted as subjects/objects.
    var elements = essenceElement.GetObjectsFromHomogeneous(ObjectAllocateMethod.PhysicalOrJuridicalPersons /*objectAllocateMethod*/);

    // When the first strategy hands back the homogeneous element itself, retry with the verb/object-role one.
    if (elements.Contains(essenceElement))
    {
        elements = essenceElement.GetObjectsFromHomogeneous(ObjectAllocateMethod.FirstVerbEntityWithRoleObj);
    }

    if (elements.Count == 0)
    {
        // Exception type kept for callers; the text is now passed as the message instead of
        // being misused as the parameter name.
        throw new ArgumentNullException("essenceElement", "GetObjectsFromHomogeneous() == 0");
    }

    return Algorithms.ConstructEssenceItems4Homogeneous(elements, coreferenceInfo);
}
/// <summary>
/// Builds the semantic network of a document from its XML: resets the builder state, then
/// processes every sentence element of <paramref name="xmlText"/>.
/// </summary>
/// <param name="xmlText">text with extracted entities, split into sentences; may be null</param>
/// <param name="coreferenceInfo">coreference information stored for use during the build</param>
/// <param name="generateAllSubThemes">stored flag controlling sub-theme generation</param>
/// <returns>the collected themes and links; both are empty when <paramref name="xmlText"/> is null</returns>
public Tuple<IEnumerable<ThemeItem>, IEnumerable<LinkItem>> BuildSemanticNetwork(XElement xmlText, ICoreferenceInfo coreferenceInfo, bool generateAllSubThemes)
{
    // Reset the per-run state.
    _isAllSubThemes  = generateAllSubThemes;
    _ThemeId         = 1;
    _Links           = new Dictionary<string, LinkItem>();
    _Themes          = new Dictionary<string, ThemeItem>();
    _Entities        = new Dictionary<string, XElement>();
    _coreferenceInfo = coreferenceInfo;

    if (xmlText != null)
    {
        _Text = xmlText;

        var sentenceTag = UnitTextType.SENT.ToString();
        foreach (var sentence in xmlText.Elements(sentenceTag))
        {
            // The sentence language is stored in a field before each sentence is processed.
            _sentenceLanguage = sentence.Attribute(BlockAttribute.LANG);
            SentenceCycle(sentence);
        }
    }

    IEnumerable<ThemeItem> themes = _Themes.Values;
    IEnumerable<LinkItem>  links  = _Links.Values;
    return Tuple.Create(themes, links);
}
/// <summary>
/// Core opinion-mining loop: walks the sentences of the document, applies the language rules to
/// each one and collects opinion-mining tuples, then re-processes homogeneous essence items of
/// every collected tuple.
/// </summary>
/// <param name="xdocument">linguistic document to process</param>
/// <param name="inputParams">theme/language selection, inquiry synonyms and object allocation method</param>
/// <param name="coreferenceInfo">coreference information passed to every created tuple</param>
/// <returns>all tuples collected from the processed sentences</returns>
private static IEnumerable<OpinionMiningTuple> ExecuteInternal(
    XDocument xdocument, InputParamsBase inputParams, ICoreferenceInfo coreferenceInfo)
{
    var language       = Config.ThemesManager[inputParams.ThemeType].LanguagesManager[inputParams.LanguageType];
    var sentenceIndex  = 0; // counts only sentences that were actually processed
    var speechCounter  = 0; // running direct/indirect-speech number, updated by the rules
    var collected      = new List<OpinionMiningTuple>();
    var allocateMethod = inputParams.ObjectAllocateMethod;
    var synonymsInfo   = new InquiriesSynonymsInfo(inputParams.InquiriesSynonyms);

    foreach (var sent in xdocument.GetSentences(language))
    {
        // Limit: maximum number of sentences per text.
        if (Config.ResultLimit_MaxSentCountInText <= sentenceIndex)
        {
            LOG.Error("Превышено допустимое число предложений в тексте, будут обработанны только первые '" + Config.ResultLimit_MaxSentCountInText + "' предложений");
            break;
        }

        // Limit: maximum sentence length (spaces excluded); over-long sentences are skipped
        // without advancing the sentence counter.
        var lengthWithoutSpace = sent.Value.LengthWithoutSpace();
        if (Config.ResultLimit_MaxSentLengthWithoutSpace < lengthWithoutSpace)
        {
            LOG.Error("Превышена допустимая длина (в '" + Config.ResultLimit_MaxSentLengthWithoutSpace + "' символов) одного предложения: '" + lengthWithoutSpace + "', данное предложение не будет обработанно: \r\n" + sent.ToString());
            continue;
        }

        var ruleMatches = language.Rules.Process(sent, ref speechCounter, inputParams.ObjectAllocateMethod);

        if (!ruleMatches.AnyEx())
        {
            // [Author-subject]-[some-objects]: no rule fired, so objects are attributed to the author.
            var authorObjects = sent.TryAllocateObjects4AuthorSubject(allocateMethod, synonymsInfo);

            // Limit: maximum number of objects per sentence (extra objects are dropped below).
            if (Config.ResultLimit_MaxObjectInOneSent < authorObjects.Count)
            {
                LOG.Error("Превышено допустимое число объектов в одном предложении: '" + authorObjects.Count + "', будут использоваться только первые '" + Config.ResultLimit_MaxObjectInOneSent + "' объектов: \r\n" + sent.ToString());
            }

            foreach (var o in authorObjects.Take(Config.ResultLimit_MaxObjectInOneSent))
            {
                collected.Add(OpinionMiningTuple.Create4AuthorSubject(
                    o, sent, sentenceIndex, FilterBySynonyms.Object, coreferenceInfo));
            }
        }
        else
        {
            // [some-concrete-subject]
            foreach (var match in ruleMatches)
            {
                if (match.Objects.Any())
                {
                    // Subject/object pairs: object outermost, then subject data, then subject essence.
                    foreach (var o in match.Objects)
                    {
                        foreach (var subjectData in match.Subjects)
                        {
                            foreach (var s in subjectData.SubjectEssences)
                            {
                                var filter = inputParams.InquiriesSynonyms.IsContainsInSynonyms(s, o, coreferenceInfo);
                                if (filter.HasValue)
                                {
                                    collected.Add(new OpinionMiningTuple(
                                        s, o, sent, sentenceIndex, filter.Value, coreferenceInfo));
                                }
                            }
                        }
                    }
                }
                else
                {
                    // Subjects without any object.
                    foreach (var subjectData in match.Subjects)
                    {
                        foreach (var s in subjectData.SubjectEssences)
                        {
                            if (s.IsContainsInSynonyms(inputParams.InquiriesSynonyms, coreferenceInfo))
                            {
                                collected.Add(new OpinionMiningTuple(
                                    s, sent, sentenceIndex, FilterBySynonyms.Subject, coreferenceInfo));
                            }
                        }
                    }
                }

                // [Author-subject]-[subject-as-object]: each matching subject is additionally
                // reported as an object of the author, on a copy of the sentence.
                foreach (var subjectData in match.Subjects)
                {
                    foreach (var s in subjectData.SubjectEssences)
                    {
                        if (s.IsContainsInSynonyms(inputParams.InquiriesSynonyms, coreferenceInfo))
                        {
                            collected.Add(OpinionMiningTuple.Create4AuthorSubject(
                                s.ToObjectEssence(),
                                sent.Copy().RemoveDirectAndIndirectSpeechBeginEndAttributes(),
                                sentenceIndex,
                                FilterBySynonyms.Object,
                                coreferenceInfo));
                        }
                    }
                }
            }
        }

        sentenceIndex++;
    }

    // Re-process essence items of homogeneous subjects/objects.
    foreach (var tuple in collected)
    {
        Common.ReprocessEssenceItems4Homogeneous(tuple, coreferenceInfo /*, inputParams.ObjectAllocateMethod*/);
    }

    return collected;
}
/// <summary>
/// Classifies a subject/object pair by which of the two is contained in the inquiry synonyms.
/// </summary>
/// <param name="inquiriesSynonyms">synonym list forwarded to the per-essence check</param>
/// <param name="subject">subject essence</param>
/// <param name="object">object essence</param>
/// <param name="coreferenceInfo">coreference information forwarded to the per-essence check</param>
/// <returns>
/// <c>SubjectAndObject</c>, <c>Subject</c> or <c>Object</c> depending on what matched;
/// null when neither matched
/// </returns>
public static FilterBySynonyms? IsContainsInSynonyms(this List<string> inquiriesSynonyms, SubjectEssence subject, ObjectEssence @object, ICoreferenceInfo coreferenceInfo)
{
    // Each essence is checked exactly once, subject first.
    bool subjectMatches = subject.IsContainsInSynonyms(inquiriesSynonyms, coreferenceInfo);
    bool objectMatches  = @object.IsContainsInSynonyms(inquiriesSynonyms, coreferenceInfo);

    if (subjectMatches)
    {
        return objectMatches ? FilterBySynonyms.SubjectAndObject : FilterBySynonyms.Subject;
    }

    if (objectMatches)
    {
        return FilterBySynonyms.Object;
    }

    return null; // no match (formerly FilterBySynonyms.NoFiltration)
}
/// <summary>
/// Creates a tuple whose subject is the author (<c>SubjectEssence.Author</c>) and whose object
/// is the given essence.
/// </summary>
/// <param name="object">object essence of the tuple</param>
/// <param name="sentence">sentence element the object belongs to</param>
/// <param name="sentenceNumber">number of the sentence</param>
/// <param name="filterBySynonyms">synonym-filter classification stored on the tuple</param>
/// <param name="coreferenceInfo">coreference information passed to the tuple constructor</param>
/// <returns>the newly constructed tuple</returns>
public static OpinionMiningTuple Create4AuthorSubject(ObjectEssence @object, XElement sentence, int sentenceNumber, FilterBySynonyms filterBySynonyms, ICoreferenceInfo coreferenceInfo)
{
    var authorTuple = new OpinionMiningTuple(
        SubjectEssence.Author,
        @object,
        sentence,
        sentenceNumber,
        filterBySynonyms,
        coreferenceInfo);

    return authorTuple;
}
/// <summary>
/// Shared constructor logic: validates the arguments, stores subject, sentence number and filter,
/// and keeps a private copy of the sentence.
/// </summary>
/// <param name="subject">subject essence; must not be null</param>
/// <param name="sentence">source sentence; must not be null, it is copied and not modified here</param>
/// <param name="sentenceNumber">number of the sentence</param>
/// <param name="filterBySynonyms">synonym-filter classification</param>
/// <param name="coreferenceInfo">coreference information applied to the subject</param>
/// <returns>the sentence copy that is also stored in <c>_Sentence</c></returns>
private XElement Initialize(SubjectEssence subject, XElement sentence, int sentenceNumber, FilterBySynonyms filterBySynonyms, ICoreferenceInfo coreferenceInfo)
{
    subject.ThrowIfNull("subject");
    sentence.ThrowIfNull("sentence");

    Subject = subject;
    Subject.TrySetValueWithCoreferenceInfo(coreferenceInfo);

    SentenceNumber   = sentenceNumber;
    FilterBySynonyms = filterBySynonyms;

    var sentenceCopy = new XElement(sentence);

    // For a non-author subject, strip the subject attributes from every element other than
    // the subject's own entity.
    if (!Subject.IsAuthor)
    {
        var subjectEntity = Subject.AnaphoriaAndEntityTuple.Entity;
        sentenceCopy.RemoveOtherSubjectAttributes(subjectEntity, ObjectEntityEqualityComparer.Instance.Equals);
    }

    _Sentence    = sentenceCopy;
    SentenceText = sentenceCopy.ToString();

    return sentenceCopy;
}
/// <summary>
/// Creates a tuple with both a subject and an object. The object's entity (and anaphoria, if any)
/// is located inside the tuple's private copy of the sentence, the entity is marked as inquiry,
/// and the object essence is rebuilt on top of the copied elements.
/// </summary>
/// <param name="subject">subject essence; must not be null</param>
/// <param name="object">object essence; must not be null</param>
/// <param name="sentence">source sentence; must not be null</param>
/// <param name="sentenceNumber">number of the sentence</param>
/// <param name="filterBySynonyms">synonym-filter classification</param>
/// <param name="coreferenceInfo">
/// coreference information; a null value is treated as "no master found" for the object lookup
/// </param>
/// <exception cref="ArgumentException">
/// the object's entity or anaphoria element cannot be found in the sentence copy
/// </exception>
public OpinionMiningTuple(SubjectEssence subject, ObjectEssence @object, XElement sentence, int sentenceNumber, FilterBySynonyms filterBySynonyms, ICoreferenceInfo coreferenceInfo)
{
    var tempSentence = Initialize(subject, sentence, sentenceNumber, filterBySynonyms, coreferenceInfo);

    @object.ThrowIfNull("object");

    // The coreference lookup uses the ORIGINAL object entity, before the object is re-pointed
    // at the elements of the sentence copy below.
    var tuple = (coreferenceInfo != null) ? coreferenceInfo.TryGetMaster(@object.Entity) : null;

    // Find the object entity inside the copy and mark it as inquiry.
    var objectEntityInCopy = tempSentence.DescendantsAndSelf().FirstOrDefault(
        d => ObjectEntityEqualityComparer.Instance.Equals(d, @object.AnaphoriaAndEntityTuple.Entity));
    if (objectEntityInCopy == null)
    {
        throw (new ArgumentException("Object-Entity not found in his Sentence. " +
                                     ("object.Entity: " + @object.AnaphoriaAndEntityTuple.Entity.ToString().InSingleQuote() +
                                      ", sentence: " + sentence.ToString().InSingleQuote()).InBrackets()));
    }
    objectEntityInCopy.MarkAsInquiry();

    // Find the anaphoria element inside the copy, when the object has one.
    var objectAnaphoriaInCopy = default(XElement);
    if (@object.AnaphoriaAndEntityTuple.Anaphoria != null)
    {
        objectAnaphoriaInCopy = tempSentence.DescendantsAndSelf().FirstOrDefault(
            d => ObjectEntityEqualityComparer.Instance.Equals(d, @object.AnaphoriaAndEntityTuple.Anaphoria));
        if (objectAnaphoriaInCopy == null)
        {
            // The label names the anaphoria element that is actually printed.
            throw (new ArgumentException("Object-Anaphoria-Entity not found in his Sentence. " +
                                         ("object.Anaphoria: " + @object.AnaphoriaAndEntityTuple.Anaphoria.ToString().InSingleQuote() +
                                          ", sentence: " + sentence.ToString().InSingleQuote()).InBrackets()));
        }
    }

    // Refresh the text: the inquiry mark changed the sentence copy after Initialize() stored it.
    SentenceText = tempSentence.ToString();

    Object = new ObjectEssence(
        new SubjectAndAnaphoriaEntityTuple(objectEntityInCopy, objectAnaphoriaInCopy),
        @object.IsSubjectIndeed);

    // Apply the coreference lookup result obtained above to the rebuilt object.
    Object.TrySetValueWithCoreferenceInfo(tuple);
}
/// <summary>
/// Creates a tuple that has a subject but no object; all work is done by <c>Initialize</c>.
/// </summary>
/// <param name="subject">subject essence; must not be null</param>
/// <param name="sentence">source sentence; must not be null</param>
/// <param name="sentenceNumber">number of the sentence</param>
/// <param name="filterBySynonyms">synonym-filter classification</param>
/// <param name="coreferenceInfo">coreference information applied to the subject</param>
public OpinionMiningTuple(
    SubjectEssence   subject,
    XElement         sentence,
    int              sentenceNumber,
    FilterBySynonyms filterBySynonyms,
    ICoreferenceInfo coreferenceInfo)
{
    // The sentence copy returned by Initialize() is not needed here.
    Initialize(subject, sentence, sentenceNumber, filterBySynonyms, coreferenceInfo);
}
/// <summary>
/// Runs opinion mining in-process on an already parsed linguistic RDF document.
/// </summary>
/// <param name="rdf">linguistic RDF document; must not be null and its root must have child elements</param>
/// <param name="callPreprocessRdf">when true, the RDF is preprocessed before mining</param>
/// <param name="objectAllocateMethod">object allocation strategy</param>
/// <param name="coreferenceInfo">coreference information forwarded to the mining loop</param>
/// <returns>the mined tuples together with the rule-names debug output</returns>
/// <exception cref="ArgumentException">the RDF root has no child elements</exception>
public static OpinionMiningOutputResult ExecuteOpinionMiningInprocWithLinguisticService(
    XDocument rdf, bool callPreprocessRdf, ObjectAllocateMethod objectAllocateMethod, ICoreferenceInfo coreferenceInfo)
{
    rdf.ThrowIfNull("rdf");
    if (!rdf.Root.HasElements)
    {
        throw new ArgumentException("Input linguistics-RDF is empty.");
    }

    XDocument workingRdf = rdf;
    if (callPreprocessRdf)
    {
        workingRdf = Algorithms.PreprocessRdf(rdf);
    }

    // The text argument is a placeholder: the input is already linguistic XML.
    var miningParams = new OpinionMiningInputParams("[NEVER MIND]", InputTextFormat.LinguisticXml)
    {
        ObjectAllocateMethod = objectAllocateMethod
    };

    var tuples = ExecuteInternal(workingRdf, miningParams, coreferenceInfo);

    // The rule-names debug output is read only after the mining run has finished.
    return new OpinionMiningOutputResult(tuples, RuleBase.GetOperateRulesNamesDebugInfoOutput());
}