/// <summary>
/// Creates an empty note for the given sentence: empty text, no note parts
/// and a new, empty structure.
/// </summary>
/// <param name="originalSentence">Sentence this note is created for.</param>
public NotenizerNote(NotenizerSentence originalSentence)
{
    // Read the clock only once so that a freshly created note reports the
    // very same creation and update time.
    DateTime now = DateTime.Now;

    _text = String.Empty;
    _noteParts = new List<NotePart>();
    _originalSentence = originalSentence;
    _createdAt = now;
    _updatedAt = now;
    _structure = new NotenizerStructure();
}
/// <summary>
/// Default parsing hook. This implementation does not parse anything;
/// overriding classes provide the actual parsing.
/// </summary>
/// <param name="sentence">Sentence to parse (ignored by this implementation).</param>
/// <returns>Always <c>null</c> in this implementation.</returns>
public virtual NotenizerNote Parse(NotenizerSentence sentence)
{
    return null;
}
/// <summary>
/// Default parsability check. This implementation rejects every sentence;
/// overriding classes provide the actual check.
/// </summary>
/// <param name="sentence">Sentence to check (ignored by this implementation).</param>
/// <returns>Always <c>false</c> in this implementation.</returns>
public virtual bool IsParsableSentence(NotenizerSentence sentence)
{
    return false;
}
/// <summary>
/// Creates an empty note part bound to the given sentence.
/// </summary>
/// <param name="originalSentence">
/// Sentence the part belongs to. Its structure supplies the count that is
/// handed to <c>InitializeStructure</c>.
/// </param>
public NotePart(NotenizerSentence originalSentence)
{
    _originalSentence = originalSentence;
    _noteParticles = new List<NoteParticle>();

    var dependencyWordCount = originalSentence.Structure.DependencyWordsInSentenceCount();
    InitializeStructure(dependencyWordCount);
}
/// <summary>
/// Parses a sentence into a note using fixed dependency heuristics.
/// </summary>
/// <remarks>
/// One note part is produced for every nominal-subject dependency that is not
/// already listed under the nominal-subject keys of the note's compressed
/// dependencies. What else goes into the part depends on the part of speech of
/// the subject's governor (noun-like or verb-like); for any other part of
/// speech the part holds only the subject itself.
/// </remarks>
/// <param name="sentence">Sentence to parse.</param>
/// <returns>The note built from the sentence; never <c>null</c>.</returns>
public override NotenizerNote Parse(NotenizerSentence sentence)
{
    NotenizerNote note = new NotenizerNote(sentence);

    foreach (NotenizerDependency dependencyLoop in sentence.Structure.Dependencies)
    {
        if (!dependencyLoop.Relation.IsNominalSubject())
        {
            continue;
        }

        bool alreadyCompressed =
            (note.Structure.CompressedDependencies.ContainsKey(GrammaticalConstants.NominalSubject)
                && note.Structure.CompressedDependencies[GrammaticalConstants.NominalSubject].Any(x => x.Key == dependencyLoop.Key))
            || (note.Structure.CompressedDependencies.ContainsKey(GrammaticalConstants.NominalSubjectPassive)
                && note.Structure.CompressedDependencies[GrammaticalConstants.NominalSubjectPassive].Any(x => x.Key == dependencyLoop.Key));

        if (alreadyCompressed)
        {
            continue;
        }

        NotePart notePart = new NotePart(sentence);
        notePart.Add(new NoteParticle(dependencyLoop, TokenType.Dependent));

        String pos = dependencyLoop.Governor.POS.Tag;

        if (POSConstants.NounLikePOS.Contains(pos))
        {
            AddDependentParticle(
                notePart,
                sentence.Structure.GetDependencyByShortName(
                    dependencyLoop,
                    ComparisonType.DependentToGovernor,
                    GrammaticalConstants.CompoudModifier));

            AddDependentParticle(
                notePart,
                sentence.Structure.GetDependencyByShortName(
                    dependencyLoop,
                    ComparisonType.GovernorToGovernor,
                    GrammaticalConstants.AuxModifier,
                    GrammaticalConstants.AuxModifierPassive));

            AddDependentParticle(
                notePart,
                sentence.Structure.GetDependencyByShortName(
                    dependencyLoop,
                    ComparisonType.GovernorToGovernor,
                    GrammaticalConstants.Copula));

            List<NotenizerDependency> conjuctions = sentence.Structure.GetDependenciesByShortName(
                dependencyLoop,
                ComparisonType.GovernorToGovernor,
                GrammaticalConstants.Conjuction);

            if (conjuctions != null && conjuctions.Count > 0)
            {
                List<NotenizerDependency> filteredConjs = FilterByPOS(conjuctions, POSConstants.ConjustionPOS);

                // The coordinating conjunction lookup does not depend on the
                // individual conjunct, so it is done once. The lookup may yield
                // null (every other call site checks for it); without a
                // coordinating conjunction there is nothing to pair a conjunct with.
                NotenizerDependency cc = sentence.Structure.GetDependencyByShortName(
                    dependencyLoop,
                    ComparisonType.GovernorToGovernor,
                    GrammaticalConstants.CoordinatingConjuction);

                if (cc != null)
                {
                    foreach (NotenizerDependency filteredConjLoop in filteredConjs)
                    {
                        // Keep only conjuncts whose relation names this conjunction
                        // word and that come after the conjunction in the sentence.
                        if (cc.Dependent.Word == filteredConjLoop.Relation.Specific
                            && sentence.Structure.DependencyIndex(filteredConjLoop) > sentence.Structure.DependencyIndex(cc))
                        {
                            NoteParticle ccObj = new NoteParticle(cc, TokenType.Dependent);
                            NoteParticle filteredConjObj = new NoteParticle(filteredConjLoop, TokenType.Dependent);
                            notePart.Add(ccObj);
                            notePart.Add(filteredConjObj);
                        }
                    }
                }
            }

            // <== NMODS ==>
            List<NotenizerDependency> nmodsList = sentence.Structure.GetDependenciesByShortName(
                dependencyLoop,
                ComparisonType.GovernorToGovernor,
                GrammaticalConstants.NominalModifier);

            if (nmodsList != null && nmodsList.Count > 0)
            {
                NotenizerDependency first = nmodsList.First();
                NotenizerDependency neg = sentence.Structure.GetDependencyByShortName(
                    first,
                    ComparisonType.DependentToGovernor,
                    GrammaticalConstants.NegationModifier);

                // Noun-like governor: a negation is added BEFORE the modifier.
                // NOTE(review): the verb-like branch adds it AFTER the modifier;
                // the difference is preserved as found - confirm it is intended.
                AddDependentParticle(notePart, neg);
                notePart.Add(CreateNominalModifierParticle(first));

                // second nmod depending on first one
                NotenizerDependency nmodSecond = sentence.Structure.GetDependencyByShortName(
                    first,
                    ComparisonType.DependentToGovernor,
                    GrammaticalConstants.NominalModifier);

                if (nmodSecond != null)
                {
                    // NOTE(review): the negation is looked up relative to `first`
                    // (governor to governor), not relative to `nmodSecond` - confirm.
                    neg = sentence.Structure.GetDependencyByShortName(
                        first,
                        ComparisonType.GovernorToGovernor,
                        GrammaticalConstants.NegationModifier);

                    AddDependentParticle(notePart, neg);
                    notePart.Add(CreateNominalModifierParticle(nmodSecond));
                }
            }
            else
            {
                // <== AMODS ==>
                NotenizerDependency amod1 = sentence.Structure.GetDependencyByShortName(
                    dependencyLoop,
                    ComparisonType.DependentToGovernor,
                    GrammaticalConstants.AdjectivalModifier);

                NotenizerDependency amod2 = sentence.Structure.GetDependencyByShortName(
                    dependencyLoop,
                    ComparisonType.GovernorToGovernor,
                    GrammaticalConstants.AdjectivalModifier);

                if (amod1 != null || amod2 != null)
                {
                    AddDependentParticle(notePart, amod1);
                    AddDependentParticle(notePart, amod2);
                }
                else
                {
                    // <== NUMMODS ==> only consulted when no adjectival modifier exists
                    NotenizerDependency nummod1 = sentence.Structure.GetDependencyByShortName(
                        dependencyLoop,
                        ComparisonType.DependentToGovernor,
                        GrammaticalConstants.NumericModifier);

                    NotenizerDependency nummod2 = sentence.Structure.GetDependencyByShortName(
                        dependencyLoop,
                        ComparisonType.GovernorToGovernor,
                        GrammaticalConstants.NumericModifier);

                    AddDependentParticle(notePart, nummod1);
                    AddDependentParticle(notePart, nummod2);
                }
            }

            // For a noun-like governor the governor particle is added last.
            notePart.Add(new NoteParticle(dependencyLoop, TokenType.Governor));
        }
        else if (POSConstants.VerbLikePOS.Contains(pos))
        {
            // For a verb-like governor the governor particle directly follows the subject.
            notePart.Add(new NoteParticle(dependencyLoop, TokenType.Governor));

            NotenizerDependency dobj = sentence.Structure.GetDependencyByShortName(
                dependencyLoop,
                ComparisonType.GovernorToGovernor,
                GrammaticalConstants.DirectObject);

            if (dobj != null)
            {
                notePart.Add(new NoteParticle(dobj, TokenType.Dependent));

                AddDependentParticle(
                    notePart,
                    sentence.Structure.GetDependencyByShortName(
                        dobj,
                        ComparisonType.DependentToGovernor,
                        GrammaticalConstants.NegationModifier));
            }

            AddDependentParticle(
                notePart,
                sentence.Structure.GetDependencyByShortName(
                    dependencyLoop,
                    ComparisonType.GovernorToGovernor,
                    GrammaticalConstants.AuxModifier,
                    GrammaticalConstants.AuxModifierPassive));

            // <== NMODS ==>
            List<NotenizerDependency> nmodsList = sentence.Structure.GetDependenciesByShortName(
                dependencyLoop,
                ComparisonType.GovernorToGovernor,
                GrammaticalConstants.NominalModifier);

            if (nmodsList != null && nmodsList.Count > 0)
            {
                NotenizerDependency first = nmodsList.First();
                NotenizerDependency neg = sentence.Structure.GetDependencyByShortName(
                    first,
                    ComparisonType.DependentToGovernor,
                    GrammaticalConstants.NegationModifier);

                // Verb-like governor: a negation is added AFTER the modifier.
                notePart.Add(CreateNominalModifierParticle(first));
                AddDependentParticle(notePart, neg);

                // second nmod depending on first one
                NotenizerDependency nmodSecond = sentence.Structure.GetDependencyByShortName(
                    first,
                    ComparisonType.DependentToGovernor,
                    GrammaticalConstants.NominalModifier);

                if (nmodSecond != null)
                {
                    // NOTE(review): the negation is looked up relative to `first`
                    // (governor to governor), not relative to `nmodSecond` - confirm.
                    neg = sentence.Structure.GetDependencyByShortName(
                        first,
                        ComparisonType.GovernorToGovernor,
                        GrammaticalConstants.NegationModifier);

                    notePart.Add(CreateNominalModifierParticle(nmodSecond));
                    AddDependentParticle(notePart, neg);
                }
            }
        }

        note.Add(notePart);
    }

    return note;
}

/// <summary>
/// Adds the dependency to the note part as a dependent-token particle; does nothing when the dependency is <c>null</c>.
/// </summary>
private static void AddDependentParticle(NotePart notePart, NotenizerDependency dependency)
{
    if (dependency != null)
    {
        notePart.Add(new NoteParticle(dependency, TokenType.Dependent));
    }
}

/// <summary>
/// Creates the particle for a nominal modifier, labelled with the relation's adjusted specific followed by the dependent word.
/// </summary>
private static NoteParticle CreateNominalModifierParticle(NotenizerDependency nominalModifier)
{
    return new NoteParticle(
        nominalModifier.Relation.AdjustedSpecific + NotenizerConstants.WordDelimeter + nominalModifier.Dependent.Word,
        nominalModifier,
        TokenType.Dependent);
}
/// <summary>
/// Looks up the stored rule that belongs to the best matching stored structure
/// of the given sentence.
/// </summary>
/// <param name="sentence">Sentence to get a rule for.</param>
/// <param name="matchedNote">
/// Receives the stored note of the matched sentence; <c>null</c> when no rule is returned.
/// </param>
/// <returns>
/// The rule of the matched sentence with its structure, sentence, article and
/// match filled in; <c>null</c> when no structure matches or no stored sentence
/// could be obtained for the matched structure.
/// </returns>
private NotenizerNoteRule GetRuleForSentence(NotenizerSentence sentence, out Note matchedNote)
{
    matchedNote = null;

    // Best matching stored structure among the candidates selected for this sentence.
    Match match;
    var sentenceStructure = sentence.Structure;
    var structureCandidates = DB.GetAll(
        DBConstants.StructuresCollectionName,
        DocumentCreator.CreateFilterByStructure(sentence)).Result;
    Structure structure = DocumentParser.GetHeighestMatch(sentenceStructure, structureCandidates, out match);

    if (structure == null)
    {
        return null;
    }

    NotenizerStructure matchedSentenceStructure = new NotenizerStructure(structure);
    List<BsonDocument> sentencesWithSameStructure = DB.GetAll(
        DBConstants.SentencesCollectionName,
        DocumentCreator.CreateFilterById(
            DBConstants.StructureRefIdFieldName,
            matchedSentenceStructure.Structure.ID)).Result;

    if (sentencesWithSameStructure.Count == 0)
    {
        return null;
    }

    // Start from the first stored sentence; a stored sentence with exactly the
    // same text takes precedence and raises the match value to 100.
    nsNotenizerObjects.Sentence matchedSentence = DocumentParser.ParseSentence(sentencesWithSameStructure[0]);

    for (int index = 0; index < sentencesWithSameStructure.Count; index++)
    {
        BsonDocument storedSentence = sentencesWithSameStructure[index];
        string storedText = storedSentence[DBConstants.TextFieldName].AsString.Trim();

        if (storedText == sentence.Sentence.Text)
        {
            match.Value = 100.0;
            matchedSentence = DocumentParser.ParseSentence(storedSentence);
            break;
        }
    }

    if (matchedSentence == null)
    {
        return null;
    }

    // Load the documents the rule refers to: article, note, rule, rule structure.
    Article article = DocumentParser.ParseArticle(
        DB.GetFirst(
            DBConstants.ArticlesCollectionName,
            DocumentCreator.CreateFilterById(sentence.Sentence.Article.ID)).Result);

    Note storedNote = DocumentParser.ParseNote(
        DB.GetFirst(
            DBConstants.NotesCollectionName,
            DocumentCreator.CreateFilterById(matchedSentence.NoteID)).Result);

    NotenizerNoteRule storedRule = DocumentParser.ParseRule(
        DB.GetFirst(
            DBConstants.RulesCollectionName,
            DocumentCreator.CreateFilterById(matchedSentence.RuleID)).Result);

    storedRule.Structure = new NotenizerStructure(
        DocumentParser.ParseStructure(
            DB.GetFirst(
                DBConstants.StructuresCollectionName,
                DocumentCreator.CreateFilterById(storedRule.StructureID)).Result));

    storedRule.Sentence = matchedSentence;
    storedRule.Sentence.Article = article;
    storedRule.Match = match;

    matchedNote = storedNote;
    return storedRule;
}
/// <summary>
/// Applies a single rule dependency to the sentence.
/// Of the sentence dependencies found for the rule, the one with the highest
/// comparison value above zero is added to the note part, using the rule's
/// token type and position. Nothing is added when there is no such dependency.
/// </summary>
/// <param name="sentence">Sentence to apply rule to</param>
/// <param name="rule">Rule dependency to apply</param>
/// <param name="notePart">Part of note that receives the resulting particle</param>
private void ApplyRule(NotenizerSentence sentence, NotenizerDependency rule, NotePart notePart)
{
    NotenizerDependency bestCandidate = null;
    double bestScore = 0.0;

    foreach (NotenizerDependency candidate in sentence.Structure.FindDependencies(rule))
    {
        if (candidate == null)
        {
            continue;
        }

        double score = _comparsionManager.Compare(rule, candidate, sentence.Structure.Dependencies.Count);

        // Strictly greater: on equal scores the earlier candidate is kept.
        if (score > bestScore)
        {
            bestScore = score;
            bestCandidate = candidate;
        }
    }

    if (bestCandidate == null)
    {
        return;
    }

    notePart.Add(new NoteParticle(bestCandidate, rule.TokenType, rule.Position));
}
/// <summary>
/// Creates notes for all sentences of an annotated text.
/// </summary>
/// <remarks>
/// A sentence for which a stored rule with at least one dependency is found is
/// parsed by applying that rule. Every other sentence is parsed by the static
/// parser; the resulting rule, structures, note and sentence are then inserted
/// into the database. Those inserts happen only after all sentences were
/// processed, so that one sentence of the article cannot influence the
/// processing of the others.
/// NOTE(review): the database calls are waited on synchronously via <c>.Result</c>.
/// </remarks>
/// <param name="annotation">Annotated text to parse.</param>
/// <returns>
/// Notes parsed by stored rules followed by the notes parsed statically; an
/// empty list when the annotation carries no sentences.
/// </returns>
public List<NotenizerNote> Parse(Annotation annotation)
{
    List<NotenizerNote> sentencesNoted = new List<NotenizerNote>();
    List<NotenizerNote> notesToSave = new List<NotenizerNote>();
    Article article = GetArticle(annotation.ToString().Trim());

    // `as` yields null when the sentences annotation is missing or of another
    // type; iterating null would throw, so treat that case as "no sentences".
    ArrayList sentences = annotation.get(typeof(CoreAnnotations.SentencesAnnotation)) as ArrayList;
    if (sentences == null)
    {
        return sentencesNoted;
    }

    foreach (Annotation sentenceLoop in sentences)
    {
        NotenizerSentence sentence = new NotenizerSentence(sentenceLoop, article);
        Note matchedNote;
        NotenizerNoteRule rule = GetRuleForSentence(sentence, out matchedNote);

        if (rule != null
            && rule.Structure.Dependencies != null
            && rule.Structure.Dependencies.Count > 0)
        {
            NotenizerNote parsedNote = ApplyRule(sentence, rule);
            parsedNote.Note = matchedNote;

            if (parsedNote.Note.AndRuleID != DBConstants.BsonNullValue)
            {
                parsedNote.AndRule = GetAndRuleForSentence(rule, parsedNote.Note.AndRuleID);
            }

            sentencesNoted.Add(parsedNote);
            continue;
        }

        // No usable stored rule: fall back to the static parser. A null result
        // is skipped because the save loop below dereferences every entry.
        NotenizerNote note = _staticParser.Parse(sentence);
        if (note != null)
        {
            notesToSave.Add(note);
        }
    }

    // inserting into DB AFTER ALL sentences from article were processed
    // to avoid processed sentence to affect processing other sentences from article
    foreach (NotenizerNote sentenceNotedLoop in notesToSave)
    {
        // save rule's structure
        NotenizerNoteRule createdRule = sentenceNotedLoop.CreateRule();
        sentenceNotedLoop.CreateStructure();
        createdRule.Structure.Structure.ID = DB.InsertToCollection(
            DBConstants.StructuresCollectionName,
            DocumentCreator.CreateStructureDocument(createdRule)).Result;

        // save sentence's structure
        NotenizerStructure sentenceStructure = sentenceNotedLoop.OriginalSentence.Structure;
        sentenceStructure.Structure.ID = DB.InsertToCollection(
            DBConstants.StructuresCollectionName,
            DocumentCreator.CreateStructureDocument(sentenceStructure)).Result;

        // save rule (after its structure, whose ID was assigned above)
        createdRule.ID = DB.InsertToCollection(
            DBConstants.RulesCollectionName,
            DocumentCreator.CreateRuleDocument(createdRule)).Result;

        // save note (refers to the rule ID)
        Note createdNote = sentenceNotedLoop.CreateNote();
        createdNote.ID = DB.InsertToCollection(
            DBConstants.NotesCollectionName,
            DocumentCreator.CreateNoteDocument(
                sentenceNotedLoop,
                createdRule.ID,
                String.Empty)).Result;

        // save sentence (refers to structure, article, rule and note IDs)
        sentenceNotedLoop.OriginalSentence.Sentence.ID = DB.InsertToCollection(
            DBConstants.SentencesCollectionName,
            DocumentCreator.CreateSentenceDocument(
                sentenceNotedLoop.OriginalSentence,
                sentenceStructure.Structure.ID,
                article.ID,
                createdRule.ID,
                String.Empty,
                createdNote.ID)).Result;

        Console.WriteLine("Parsed note: " + sentenceNotedLoop.OriginalSentence + " ===> " + sentenceNotedLoop.Text);

        sentencesNoted.Add(sentenceNotedLoop);
    }

    return sentencesNoted;
}
/// <summary>
/// Applies a rule to a sentence and returns the resulting note.
/// Every dependency of the rule's structure is applied to a single note part,
/// the part is added to the note, and the note is then passed on to the
/// overload matching the concrete rule type (note rule or and-rule).
/// Finally the note takes over the rule's structure.
/// </summary>
/// <param name="sentence">Sentence to apply rule to</param>
/// <param name="rule">Rule for parsing to apply</param>
/// <returns>Note created by applying the rule to the sentence.</returns>
public NotenizerNote ApplyRule(NotenizerSentence sentence, NotenizerRule rule)
{
    NotenizerNote note = new NotenizerNote(sentence);
    NotePart notePart = new NotePart(sentence);

    foreach (NotenizerDependency ruleDependency in rule.Structure.Dependencies)
    {
        ApplyRule(sentence, ruleDependency, notePart);
    }

    note.Add(notePart);

    // Dispatch on the concrete rule type; a note rule is checked first.
    NotenizerNoteRule noteRule = rule as NotenizerNoteRule;
    if (noteRule != null)
    {
        ApplyRule(note, noteRule);
    }
    else
    {
        NotenizerAndRule andRule = rule as NotenizerAndRule;
        if (andRule != null)
        {
            ApplyRule(note, andRule);
        }
    }

    note.Structure = rule.Structure;

    return note;
}