/// <summary>
/// Creates a rule with the given identifier and dependencies.
/// The rule's structure is built from the same dependencies.
/// </summary>
/// <param name="id">Identifier stored in the rule.</param>
/// <param name="dependencies">Dependencies stored in the rule and used to build its structure.</param>
public NotenizerRule(String id, NotenizerDependencies dependencies)
{
    _id = id;
    _structure = new NotenizerStructure(dependencies);
    _dependencies = dependencies;

    // Read the clock only once so that a freshly created rule always has
    // identical creation and update timestamps.
    DateTime now = DateTime.Now;
    _createdAt = now;
    _updatedAt = now;
}
/// <summary>
/// Creates an empty note for the given original sentence.
/// The note starts with empty text, no note parts and an empty structure.
/// </summary>
/// <param name="originalSentence">Sentence stored as the note's original sentence.</param>
public NotenizerNote(NotenizerSentence originalSentence)
{
    _text = String.Empty;
    _noteParts = new List<NotePart>();
    _originalSentence = originalSentence;

    // Read the clock only once so that a freshly created note always has
    // identical creation and update timestamps.
    DateTime now = DateTime.Now;
    _createdAt = now;
    _updatedAt = now;

    _structure = new NotenizerStructure();
}
/// <summary>
/// Gets the persisted structure with the highest match against the given structure.
/// Candidates are ranked by the structure rating first; the content rating breaks ties.
/// When several candidates are equally good, the first one encountered wins.
/// </summary>
/// <param name="structure">Structure of the sentence that is being parsed.</param>
/// <param name="persistedStructures">Persisted structure documents to compare against.</param>
/// <param name="m">Match of the winning candidate; null when there is no candidate.</param>
/// <returns>Parsed structure of the winning candidate; null when there is no candidate.</returns>
public static Structure GetHeighestMatch(NotenizerStructure structure, List<BsonDocument> persistedStructures, out Match m)
{
    m = null;

    // Nothing to compare against: report "no match" instead of failing.
    if (persistedStructures == null)
    {
        return null;
    }

    BsonDocument bestDocument = null;
    Match bestMatch = null;

    foreach (BsonDocument candidateDocument in persistedStructures)
    {
        Match candidateMatch = CalculateMatch(structure, candidateDocument);

        if (bestMatch == null
            || bestMatch.Structure < candidateMatch.Structure
            || (bestMatch.Structure == candidateMatch.Structure && bestMatch.Content < candidateMatch.Content))
        {
            bestMatch = candidateMatch;
            bestDocument = candidateDocument;
        }
    }

    m = bestMatch;

    // Only the winner is parsed; intermediate best candidates are never returned,
    // so parsing them inside the loop would be wasted work.
    return bestDocument == null ? null : ParseStructure(bestDocument);
}
/// <summary>
/// Creates a sentence from the given annotation and article.
/// </summary>
/// <param name="annotation">Annotation stored in the sentence and used to obtain its dependencies.</param>
/// <param name="article">Article passed to the created sentence object.</param>
public NotenizerSentence(Annotation annotation, Article article)
{
    // The annotation is assigned first; ToString() is called afterwards.
    _annotation = annotation;

    String sentenceText = this.ToString();
    _sentence = new Sentence(sentenceText, article);

    var sentenceDependencies = GetDepencencies(annotation);
    _structure = new NotenizerStructure(sentenceDependencies);
}
/// <summary>
/// Calculates the match between a persisted structure (from DB) and the structure
/// of the sentence that is being parsed.
/// </summary>
/// <param name="notenizerStructure">Structure of the sentence that is being parsed.</param>
/// <param name="persistedStructure">Persisted structure document to compare against.</param>
/// <returns>
/// Match holding the accumulated structure and content ratings.
/// The value rating is not computed here and is always 0.0.
/// </returns>
private static Match CalculateMatch(NotenizerStructure notenizerStructure, BsonDocument persistedStructure)
{
    // Number of distinct checks contributing to each rating:
    //   structure: relation size, dependent POS, governor POS, both POS, POS pair count  => 5
    //   content:   dependent index, governor index, governor tag+index, dependent tag+index,
    //              all four, dependent NER, governor NER, both NER                       => 8
    Double structureCompareCount = 5.0;
    Double contentCompareCount = 8.0;
    Double oneStructureCompareRating = NotenizerConstants.MaxMatchValue / structureCompareCount;
    Double oneContentCompareRating = NotenizerConstants.MaxMatchValue / contentCompareCount;
    Double structureCounter = 0.0;
    Double contentCounter = 0.0;
    Double valueCounter = 0.0;

    var relationDocuments = persistedStructure[DBConstants.StructureDataFieldName].AsBsonArray;

    /* ================= Structure check 1: relation sizes ================= */
    // A relation scores when both sentences have the same number of dependencies for it.
    Double perRelationRating = oneStructureCompareRating / (Double)relationDocuments.Count;
    int persistedDependencyCount = 0;

    foreach (BsonDocument relationDocument in relationDocuments)
    {
        String relationName = relationDocument[DBConstants.RelationNameFieldName].AsString;
        int persistedCount = relationDocument[DBConstants.DependenciesFieldName].AsBsonArray.Count;

        if (notenizerStructure.CompressedDependencies[relationName].Count == persistedCount)
        {
            structureCounter += perRelationRating;
        }

        persistedDependencyCount += persistedCount;
    }

    // Goes over all dependencies of the original sentence, grouped by relation name
    // (for example: compound), and checks whether the sentence that is parsed right now
    // has a dependency of the same relation with the same POS type, index or named entity
    // at the governor and/or the dependent.
    // When there are no persisted dependencies these ratings are not finite, but then
    // the inner loop below never runs, so they are never used.
    Double perDependencyStructureRating = oneStructureCompareRating / (Double)persistedDependencyCount;
    Double perDependencyContentRating = oneContentCompareRating / (Double)persistedDependencyCount;

    // relation name -> (governor POS type, dependent POS type) -> occurrences in the persisted structure
    var persistedPosPairCounts = new Dictionary<String, Dictionary<Tuple<PartOfSpeechType, PartOfSpeechType>, int>>();

    foreach (BsonDocument relationDocument in relationDocuments)
    {
        String relationName = relationDocument[DBConstants.RelationNameFieldName].AsString;
        var currentDependencies = notenizerStructure.CompressedDependencies[relationName];

        foreach (BsonDocument dependencyDocument in relationDocument[DBConstants.DependenciesFieldName].AsBsonArray)
        {
            // Everything read from the persisted dependency is invariant for the checks
            // below, so it is read once instead of once per compared element.
            BsonDocument governorDocument = dependencyDocument[DBConstants.GovernorFieldName].AsBsonDocument;
            BsonDocument dependentDocument = dependencyDocument[DBConstants.DependentFieldName].AsBsonDocument;

            var governorPos = governorDocument[DBConstants.POSFieldName];
            var dependentPos = dependentDocument[DBConstants.POSFieldName];
            var governorIndex = governorDocument[DBConstants.IndexFieldName];
            var dependentIndex = dependentDocument[DBConstants.IndexFieldName];
            var governorNer = governorDocument[DBConstants.NERFieldName];
            var dependentNer = dependentDocument[DBConstants.NERFieldName];

            Tuple<PartOfSpeechType, PartOfSpeechType> posPair = new Tuple<PartOfSpeechType, PartOfSpeechType>(
                PartOfSpeech.GetTypeFromTag(governorPos.AsString),
                PartOfSpeech.GetTypeFromTag(dependentPos.AsString));
            PartOfSpeechType governorType = posPair.Item1;
            PartOfSpeechType dependentType = posPair.Item2;

            /* ================= Structure match ================= */

            // Remember how often each POS pair occurs per relation; evaluated after this loop.
            Dictionary<Tuple<PartOfSpeechType, PartOfSpeechType>, int> pairCounts;
            if (!persistedPosPairCounts.TryGetValue(relationName, out pairCounts))
            {
                pairCounts = new Dictionary<Tuple<PartOfSpeechType, PartOfSpeechType>, int>();
                persistedPosPairCounts.Add(relationName, pairCounts);
            }

            int occurrences;
            pairCounts.TryGetValue(posPair, out occurrences);
            pairCounts[posPair] = occurrences + 1;

            if (currentDependencies.Any(x => x.Dependent.POS.Type == dependentType))
            {
                structureCounter += perDependencyStructureRating;
            }

            if (currentDependencies.Any(x => x.Governor.POS.Type == governorType))
            {
                structureCounter += perDependencyStructureRating;
            }

            if (currentDependencies.Any(x => x.Dependent.POS.Type == dependentType
                && x.Governor.POS.Type == governorType))
            {
                structureCounter += perDependencyStructureRating;
            }

            /* ================= Content match ================= */

            if (currentDependencies.Any(x => x.Dependent.Index == dependentIndex))
            {
                contentCounter += perDependencyContentRating;
            }

            if (currentDependencies.Any(x => x.Governor.Index == governorIndex))
            {
                contentCounter += perDependencyContentRating;
            }

            if (currentDependencies.Any(x => x.Governor.POS.Tag == governorPos
                && x.Governor.Index == governorIndex))
            {
                contentCounter += perDependencyContentRating;
            }

            if (currentDependencies.Any(x => x.Dependent.POS.Tag == dependentPos
                && x.Dependent.Index == dependentIndex))
            {
                contentCounter += perDependencyContentRating;
            }

            if (currentDependencies.Any(x => x.Dependent.POS.Tag == dependentPos
                && x.Dependent.Index == dependentIndex
                && x.Governor.POS.Tag == governorPos
                && x.Governor.Index == governorIndex))
            {
                contentCounter += perDependencyContentRating;
            }

            if (currentDependencies.Any(x => x.Dependent.NamedEntity.Value == dependentNer))
            {
                contentCounter += perDependencyContentRating;
            }

            if (currentDependencies.Any(x => x.Governor.NamedEntity.Value == governorNer))
            {
                contentCounter += perDependencyContentRating;
            }

            if (currentDependencies.Any(x => x.Dependent.NamedEntity.Value == dependentNer
                && x.Governor.NamedEntity.Value == governorNer))
            {
                contentCounter += perDependencyContentRating;
            }
        }
    }

    /* ================= Structure check 5: POS pair counts ================= */
    // A POS pair scores when it occurs equally often in both sentences for the relation.
    int distinctPairCount = 0;
    int matchingPairCount = 0;

    foreach (var relationEntry in persistedPosPairCounts)
    {
        var currentDependencies = notenizerStructure.CompressedDependencies[relationEntry.Key];
        distinctPairCount += relationEntry.Value.Count;

        foreach (var pairEntry in relationEntry.Value)
        {
            int currentCount = currentDependencies.Count(
                x => x.Governor.POS.Type == pairEntry.Key.Item1
                    && x.Dependent.POS.Type == pairEntry.Key.Item2);

            if (currentCount == pairEntry.Value)
            {
                matchingPairCount++;
            }
        }
    }

    // Without this guard a persisted structure with no dependencies would add
    // (rating / 0) * 0 == NaN and make the whole structure rating NaN.
    if (distinctPairCount > 0)
    {
        structureCounter += oneStructureCompareRating / distinctPairCount * matchingPairCount;
    }

    return new Match(structureCounter, contentCounter, valueCounter);
}
/// <summary>
/// Builds a new structure from the current dependencies, stores it in this object
/// and assigns the same instance to the rule.
/// </summary>
/// <returns>The newly created structure.</returns>
public NotenizerStructure CreateStructure()
{
    NotenizerStructure created = new NotenizerStructure(this.Dependencies);

    this._structure = created;
    this._rule.Structure = created;

    return created;
}
/// <summary>
/// Gets the rule for a sentence.
/// Looks up the best matching persisted structure, picks a persisted sentence that uses
/// that structure (preferring one whose trimmed text equals the text of the given sentence)
/// and loads the note and the rule referenced by that persisted sentence.
/// </summary>
/// <param name="sentence">Sentence to get rule for</param>
/// <param name="matchedNote">Out matched note of sentence; null when no rule was found</param>
/// <returns>Rule of the matched sentence; null when no structure or no sentence matched</returns>
private NotenizerNoteRule GetRuleForSentence(NotenizerSentence sentence, out Note matchedNote)
{
    matchedNote = null;

    // 1) Best matching persisted structure.
    Match match;
    Structure bestStructure = DocumentParser.GetHeighestMatch(
        sentence.Structure,
        DB.GetAll(DBConstants.StructuresCollectionName, DocumentCreator.CreateFilterByStructure(sentence)).Result,
        out match);

    if (structureIsMissing(bestStructure))
    {
        return null;
    }

    // 2) Persisted sentences that reference this structure.
    NotenizerStructure bestNotenizerStructure = new NotenizerStructure(bestStructure);
    List<BsonDocument> candidateDocuments = DB.GetAll(
        DBConstants.SentencesCollectionName,
        DocumentCreator.CreateFilterById(DBConstants.StructureRefIdFieldName, bestNotenizerStructure.Structure.ID)).Result;

    if (candidateDocuments.Count == 0)
    {
        return null;
    }

    // The first candidate is the fallback; a candidate with exactly the same text
    // overrides it and raises the match value to 100.
    nsNotenizerObjects.Sentence matchedSentence = DocumentParser.ParseSentence(candidateDocuments[0]);

    for (int i = 0; i < candidateDocuments.Count; i++)
    {
        BsonDocument candidateDocument = candidateDocuments[i];

        if (candidateDocument[DBConstants.TextFieldName].AsString.Trim() != sentence.Sentence.Text)
        {
            continue;
        }

        match.Value = 100.0;
        matchedSentence = DocumentParser.ParseSentence(candidateDocument);
        break;
    }

    if (matchedSentence == null)
    {
        return null;
    }

    // 3) Article, note and rule; the DB is queried in this order.
    var articleDocument = DB.GetFirst(
        DBConstants.ArticlesCollectionName,
        DocumentCreator.CreateFilterById(sentence.Sentence.Article.ID)).Result;
    Article article = DocumentParser.ParseArticle(articleDocument);

    var noteDocument = DB.GetFirst(
        DBConstants.NotesCollectionName,
        DocumentCreator.CreateFilterById(matchedSentence.NoteID)).Result;
    Note note = DocumentParser.ParseNote(noteDocument);

    var ruleDocument = DB.GetFirst(
        DBConstants.RulesCollectionName,
        DocumentCreator.CreateFilterById(matchedSentence.RuleID)).Result;
    NotenizerNoteRule rule = DocumentParser.ParseRule(ruleDocument);

    var ruleStructureDocument = DB.GetFirst(
        DBConstants.StructuresCollectionName,
        DocumentCreator.CreateFilterById(rule.StructureID)).Result;
    rule.Structure = new NotenizerStructure(DocumentParser.ParseStructure(ruleStructureDocument));

    rule.Sentence = matchedSentence;
    rule.Sentence.Article = article;
    rule.Match = match;

    matchedNote = note;
    return rule;
}

/// <summary>
/// Tells whether no structure was found.
/// </summary>
/// <param name="structure">Structure to check.</param>
/// <returns>True when the structure is null.</returns>
private static bool structureIsMissing(Structure structure)
{
    return structure == null;
}