/// <summary>
/// Chooses between overlapping entities of different types, removing the losers from
/// <paramref name="entities"/> in place.
/// </summary>
/// <remarks>
/// For every pair of entities whose types differ:
/// if one entity contains the other, the contained one is removed; otherwise, if their
/// spans overlap, the one with the lower score is removed, and on equal scores the one
/// with the shorter span is removed. A pair that ties on both score and span length is
/// left untouched.
/// Comparisons run over a snapshot taken on entry, so an entity that has already been
/// removed from <paramref name="entities"/> still takes part in later comparisons.
/// </remarks>
/// <param name="entities">The entity set to resolve; it is modified in place.</param>
public void ResolveEntities(LucyEntitySet entities)
{
    // With fewer than two entities there is nothing to choose between.
    if (entities.Count <= 1)
    {
        return;
    }

    // Enumerate a copy; all removals go to the caller's set.
    var snapshot = new LucyEntitySet(entities);

    foreach (var current in snapshot)
    {
        foreach (var other in snapshot.Where(candidate => candidate.Type != current.Type))
        {
            // Full containment: the contained entity always loses.
            if (current.Contains(other))
            {
                entities.Remove(other);
                continue;
            }

            if (other.Contains(current))
            {
                entities.Remove(current);
                continue;
            }

            bool spansConflict =
                // other is strictly bigger on both ends
                (other.Start < current.Start && other.End > current.End) ||
                // or other overlaps the start of current
                (other.Start <= current.Start && other.End >= current.Start && other.End <= current.End) ||
                // or other overlaps the end of current
                (other.Start >= current.Start && other.Start < current.End && other.End >= current.End);

            if (!spansConflict)
            {
                continue;
            }

            // The spans conflict: keep the higher score.
            if (current.Score > other.Score)
            {
                entities.Remove(other);
            }
            else if (current.Score < other.Score)
            {
                entities.Remove(current);
            }
            else
            {
                // Scores tie: keep the longer span.
                var currentLength = current.End - current.Start;
                var otherLength = other.End - other.Start;

                if (currentLength < otherLength)
                {
                    entities.Remove(current);
                }
                else if (currentLength > otherLength)
                {
                    entities.Remove(other);
                }

                // Identical score and identical length: both entities are kept.
            }
        }
    }
}
/// <summary>
/// Merges overlapping or adjacent entities of the same type, modifying
/// <paramref name="entities"/> in place.
/// </summary>
/// <remarks>
/// Entities are grouped by type. Within a group, two entities are merged (via
/// <c>Merge</c>) when one strictly encloses the other, when their spans overlap at the
/// start or the end, or when their resolutions have the same string form and the token
/// entity looked up at the end of one is the token entity looked up at the start of the
/// other. Both originals are removed and the merged entity is added.
/// The whole pass is repeated until a pass leaves <c>entities.Count</c> unchanged, so
/// merged results are themselves reconsidered for merging.
/// </remarks>
/// <param name="entities">The entity set to merge; it is modified in place.</param>
public void MergeEntities(LucyEntitySet entities)
{
    // With fewer than two entities there is nothing to merge.
    if (entities.Count <= 1)
    {
        return;
    }

    int count;
    do
    {
        count = entities.Count;
        if (count <= 1)
        {
            break;
        }

        // Only types that occur more than once can contain a mergeable pair.
        // The groups are materialized up front because `entities` is modified below.
        var groupsToMerge = entities
            .GroupBy(e => e.Type)
            .Where(g => g.Count() > 1)
            .ToList();

        foreach (var entitiesOfType in groupsToMerge)
        {
            var removeEntities = new LucyEntitySet();
            var newEntities = new LucyEntitySet();

            foreach (var entity in entitiesOfType)
            {
                // Skip entities already consumed by an earlier merge in this pass.
                if (removeEntities.Contains(entity))
                {
                    continue;
                }

                var tokenStart = this.GetFirstTokenEntity(entity.Start);
                var tokenNext = this.GetFirstTokenEntity(entity.End);

                foreach (var alternateEntity in entitiesOfType.Where(e => e != entity))
                {
                    // Skip alternates already consumed by an earlier merge in this pass.
                    if (removeEntities.Contains(alternateEntity))
                    {
                        continue;
                    }

                    bool shouldMerge =
                        // alternateEntity is strictly bigger on both ends
                        (alternateEntity.Start < entity.Start && alternateEntity.End > entity.End) ||
                        // or it overlaps the start of entity
                        (alternateEntity.Start <= entity.Start && alternateEntity.End >= entity.Start && alternateEntity.End <= entity.End) ||
                        // or it overlaps the end of entity
                        (alternateEntity.Start >= entity.Start && alternateEntity.Start < entity.End && alternateEntity.End >= entity.End);

                    // Non-overlapping entities are merged only when they resolve to the same
                    // value and sit next to each other (one's end token is the other's start token).
                    if (!shouldMerge && entity.Resolution?.ToString() == alternateEntity.Resolution?.ToString())
                    {
                        var altTokenStart = this.GetFirstTokenEntity(alternateEntity.Start);
                        var altTokenNext = this.GetFirstTokenEntity(alternateEntity.End);
                        shouldMerge = tokenNext == altTokenStart || altTokenNext == tokenStart;
                    }

                    if (shouldMerge)
                    {
                        removeEntities.Add(entity);
                        removeEntities.Add(alternateEntity);
                        newEntities.Add(Merge(entity, alternateEntity));
                    }

                    // An entity that does not merge with this alternate is deliberately NOT
                    // queued in newEntities. It is already in `entities`, and because removals
                    // are applied before additions, queuing it would put it back into the set
                    // after it had been merged with a different alternate in the same pass.
                }
            }

            // Apply removals first, then add the merged replacements.
            foreach (var entity in removeEntities)
            {
                entities.Remove(entity);
            }

            foreach (var entity in newEntities)
            {
                entities.Add(entity);
            }
        }
    }
    while (count != entities.Count);
}