/// <summary>
/// Writes <paramref name="contents"/> to a temporary LIFT file, runs the homograph merger over it,
/// and loads the resulting file into <c>_resultDom</c> with all deleted-entry tombstones stripped.
/// </summary>
/// <param name="contents">
/// File content. Each of the placeholders GUID1..GUID4 is replaced by a guid attribute
/// carrying a freshly generated guid.
/// </param>
/// <param name="traitsWithMultiplicity">Forwarded unchanged to <c>HomographMerger.Merge</c>.</param>
private void Run(string contents, string[] traitsWithMultiplicity)
{
    // Each placeholder receives its own, newly generated guid.
    foreach (var placeholder in new[] { "GUID1", "GUID2", "GUID3", "GUID4" })
    {
        contents = contents.Replace(placeholder, "guid='" + Guid.NewGuid() + "'");
    }

    using (var liftFile = new TempLiftFile(contents, "0.13"))
    {
        // The repository is disposed before the file is read back.
        using (var repository = new LiftLexEntryRepository(liftFile.Path))
        {
            var matchingWritingSystem = HomographMerger.GuessPrimarLexicalFormWritingSystem(repository, _progress);
            HomographMerger.Merge(repository, matchingWritingSystem, traitsWithMultiplicity, _progress);
        }

        _resultDom.Load(liftFile.Path);

        // Dropping the tombstones of deleted entries keeps the later assertions simple.
        foreach (XmlNode tombstone in _resultDom.SelectNodes("//entry[@dateDeleted]"))
        {
            tombstone.ParentNode.RemoveChild(tombstone);
        }

        // Clean up a ".bak" sibling of the temp file if one is present.
        var backupPath = liftFile.Path + ".bak";
        if (File.Exists(backupPath))
        {
            File.Delete(backupPath);
        }
    }
}
/// <summary>
/// Visits every entry of <paramref name="repo"/>. Within each entry, the first sense that has a
/// "SILCAWL" property stays where it is; every further sense with that property is moved into
/// an entry of its own.
/// </summary>
/// <param name="repo">Repository whose entries are inspected and changed.</param>
/// <param name="progress">Checked for cancellation before each entry and passed on to the split helper.</param>
/// <exception cref="OperationCanceledException">Thrown when <c>progress.CancelRequested</c> is set.</exception>
public static void Run(LiftLexEntryRepository repo, IProgress progress)
{
    // Iterate over a copy of the ids; splitting creates new entries in the repository.
    foreach (RepositoryId id in new List<RepositoryId>(repo.GetAllItems()))
    {
        if (progress.CancelRequested)
        {
            throw new OperationCanceledException("User cancelled");
        }

        var entry = repo.GetItem(id);
        var extraCawlSenses = new List<LexSense>();
        bool firstCawlSeen = false;

        foreach (var sense in entry.Senses)
        {
            if (sense.GetProperty<MultiText>("SILCAWL") == null)
            {
                continue;
            }

            if (!firstCawlSeen)
            {
                // The first sense with the property remains in the entry.
                firstCawlSeen = true;
            }
            else
            {
                extraCawlSenses.Add(sense);
            }
        }

        // Senses are moved only after enumeration of entry.Senses has finished.
        foreach (var extraSense in extraCawlSenses)
        {
            SpinSenseOffToItsOwnEntry(repo, extraSense, progress);
        }
    }
}
/// <summary>
/// Examines the lexical forms of at most the first 1000 entries of <paramref name="repo"/>,
/// keeps one counter per writing system id, and returns the id whose counter is highest.
/// </summary>
/// <param name="repo">Repository supplying the entries to sample.</param>
/// <param name="progress">Receives the status messages and, when nothing was found, an error.</param>
/// <returns>
/// The id stored in the highest counter, or null when no lexical form was encountered.
/// </returns>
public static string GuessPrimarLexicalFormWritingSystem(LiftLexEntryRepository repo, IProgress progress)
{
    progress.WriteMessage("Looking at 1st 1000 entries to determine which Writing System to use for matching...");

    var tallies = new Dictionary<string, Counter>();
    var allIds = repo.GetAllItems();
    int limit = allIds.Length < 1000 ? allIds.Length : 1000;

    for (int index = 0; index < limit; index++)
    {
        foreach (var form in repo.GetItem(allIds[index]).LexicalForm.Forms)
        {
            Counter tally;
            if (!tallies.TryGetValue(form.WritingSystemId, out tally))
            {
                // First sighting of this writing system: start a counter for it.
                tallies.Add(form.WritingSystemId, new Counter(form.WritingSystemId));
                continue;
            }

            ++tally.count;
        }
    }

    if (tallies.Count == 0)
    {
        progress.WriteError("Could not determine a primary writing system for matching entries.");
        return null;
    }

    var winner = tallies.OrderByDescending(pair => pair.Value.count).FirstOrDefault();
    progress.WriteMessage("Will use '{0}' for matching.", winner.Value.Id);
    return winner.Value.Id;
}
/// <summary>
/// For every entry in <paramref name="repo"/>, leaves the first sense carrying a "SILCAWL"
/// property in place and moves each later sense carrying that property into a separate entry.
/// </summary>
/// <param name="repo">Repository whose entries are processed.</param>
/// <param name="progress">Polled for cancellation once per entry; also handed to the split helper.</param>
/// <exception cref="OperationCanceledException">Thrown when <c>progress.CancelRequested</c> is set.</exception>
public static void Run(LiftLexEntryRepository repo, IProgress progress)
{
    // Snapshot of the ids taken up front.
    var snapshot = new List<RepositoryId>(repo.GetAllItems());
    int position = 0;

    while (position < snapshot.Count)
    {
        if (progress.CancelRequested)
        {
            throw new OperationCanceledException("User cancelled");
        }

        var currentEntry = repo.GetItem(snapshot[position]);
        var toSplitOff = new List<LexSense>();
        bool keptOne = false;

        foreach (var candidate in currentEntry.Senses)
        {
            var cawl = candidate.GetProperty<MultiText>("SILCAWL");
            if (cawl == null)
            {
                continue;
            }

            if (keptOne)
            {
                toSplitOff.Add(candidate);
            }
            else
            {
                // The first sense with the property is the one that stays.
                keptOne = true;
            }
        }

        // Splitting is deferred until the sense collection is no longer being enumerated.
        foreach (var movedSense in toSplitOff)
        {
            SpinSenseOffToItsOwnEntry(repo, movedSense, progress);
        }

        position++;
    }
}
/// <summary>
/// Creates a new temporary folder and a repository backed by a temporary file obtained from it.
/// </summary>
public void Setup()
{
    _tempfolder = new TemporaryFolder();
    _repository = new LiftLexEntryRepository(_tempfolder.GetTemporaryFile());
}
/// <summary>
/// Creates a temporary folder, a repository backed by a temporary file obtained from that folder,
/// and the "th" writing system definition used for headwords.
/// </summary>
public void Setup()
{
    _temporaryFolder = new TemporaryFolder();
    _repository = new LiftLexEntryRepository(_temporaryFolder.GetTemporaryFile());
    _headwordWritingSystem = new WritingSystemDefinition("th");
}
/// <summary>
/// Builds the test environment: a named temporary folder, a repository backed by a temporary file
/// obtained from it, and a "th" headword writing system whose default collation is the
/// "standard" ICU rules collation.
/// </summary>
public TestEnvironment()
{
    _temporaryFolder = new TemporaryFolder("LiftLexEntryRepositoryTests");
    _repository = new LiftLexEntryRepository(_temporaryFolder.GetTemporaryFile());

    var headword = new WritingSystemDefinition("th");
    headword.DefaultCollation = new IcuRulesCollationDefinition("standard");
    _headwordWritingSystem = headword;
}
/// <summary>
/// Moves <paramref name="sense"/> from its parent entry into a newly created entry.
/// This is deliberately modest: the new entry only gets the old entry's lexical form merged in,
/// plus the sense itself; no other traits or fields are copied.
/// </summary>
/// <param name="repo">Repository that creates the new entry and saves both entries.</param>
/// <param name="sense">The sense to move; its <c>Parent</c> is cast to <c>LexEntry</c>.</param>
/// <param name="progress">Receives a message announcing the split.</param>
private static void SpinSenseOffToItsOwnEntry(LiftLexEntryRepository repo, LexSense sense, IProgress progress)
{
    var sourceEntry = (LexEntry) sense.Parent;

    var formText = sourceEntry.LexicalForm.GetFirstAlternative();
    var definitionText = sense.Definition.GetFirstAlternative();
    progress.WriteMessage("Splitting off {0} ({1}) into its own entry", formText, definitionText);

    LexEntry spunOffEntry = repo.CreateItem();
    spunOffEntry.LexicalForm.MergeIn(sourceEntry.LexicalForm);

    // Re-home the sense: detach from the old entry, attach to the new one, fix the parent link.
    sourceEntry.Senses.Remove(sense);
    spunOffEntry.Senses.Add(sense);
    sense.Parent = spunOffEntry;

    repo.SaveItem(sourceEntry);
    repo.SaveItem(spunOffEntry);
}
/// <summary>
/// Saving a newly created entry is expected to raise the repository's <c>AfterEntryModified</c>
/// event, which is observed through the <c>_gotEventCall</c> flag set by <c>OnEvent</c>.
/// </summary>
public void NewEntry_ByEntry_TriggersModifiedEntryAdded()
{
    using (var folder = new TemporaryFolder("eventTests"))
    using (var repository = new LiftLexEntryRepository(folder.GetPathForNewTempFile(true)))
    {
        repository.AfterEntryModified += OnEvent;

        LexEntry created = repository.CreateItem();
        repository.SaveItem(created);

        Assert.IsTrue(_gotEventCall);
    }
}
/// <summary>
/// It can happen that, within a single entry, several senses are mergeable. For every entry
/// with at least two senses, each sense is offered every other sense of the same entry for
/// merging. Senses that were successfully merged are removed from the entry, and the entry is
/// saved if it is dirty.
/// </summary>
/// <param name="repo">Repository providing the entries and receiving the saves.</param>
/// <param name="traitsWithMultiplicity">Forwarded unchanged to the sense merger.</param>
/// <param name="progress">Polled for cancellation before each entry; also forwarded to the sense merger.</param>
/// <exception cref="OperationCanceledException">Thrown when <c>progress.CancelRequested</c> is set.</exception>
private static void MergeSensesWithinEntries(LiftLexEntryRepository repo, string[] traitsWithMultiplicity, IProgress progress)
{
    var ids = new List<RepositoryId>(repo.GetAllItems());
    foreach (var id in ids)
    {
        if (progress.CancelRequested)
        {
            throw new OperationCanceledException("User cancelled");
        }

        var entry = repo.GetItem(id);
        var senses = entry.Senses.ToArray();
        if (senses.Length < 2)
        {
            continue; // Nothing to merge with.
        }

        var sensesToRemove = new List<LexSense>();
        foreach (var sense in entry.Senses)
        {
            // A sense that has already been merged away must not act as a merge target.
            if (sensesToRemove.Any(s => ReferenceEquals(s, sense)))
            {
                continue;
            }

            foreach (var otherSense in entry.Senses)
            {
                if (ReferenceEquals(otherSense, sense)) // Don't try and compare with ourself.
                {
                    continue;
                }

                // Skip candidates that were already merged away. The previous code tested
                // 'sense' here, which can never be in the list at this point, so an
                // already-removed sense could be merged a second time and queued twice.
                if (sensesToRemove.Any(s => ReferenceEquals(s, otherSense)))
                {
                    continue;
                }

                if (!SenseMerger.TryMergeSenseWithSomeExistingSense(sense, otherSense, traitsWithMultiplicity, progress))
                {
                    continue;
                }

                sensesToRemove.Add(otherSense);
            }
        }

        // Removal is deferred so entry.Senses is not modified while it is being enumerated.
        foreach (var sense in sensesToRemove)
        {
            entry.Senses.Remove(sense);
            entry.IsDirty = true;
        }

        if (entry.IsDirty)
        {
            repo.SaveItem(entry);
        }
    }
}
/// <summary>
/// Detaches <paramref name="sense"/> from the entry that currently owns it and places it in a
/// brand-new entry. Not very ambitious: the new entry receives only the merged-in lexical form
/// of the original entry and the sense; other traits and fields are not transferred.
/// </summary>
/// <param name="repo">Used to create the new entry and to save the original and the new entry.</param>
/// <param name="sense">Sense being moved; its <c>Parent</c> is cast to <c>LexEntry</c>.</param>
/// <param name="progress">Receives one message describing what is being split off.</param>
private static void SpinSenseOffToItsOwnEntry(LiftLexEntryRepository repo, LexSense sense, IProgress progress)
{
    var originalEntry = (LexEntry)sense.Parent;

    progress.WriteMessage(
        "Splitting off {0} ({1}) into its own entry",
        originalEntry.LexicalForm.GetFirstAlternative(),
        sense.Definition.GetFirstAlternative());

    LexEntry freshEntry = repo.CreateItem();
    freshEntry.LexicalForm.MergeIn(originalEntry.LexicalForm);

    // Move the sense across and point it at its new owner.
    originalEntry.Senses.Remove(sense);
    freshEntry.Senses.Add(sense);
    sense.Parent = freshEntry;

    // Persist the original entry first, then the new one.
    repo.SaveItem(originalEntry);
    repo.SaveItem(freshEntry);
}
/// <summary>
/// Deleting a saved entry is expected to raise the repository's <c>AfterEntryDeleted</c> event,
/// which is observed through the <c>_gotEventCall</c> flag set by <c>OnEvent</c>.
/// </summary>
public void DeleteEntry_ByEntry_TriggersAfterEntryDeleted()
{
    using (var folder = new TemporaryFolder("eventTests"))
    using (var repository = new LiftLexEntryRepository(folder.GetPathForNewTempFile(true)))
    {
        repository.AfterEntryDeleted += OnEvent;

        LexEntry doomed = repository.CreateItem();
        repository.SaveItem(doomed);
        repository.DeleteItem(doomed);

        Assert.IsTrue(_gotEventCall);
    }
}
/// <summary>
/// It can happen that, within a single entry, several senses are mergeable. For each entry
/// having two or more senses, every sense is compared with every other sense of that entry;
/// a sense that was successfully merged is removed from the entry, and the entry is saved
/// when it is dirty.
/// </summary>
/// <param name="repo">Repository providing the entries and receiving the saves.</param>
/// <param name="traitsWithMultiplicity">Forwarded unchanged to the sense merger.</param>
/// <param name="progress">Polled for cancellation before each entry; also forwarded to the sense merger.</param>
/// <exception cref="OperationCanceledException">Thrown when <c>progress.CancelRequested</c> is set.</exception>
private static void MergeSensesWithinEntries(LiftLexEntryRepository repo, string[] traitsWithMultiplicity, IProgress progress)
{
    var ids = new List<RepositoryId>(repo.GetAllItems());
    foreach (var id in ids)
    {
        if (progress.CancelRequested)
        {
            throw new OperationCanceledException("User cancelled");
        }

        var entry = repo.GetItem(id);
        var senses = entry.Senses.ToArray();
        if (senses.Length < 2)
        {
            continue; // A lone sense has nothing to merge with.
        }

        var sensesToRemove = new List<LexSense>();
        foreach (var sense in entry.Senses)
        {
            // Senses already merged away are not used as merge targets.
            if (sensesToRemove.Any(s => ReferenceEquals(s, sense)))
            {
                continue;
            }

            foreach (var otherSense in entry.Senses)
            {
                if (ReferenceEquals(otherSense, sense)) // Don't try and compare with ourself.
                {
                    continue;
                }

                // Do not merge a sense that is already queued for removal. This guard used to
                // test 'sense', which is never in the list at this point, so it had no effect
                // and a removed sense could be merged again and queued twice.
                if (sensesToRemove.Any(s => ReferenceEquals(s, otherSense)))
                {
                    continue;
                }

                if (!SenseMerger.TryMergeSenseWithSomeExistingSense(sense, otherSense, traitsWithMultiplicity, progress))
                {
                    continue;
                }

                sensesToRemove.Add(otherSense);
            }
        }

        // Remove only after enumeration, so the collection is not modified while iterating.
        foreach (var sense in sensesToRemove)
        {
            entry.Senses.Remove(sense);
            entry.IsDirty = true;
        }

        if (entry.IsDirty)
        {
            repo.SaveItem(entry);
        }
    }
}
/// <summary>
/// Writes <paramref name="contents"/> to a temporary LIFT file, runs the entry splitter on it,
/// loads the result into <c>_resultDom</c>, and finally invokes <paramref name="test"/>.
/// </summary>
/// <param name="contents">Content of the temporary LIFT file.</param>
/// <param name="test">Callback invoked after the temporary file has been disposed.</param>
private void Run(string contents, Action test)
{
    using (var liftFile = new TempLiftFile(contents, "0.13"))
    {
        // The repository is disposed before the file is read back.
        using (var repository = new LiftLexEntryRepository(liftFile.Path))
        {
            EntrySplitter.Run(repository, _progress);
        }

        _resultDom.Load(liftFile.Path);

        // Remove a ".bak" sibling of the temp file if one is present.
        var backupPath = liftFile.Path + ".bak";
        if (File.Exists(backupPath))
        {
            File.Delete(backupPath);
        }
    }

    test();
}
/// <summary>
/// Creates the named temporary folder, requests a new temp file from it, and opens a
/// repository on that file's path.
/// </summary>
public void Setup()
{
    _tempfolder = new TemporaryFolder("LiftLexEntryRepositoryCachingTests");
    _tempFile = _tempfolder.GetNewTempFile(true);

    var repositoryPath = _tempFile.Path;
    _repository = new LiftLexEntryRepository(repositoryPath);
}
/// <summary>
/// Creates the named temporary folder, remembers the path of a temporary file obtained from it,
/// and makes a repository on that path the data mapper under test.
/// </summary>
public override void SetUp()
{
    _tempFolder = new TemporaryFolder("LiftLexEntryRepositoryDeleteAllItemsTransitionTests");

    var persistedPath = _tempFolder.GetTemporaryFile();
    _persistedFilePath = persistedPath;

    DataMapperUnderTest = new LiftLexEntryRepository(_persistedFilePath);
}
/// <summary>
/// Creates a temp file, remembers its path, and makes a repository on that path the data mapper
/// under test.
/// </summary>
public override void SetUp()
{
    _tempFile = new TempFile();

    var persistedPath = _tempFile.Path;
    _persistedFilePath = persistedPath;

    DataMapperUnderTest = new LiftLexEntryRepository(_persistedFilePath);
}
/// <summary>
/// Materializes <paramref name="contents"/> as a temporary LIFT file, merges homographs in it,
/// and loads the merged file into <c>_resultDom</c> after removing deleted-entry tombstones.
/// </summary>
/// <param name="contents">
/// File content in which the placeholders GUID1..GUID4 are each replaced by a guid attribute
/// holding a newly generated guid.
/// </param>
/// <param name="traitsWithMultiplicity">Forwarded unchanged to <c>HomographMerger.Merge</c>.</param>
private void Run(string contents, string[] traitsWithMultiplicity)
{
    string[] guidPlaceholders = { "GUID1", "GUID2", "GUID3", "GUID4" };
    for (int slot = 0; slot < guidPlaceholders.Length; slot++)
    {
        // A distinct guid is generated for every placeholder.
        var guidAttribute = "guid='" + Guid.NewGuid() + "'";
        contents = contents.Replace(guidPlaceholders[slot], guidAttribute);
    }

    using (var liftFile = new TempLiftFile(contents, "0.13"))
    {
        // The repository is disposed before the merged file is read back.
        using (var repository = new LiftLexEntryRepository(liftFile.Path))
        {
            var writingSystemId = HomographMerger.GuessPrimaryLexicalFormWritingSystem(repository, _progress);
            HomographMerger.Merge(repository, writingSystemId, traitsWithMultiplicity, _progress);
        }

        _resultDom.Load(liftFile.Path);

        // Removing these tombstones simplifies the assertions made later.
        foreach (XmlNode tombstone in _resultDom.SelectNodes("//entry[@dateDeleted]"))
        {
            tombstone.ParentNode.RemoveChild(tombstone);
        }

        var backupPath = liftFile.Path + ".bak";
        if (File.Exists(backupPath))
        {
            File.Delete(backupPath);
        }
    }
}
/// <summary>
/// Event handler that records that it was called by setting <c>_gotEventCall</c>.
/// </summary>
/// <param name="sender">Not used.</param>
/// <param name="e">Not used.</param>
private void OnEvent(object sender, LiftLexEntryRepository.EntryEventArgs e)
{
    _gotEventCall = true;
}
/// <summary>
/// Runs the entry splitter against a temporary LIFT file holding <paramref name="contents"/>,
/// loads the outcome into <c>_resultDom</c>, then calls <paramref name="test"/>.
/// </summary>
/// <param name="contents">Content written to the temporary LIFT file.</param>
/// <param name="test">Callback run once the temporary file has been disposed.</param>
private void Run(string contents, Action test)
{
    using (var scratchFile = new TempLiftFile(contents, "0.13"))
    {
        // Dispose the repository before reading the file back.
        using (var repository = new LiftLexEntryRepository(scratchFile.Path))
        {
            EntrySplitter.Run(repository, _progress);
        }

        _resultDom.Load(scratchFile.Path);

        // Delete a ".bak" sibling of the temp file when there is one.
        var backupPath = scratchFile.Path + ".bak";
        if (File.Exists(backupPath))
        {
            File.Delete(backupPath);
        }
    }

    test();
}
/// <summary>
/// Merges homographs: for every entry, looks up the entries whose lexical form matches in the
/// given writing system and tries to merge each match into the entry. A merged match is
/// deleted from the repository and the surviving entry is saved. Afterwards, mergeable senses
/// inside single entries are merged as well.
/// </summary>
/// <param name="repo">Repository that is read from and updated.</param>
/// <param name="writingSystemIdForMatching">Id of the writing system used to match lexical forms.</param>
/// <param name="traitsWithMultiplicity">Forwarded unchanged to the entry and sense mergers.</param>
/// <param name="progress">Polled for cancellation before each entry and used for all messages.</param>
/// <exception cref="OperationCanceledException">Thrown when <c>progress.CancelRequested</c> is set.</exception>
public static void Merge(LiftLexEntryRepository repo, string writingSystemIdForMatching, string[] traitsWithMultiplicity, IProgress progress)
{
    var handledIds = new List<RepositoryId>();

    // Work on a copy of the ids, because merged entries are deleted along the way.
    foreach (RepositoryId currentId in new List<RepositoryId>(repo.GetAllItems()))
    {
        if (progress.CancelRequested)
        {
            throw new OperationCanceledException("User cancelled");
        }

        // Entries already visited, or merged into an earlier entry, are skipped.
        if (handledIds.Contains(currentId))
        {
            continue;
        }
        handledIds.Add(currentId);

        var entry = repo.GetItem(currentId);
        var writingSystemForMatching = WritingSystemDefinition.Parse(writingSystemIdForMatching);
        var matches = repo.GetEntriesWithMatchingLexicalForm(
            entry.LexicalForm.GetExactAlternative(writingSystemIdForMatching),
            writingSystemForMatching);

        // At this point we have entries which match along a single writing-system axis.
        // We may or may not be able to merge them...
        var lexicalForm = entry.LexicalForm.GetExactAlternative(writingSystemForMatching.Id);

        if (matches.Count > 1) // > 1 because each entry will match itself
        {
            progress.WriteMessageWithColor("gray", "Found {0} homograph(s) for {1}", matches.Count, lexicalForm);
        }

        var mergeCount = 0;
        var seenMatchIds = new List<RepositoryId>();
        foreach (RecordToken<LexEntry> incomingMatch in matches)
        {
            if (incomingMatch.Id == currentId)
            {
                continue; // The entry will match itself at least this time.
            }

            if (seenMatchIds.Contains(incomingMatch.Id))
            {
                continue; // This match was already handled for the current entry.
            }
            seenMatchIds.Add(incomingMatch.Id);

            if (!EntryMerger.TryMergeEntries(entry, incomingMatch.RealObject, traitsWithMultiplicity, progress))
            {
                continue;
            }

            mergeCount++;
            handledIds.Add(incomingMatch.Id);
            repo.DeleteItem(incomingMatch.RealObject);
            repo.SaveItem(entry);
        }

        if (matches.Count > 1)
        {
            // Nothing is reported when no merge took place.
            if (mergeCount != 0)
            {
                progress.WriteMessageWithColor("black", "Merged {0} homographs of {1}.", 1 + mergeCount, lexicalForm);
            }

            progress.WriteMessage(""); // blank line
        }
    }

    MergeSensesWithinEntries(repo, traitsWithMultiplicity, progress);
}
/// <summary>
/// Guess the writing system for the primary lexical form. Looks at the lexical forms of at
/// most the first 1000 entries, keeps one counter per writing system id, and picks the id
/// whose counter is highest.
/// </summary>
/// <param name="repo">Repository supplying the entries to sample.</param>
/// <param name="progress">Receives the status messages and, when nothing was found, an error.</param>
/// <returns>The id held by the highest counter, or null when no lexical form was encountered.</returns>
public static string GuessPrimaryLexicalFormWritingSystem(LiftLexEntryRepository repo, IProgress progress)
{
    progress.WriteMessage("Looking at 1st 1000 entries to determine which Writing System to use for matching...");

    var occurrences = new Dictionary<string, Counter>();
    RepositoryId[] ids = repo.GetAllItems();

    int examined = 0;
    foreach (RepositoryId id in ids)
    {
        // Only the first 1000 entries are sampled.
        if (examined == 1000)
        {
            break;
        }
        examined++;

        var entry = repo.GetItem(id);
        foreach (var languageForm in entry.LexicalForm.Forms)
        {
            Counter existing;
            if (!occurrences.TryGetValue(languageForm.WritingSystemId, out existing))
            {
                // First sighting of this writing system: start a counter for it.
                occurrences.Add(languageForm.WritingSystemId, new Counter(languageForm.WritingSystemId));
                continue;
            }

            ++existing.Count;
        }
    }

    if (occurrences.Count == 0)
    {
        progress.WriteError("Could not determine a primary writing system for matching entries.");
        return null;
    }

    var best = occurrences.OrderByDescending(pair => pair.Value.Count).FirstOrDefault();
    progress.WriteMessage("Will use '{0}' for matching.", best.Value.Id);
    return best.Value.Id;
}
/// <summary>
/// Creates a temp file, passes its path to <c>LiftFileInitializer.MakeFile</c>, remembers the
/// returned path, and makes a repository on that path the data mapper under test.
/// </summary>
public override void SetUp()
{
    _tempFile = new TempFile();

    var initializedPath = LiftFileInitializer.MakeFile(_tempFile.Path);
    _persistedFilePath = initializedPath;

    DataMapperUnderTest = new LiftLexEntryRepository(_persistedFilePath);
}
/// <summary>
/// Merge homographs. Each entry is compared with the entries whose lexical form matches it in
/// the given writing system; every match that can be merged is folded into the entry, deleted
/// from the repository, and the entry is saved. Finally, mergeable senses within single
/// entries are merged too.
/// </summary>
/// <param name="repo">Repository that is read from and updated.</param>
/// <param name="writingSystemIdForMatching">Id of the writing system used to match lexical forms.</param>
/// <param name="traitsWithMultiplicity">Forwarded unchanged to the entry and sense mergers.</param>
/// <param name="progress">Polled for cancellation before each entry and used for all messages.</param>
/// <exception cref="OperationCanceledException">Thrown when <c>progress.CancelRequested</c> is set.</exception>
public static void Merge(LiftLexEntryRepository repo, string writingSystemIdForMatching, string[] traitsWithMultiplicity, IProgress progress)
{
    var handledIds = new List<RepositoryId>();
    var snapshot = new List<RepositoryId>(repo.GetAllItems());

    for (int position = 0; position < snapshot.Count; position++)
    {
        RepositoryId id = snapshot[position];

        if (progress.CancelRequested)
        {
            throw new OperationCanceledException("User cancelled");
        }

        // Entries already visited, or merged into an earlier entry, are skipped.
        if (handledIds.Contains(id))
        {
            continue;
        }
        handledIds.Add(id);

        var entry = repo.GetItem(id);

        // Matching uses a definition built from the id, with the "standard" ICU rules collation.
        var writingSystemForMatching = new WritingSystemDefinition(writingSystemIdForMatching);
        writingSystemForMatching.DefaultCollation = new IcuRulesCollationDefinition("standard");

        var matches = repo.GetEntriesWithMatchingLexicalForm(
            entry.LexicalForm.GetExactAlternative(writingSystemIdForMatching),
            writingSystemForMatching);

        // At this point we have entries which match along a single writing-system axis.
        // We may or may not be able to merge them...
        var lexicalForm = entry.LexicalForm.GetExactAlternative(writingSystemForMatching.LanguageTag);

        if (matches.Count > 1) // > 1 because each entry will match itself
        {
            progress.WriteMessageWithColor("gray", "Found {0} homograph(s) for {1}", matches.Count, lexicalForm);
        }

        var mergeCount = 0;
        var seenMatchIds = new List<RepositoryId>();
        foreach (RecordToken<LexEntry> incomingMatch in matches)
        {
            if (incomingMatch.Id == id)
            {
                continue; // The entry will match itself at least this time.
            }

            if (seenMatchIds.Contains(incomingMatch.Id))
            {
                continue; // This match was already handled for the current entry.
            }
            seenMatchIds.Add(incomingMatch.Id);

            bool merged = EntryMerger.TryMergeEntries(entry, incomingMatch.RealObject, traitsWithMultiplicity, progress);
            if (!merged)
            {
                continue;
            }

            mergeCount++;
            handledIds.Add(incomingMatch.Id);
            repo.DeleteItem(incomingMatch.RealObject);
            repo.SaveItem(entry);
        }

        if (matches.Count > 1)
        {
            // Nothing is reported when no merge took place.
            if (mergeCount != 0)
            {
                progress.WriteMessageWithColor("black", "Merged {0} homographs of {1}.", 1 + mergeCount, lexicalForm);
            }

            progress.WriteMessage(""); // blank line
        }
    }

    MergeSensesWithinEntries(repo, traitsWithMultiplicity, progress);
}
/// <summary>
/// Disposes the current data mapper under test and replaces it with a new repository opened
/// on <c>_persistedFilePath</c>.
/// </summary>
protected override void CreateNewRepositoryFromPersistedData()
{
    // The old repository is disposed before a new one is opened on the same path.
    var previous = DataMapperUnderTest;
    previous.Dispose();

    DataMapperUnderTest = new LiftLexEntryRepository(_persistedFilePath);
}
/// <summary>
/// Creates the named temporary folder, passes a temporary file obtained from it to
/// <c>LiftFileInitializer.MakeFile</c>, remembers the returned path, and makes a repository on
/// that path the data mapper under test.
/// </summary>
public override void SetUp()
{
    _tempFolder = new TemporaryFolder("LiftLexEntryRepositoryCreatedFromPersistedData");

    var initializedPath = LiftFileInitializer.MakeFile(_tempFolder.GetTemporaryFile());
    _persistedFilePath = initializedPath;

    DataMapperUnderTest = new LiftLexEntryRepository(_persistedFilePath);
}