/// <summary>
/// Reads a GEDCOM file from the test data directory, prints how long the read took,
/// asserts that the read succeeded and produced at least one record, and tallies the
/// individual and family records into <c>_individuals</c> and <c>_families</c>.
/// </summary>
/// <param name="file">Name of the GEDCOM file, relative to the test data directory.</param>
private void Read(string file)
{
    string dir = "/home/david/Projects/Gedcom.NET/Data/tests";
    string gedcomFile = Path.Combine(dir, file);

    // Stopwatch is monotonic and high resolution; DateTime.Now is wall-clock time and
    // can jump (clock adjustments, DST), which makes it unsuitable for elapsed time.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
    _reader = new GedcomRecordReader();
    bool success = _reader.ReadGedcom(gedcomFile);
    timer.Stop();

    System.Console.WriteLine("Read time: " + timer.Elapsed.TotalSeconds + " seconds");

    NUnit.Framework.Assert.IsTrue(success, "Failed to read " + gedcomFile);

    _individuals = 0;
    _families = 0;

    NUnit.Framework.Assert.Greater(_reader.Database.Count, 0, "No records read");

    foreach (DictionaryEntry entry in _reader.Database)
    {
        // Direct cast instead of "as": an unexpected value type now fails with a
        // descriptive InvalidCastException rather than a NullReferenceException below.
        GedcomRecord record = (GedcomRecord)entry.Value;

        if (record.RecordType == GedcomRecordType.Individual)
        {
            _individuals++;
        }
        else if (record.RecordType == GedcomRecordType.Family)
        {
            _families++;
        }
    }

    System.Console.WriteLine(gedcomFile + " contains " + _individuals + " individuals");
}
/// <summary>
/// Loads the named GEDCOM file from the test data directory using a fresh reader,
/// which is kept in <c>_reader</c>. The result of <c>ReadGedcom</c> is not inspected.
/// </summary>
/// <param name="file">Name of the GEDCOM file, relative to the test data directory.</param>
private void Read(string file)
{
    const string testDataDir = "/home/david/Projects/Gedcom.NET/Data/tests";
    string path = Path.Combine(testDataDir, file);

    _reader = new GedcomRecordReader();
    _reader.ReadGedcom(path);
}
/// <summary>
/// Loads the named GEDCOM file from the test data directory, stores the reader's
/// database in <c>_database</c>, and asserts that at least one record was read.
/// </summary>
/// <param name="file">Name of the GEDCOM file, relative to the test data directory.</param>
private void Read(string file)
{
    const string testDataDir = "/home/david/Projects/Gedcom.NET/Data/tests";
    string path = Path.Combine(testDataDir, file);

    _reader = new GedcomRecordReader();
    _reader.ReadGedcom(path);

    // Keep the database around for the tests that inspect it afterwards.
    _database = _reader.Database;

    NUnit.Framework.Assert.Greater(
        _reader.Database.Count,
        0,
        "No records read");
}
/// <summary>
/// Loads the named GEDCOM file from the test data directory and passes the date of every
/// individual attribute, individual event and family event to <c>DateCheck</c>. Prints the
/// parsed / unparsed tallies and asserts that no date was left unparsed.
/// </summary>
/// <param name="file">Name of the GEDCOM file, relative to the test data directory.</param>
private void Read(string file)
{
    const string testDataDir = "/home/david/Projects/Gedcom.NET/Data/tests";
    string path = Path.Combine(testDataDir, file);

    _reader = new GedcomRecordReader();
    _reader.ReadGedcom(path);

    NUnit.Framework.Assert.Greater(_reader.Database.Count, 0, "No records read");

    // Reset the tallies reported below (presumably maintained by DateCheck).
    _parsedDates = 0;
    _notParsedDates = 0;

    foreach (DictionaryEntry item in _reader.Database)
    {
        GedcomRecord current = item.Value as GedcomRecord;

        if (current.RecordType == GedcomRecordType.Individual)
        {
            GedcomIndividualRecord person = (GedcomIndividualRecord)current;

            foreach (GedcomIndividualEvent attribute in person.Attributes)
            {
                DateCheck(attribute.Date);
            }

            foreach (GedcomIndividualEvent personEvent in person.Events)
            {
                DateCheck(personEvent.Date);
            }

            continue;
        }

        if (current.RecordType == GedcomRecordType.Family)
        {
            GedcomFamilyRecord family = (GedcomFamilyRecord)current;

            foreach (GedcomFamilyEvent familyEvent in family.Events)
            {
                DateCheck(familyEvent.Date);
            }
        }
    }

    System.Console.WriteLine(path + ": parsed " + _parsedDates + "\t unparsed " + _notParsedDates);

    NUnit.Framework.Assert.AreEqual(0, _notParsedDates, "Unparsed Dates");
}
/// <summary>
/// Reads the named GEDCOM file from the test data directory, generates an XML document
/// from the resulting database, and saves it as <c>XmlOutput/&lt;file&gt;.xml</c> under
/// the test data directory.
/// </summary>
/// <param name="file">Name of the GEDCOM file, relative to the test data directory.</param>
private void DumpXML(string file)
{
    string dir = "/home/david/Projects/Gedcom.NET/Data/tests";
    string gedcomFile = Path.Combine(dir, file);

    _reader = new GedcomRecordReader();
    _reader.ReadGedcom(gedcomFile);

    GedcomXMLGenerator gen = new GedcomXMLGenerator();
    gen.Database = _reader.Database;

    XmlDocument doc = gen.GenerateXML();

    string xmlOutput = Path.Combine(dir, "XmlOutput");
    string xmlFile = Path.Combine(xmlOutput, file + ".xml");

    // XmlDocument.Save does not create missing directories; make sure the target
    // directory exists (CreateDirectory is a no-op when it already does).
    Directory.CreateDirectory(Path.GetDirectoryName(xmlFile));

    doc.Save(xmlFile);
}
/// <summary>
/// Round-trips a GEDCOM file: reads it from the test data directory, writes it to the
/// <c>Output</c> sub-directory with fixed application header values, and compares the
/// written text with the file of the same name in the <c>Expected</c> sub-directory.
/// </summary>
/// <remarks>
/// When no expected file exists yet, the freshly written output is copied into place,
/// so the first run for a new file always passes and establishes the baseline.
/// </remarks>
/// <param name="file">Name of the GEDCOM file, relative to the test data directory.</param>
private void Write(string file)
{
    string dir = "/home/david/Projects/Gedcom.NET/Data/tests";
    string gedcomFile = Path.Combine(dir, file);

    string outputDir = Path.Combine(dir, "Output");
    string expectedDir = Path.Combine(dir, "Expected");

    GedcomRecordReader reader = new GedcomRecordReader();
    reader.ReadGedcom(gedcomFile);

    NUnit.Framework.Assert.Greater(reader.Database.Count, 0, "No records read");

    string outputFile = Path.Combine(outputDir, file);
    string expectedOutput = Path.Combine(expectedDir, file);

    // Neither directory is guaranteed to exist on a clean checkout; create them up
    // front (no-op when present) so the write / copy below cannot fail on that.
    Directory.CreateDirectory(Path.GetDirectoryName(outputFile));
    Directory.CreateDirectory(Path.GetDirectoryName(expectedOutput));

    _writer = new GedcomRecordWriter();
    _writer.Test = true;
    _writer.Database = reader.Database;
    _writer.GedcomFile = outputFile;

    _writer.ApplicationName = "Gedcom.NET";
    _writer.ApplicationSystemID = "Gedcom.NET";
    _writer.ApplicationVersion = "Test Suite";
    _writer.Corporation = "David A Knight";

    _writer.WriteGedcom();

    // Bootstrap the baseline on first run.
    if (!File.Exists(expectedOutput))
    {
        File.Copy(_writer.GedcomFile, expectedOutput);
    }

    string written = File.ReadAllText(_writer.GedcomFile);
    string expected = File.ReadAllText(expectedOutput);

    // AreEqual reports where the two strings diverge, unlike IsTrue(written == expected).
    NUnit.Framework.Assert.AreEqual(expected, written, "Output differs from expected");
}
/// <summary>
/// Command-line entry point. Searches the web for GEDCOM files matching a search term,
/// downloads any that are not already present into the data folder, then reads every
/// <c>.ged</c> file found under the data folder and compares the number of individuals
/// and families the reader reports with a simple line-based count of INDI / FAM records.
/// Files that fail to read, or whose individual count does not match, are moved to the
/// <c>Bad</c> sub-folder.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the data folder (created when missing); <c>args[1]</c> is the search
/// term, inserted into the query string as-is.
/// </param>
public static void Main(string[] args)
{
    if (args == null || args.Length < 2)
    {
        Console.Error.WriteLine("Usage: <program> <dataFolder> <searchTerm>");
        Environment.ExitCode = 1;
        return;
    }

    string searchUri = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&rsz=large&start=0&q=" + args[1] + "%20filetype:ged";
    string dataFolder = args[0];

    // urls resulting in non gedcom files, ignore them when parsing,
    // leave the files there so we don't attempt to download again
    string[] blacklist =
    {
        "www.blickle.org/BLICKLE.ged",          // blank
        "www.louisianacajun.com/gaudet.ged",    // html
        "www.suttonfamily.biz/ascendant.ged",   // html
        "www.wildensee.de/wildensee.ged",       // blank
        "www.braess.de/Braess_a.ged",           // completely broken file, duplicate xrefs
        "www.ferdinandus.com/Limited76.ged",    // Kith and Kin Pro seems to completely screw up OCCU, or somehow allowed a newline to get into the name
        "www.scotchman.us/nichols.ged",         // blank
        "www.douglasweb.net/index.ged",         // blank
        "alohatown.com/Fergie.GED",             // Mono bug, StreamReader.ReadLine broken
        "mydouglas.net/index.ged",              // blank
        "www.volny.cz/rodokmen.ged",            // duplicate xref S1 on SUBM and SOUR, can we handle this somehow?
    };

    bool doSearch = true;

    // CreateDirectory is a no-op when the directory already exists.
    Directory.CreateDirectory(dataFolder);

    string badFolder = Path.Combine(dataFolder, "Bad");
    Directory.CreateDirectory(badFolder);

    if (doSearch)
    {
        JavaScriptSerializer serializer = new JavaScriptSerializer();

        // One client for the whole search, disposed when done (it was previously
        // re-created for every result page and never disposed).
        using (WebClient client = new WebClient())
        {
            int start = 0;
            while (start < 1280)
            {
                string searchResult = client.DownloadString(searchUri.Replace("start=0", string.Format("start={0}", start)));

                GoogleSearchResponse results = serializer.Deserialize<GoogleSearchResponse>(searchResult);
                GoogleResponseData data = (results != null) ? results.responseData : null;

                // An error response carries no data; stop paging instead of
                // failing with a NullReferenceException.
                if (data == null || data.results == null)
                {
                    Console.WriteLine("Search returned no data for start={0}, stopping search", start);
                    break;
                }

                foreach (GoogleResult result in data.results)
                {
                    Uri url = new Uri(result.url);

                    // Files are stored flat in the data folder as <authority>_<file name>.
                    string path = url.AbsolutePath;
                    string filename = url.Authority + "_" + path.Substring(path.LastIndexOf('/') + 1);
                    string localFilename = Path.Combine(dataFolder, filename);

                    if (!File.Exists(localFilename) &&
                        !File.Exists(Path.Combine(badFolder, Path.GetFileName(localFilename))))
                    {
                        Console.WriteLine("Downloading {0} to {1}", result.visibleUrl, filename);
                        try
                        {
                            client.DownloadFile(url, localFilename);
                        }
                        catch (Exception ex)
                        {
                            // Best effort: one failed download must not abort the run,
                            // but say why it failed.
                            Console.WriteLine("Failed to download {1} from {0}: {2}", result.visibleUrl, filename, ex.Message);
                        }
                    }
                    else
                    {
                        Console.WriteLine("Already downloaded {0}", result.visibleUrl);
                    }
                }

                // Advance to the first page that starts after the current offset;
                // when there is none, force the loop to end.
                bool pageSet = false;
                if (data.cursor != null && data.cursor.pages != null)
                {
                    foreach (GooglePage page in data.cursor.pages)
                    {
                        if (page.start > start)
                        {
                            start = page.start;
                            pageSet = true;
                            break;
                        }
                    }
                }
                if (!pageSet)
                {
                    start = int.MaxValue;
                }
            }
        }
    }

    // Only .ged files are wanted, but the GetFiles pattern is case sensitive and we
    // don't want to miss .GED, .Ged etc., so list everything with *.* and filter on
    // the extension below.
    // NOTE(review): badFolder lives under dataFolder, so files moved there earlier are
    // listed and read again on every run - confirm whether that is intended.
    string[] files = Directory.GetFiles(dataFolder, "*.*", SearchOption.AllDirectories);

    Console.WriteLine("Reading {0} GEDCOM files", files.Length);

    // Built once instead of once per line of every file.
    Regex individualLine = new Regex("^[0-9]+[ \t]@[^@]+@[ \t]INDI[ \t]*$");
    Regex familyLine = new Regex("^[0-9]+[ \t]@[^@]+@[ \t]FAM[ \t]*$");

    GedcomRecordReader reader = new GedcomRecordReader();
    foreach (string gedcomFile in files)
    {
        if (!gedcomFile.EndsWith(".ged", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        if (IsBlacklisted(gedcomFile, blacklist))
        {
            continue;
        }

        int expectedIndividuals = 0;
        int expectedFamilies = 0;

        Console.WriteLine("-------------------------------");
        Console.WriteLine("Scanning: " + gedcomFile);

        // Independent count of record header lines to compare against the reader.
        using (StreamReader sr = new StreamReader(gedcomFile))
        {
            string line = null;
            while ((line = sr.ReadLine()) != null)
            {
                if (individualLine.IsMatch(line))
                {
                    expectedIndividuals++;
                }
                else if (familyLine.IsMatch(line))
                {
                    expectedFamilies++;
                }
            }
        }

        Console.WriteLine("Reading: " + gedcomFile);

        if (!reader.ReadGedcom(gedcomFile))
        {
            Console.WriteLine("\tFailed to read: " + gedcomFile);
            MoveToBadFolder(gedcomFile, badFolder);
        }
        else
        {
            Console.WriteLine("\tRead: " + gedcomFile);
            Console.WriteLine("\t\tIndividuals: " + reader.Database.Individuals.Count + " Expected: " + expectedIndividuals);
            Console.WriteLine("\t\tFamilies: " + reader.Database.Families.Count + " Expected: " + expectedFamilies);

            // Only the individual count is enforced; the family count is reported only.
            if (reader.Database.Individuals.Count != expectedIndividuals)
            {
                MoveToBadFolder(gedcomFile, badFolder);
            }
        }
    }

    Console.WriteLine("-------------------------------");
    Console.WriteLine("Done!");
}

/// <summary>
/// Returns true when <paramref name="gedcomFile"/> corresponds to one of the blacklisted
/// "authority/file" entries, in either the nested (authority/file) or the flat
/// (authority_file) on-disk layout.
/// </summary>
private static bool IsBlacklisted(string gedcomFile, string[] blacklist)
{
    // Normalise separators so the same comparison works for Windows and Unix paths.
    string normalised = gedcomFile.Replace('\\', '/');
    string fileName = normalised.Substring(normalised.LastIndexOf('/') + 1);

    foreach (string entry in blacklist)
    {
        if (normalised.EndsWith(entry, StringComparison.Ordinal))
        {
            return true;
        }

        // The downloader stores files as <authority>_<file>, which the plain
        // suffix test above can never match.
        if (string.Equals(fileName, entry.Replace('/', '_'), StringComparison.Ordinal))
        {
            return true;
        }
    }

    return false;
}

/// <summary>
/// Moves <paramref name="gedcomFile"/> into <paramref name="badFolder"/>, keeping its file
/// name. Does nothing when the file is already located there.
/// </summary>
private static void MoveToBadFolder(string gedcomFile, string badFolder)
{
    // File.Move needs a destination FILE path, not just the target directory.
    string destination = Path.Combine(badFolder, Path.GetFileName(gedcomFile));

    if (string.Equals(Path.GetFullPath(gedcomFile), Path.GetFullPath(destination), StringComparison.Ordinal))
    {
        return;
    }

    File.Move(gedcomFile, destination);
}