/// <summary>
		/// Reads the named GEDCOM file from the test data directory into a fresh
		/// <c>_reader</c>, prints how long the read took, asserts that the read
		/// succeeded and produced records, and then counts the individual and
		/// family records into <c>_individuals</c> and <c>_families</c>.
		/// </summary>
		/// <param name="file">File name relative to the test data directory.</param>
		private void Read(string file)
		{
			string dir = "/home/david/Projects/Gedcom.NET/Data/tests";
			string gedcomFile = Path.Combine(dir,file);

			// Stopwatch is monotonic and high resolution; DateTime.Now is neither,
			// so short reads could previously report zero or even negative durations.
			System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
			_reader = new GedcomRecordReader();
			bool success = _reader.ReadGedcom(gedcomFile);
			timer.Stop();
			
			System.Console.WriteLine("Read time: " + timer.Elapsed.TotalSeconds + " seconds");
			
			NUnit.Framework.Assert.AreEqual(true, success, "Failed to read " + gedcomFile);
			
			_individuals = 0;
			_families = 0;
			
			NUnit.Framework.Assert.Greater(_reader.Database.Count,0,"No records read");
						
			foreach (DictionaryEntry entry in _reader.Database)
			{
				GedcomRecord record = entry.Value as GedcomRecord;

				// Fail the test with a clear message rather than a NullReferenceException
				// if the database ever holds a null or non-record value.
				NUnit.Framework.Assert.IsNotNull(record, "Database entry " + entry.Key + " is not a GedcomRecord");
							
				if (record.RecordType == GedcomRecordType.Individual)
				{
					_individuals ++;	
				}
				else if (record.RecordType == GedcomRecordType.Family)
				{
					_families ++;
				}
			}

			System.Console.WriteLine(gedcomFile + " contains " + _individuals + " individuals");
		}
		/// <summary>
		/// Loads the given GEDCOM test file through a newly created reader, which
		/// is kept in <c>_reader</c> for the calling test to inspect.
		/// </summary>
		/// <param name="file">Name of the file inside the test data directory.</param>
		private void Read(string file)
		{
			string testFilePath = Path.Combine("/home/david/Projects/Gedcom.NET/Data/tests", file);

			GedcomRecordReader freshReader = new GedcomRecordReader();
			_reader = freshReader;

			// The success flag returned by ReadGedcom is not inspected here.
			freshReader.ReadGedcom(testFilePath);
		}
		/// <summary>
		/// Loads the given GEDCOM test file, stores the reader in <c>_reader</c>
		/// and its database in <c>_database</c>, then asserts that at least one
		/// record was read.
		/// </summary>
		/// <param name="file">Name of the file inside the test data directory.</param>
		private void Read(string file)
		{
			string testFilePath = Path.Combine("/home/david/Projects/Gedcom.NET/Data/tests", file);

			GedcomRecordReader freshReader = new GedcomRecordReader();
			_reader = freshReader;
			freshReader.ReadGedcom(testFilePath);

			// Publish the database before asserting so it is set even when the assert fails.
			_database = freshReader.Database;

			NUnit.Framework.Assert.Greater(freshReader.Database.Count, 0, "No records read");
		}
		/// <summary>
		/// Reads the named GEDCOM test file and passes the date of every individual
		/// attribute, individual event and family event to <c>DateCheck</c>, then
		/// asserts that <c>_notParsedDates</c> is zero.
		/// </summary>
		/// <remarks>
		/// NOTE(review): <c>DateCheck</c> is defined elsewhere; presumably it is what
		/// increments <c>_parsedDates</c> / <c>_notParsedDates</c> - confirm.
		/// </remarks>
		/// <param name="file">File name relative to the test data directory.</param>
		private void Read(string file)
		{
			string dir = "/home/david/Projects/Gedcom.NET/Data/tests";
			string gedcomFile = Path.Combine(dir,file);
			
			_reader = new GedcomRecordReader();
			_reader.ReadGedcom(gedcomFile);
						
			NUnit.Framework.Assert.Greater(_reader.Database.Count,0,"No records read");
		
			// Reset the counters so results from a previous file do not leak into this one.
			_parsedDates = 0;
			_notParsedDates = 0;
			foreach (DictionaryEntry entry in _reader.Database)
			{
				GedcomRecord record = entry.Value as GedcomRecord;

				// Fail the test with a clear message rather than a NullReferenceException
				// if the database ever holds a null or non-record value.
				NUnit.Framework.Assert.IsNotNull(record, "Database entry " + entry.Key + " is not a GedcomRecord");
							
				if (record.RecordType == GedcomRecordType.Individual)
				{
					GedcomIndividualRecord indi = (GedcomIndividualRecord)record;
					
					foreach (GedcomIndividualEvent ev in indi.Attributes)
					{
						DateCheck(ev.Date);
					}
					
					foreach (GedcomIndividualEvent ev in indi.Events)
					{
						DateCheck(ev.Date);
					}
				}
				else if (record.RecordType == GedcomRecordType.Family)
				{
					GedcomFamilyRecord fam = (GedcomFamilyRecord)record;
					
					foreach (GedcomFamilyEvent ev in fam.Events)
					{
						DateCheck(ev.Date);
					}
				}
			}

			System.Console.WriteLine(gedcomFile + ": parsed " + _parsedDates + "\t unparsed " + _notParsedDates);
			
			NUnit.Framework.Assert.AreEqual(0,_notParsedDates,"Unparsed Dates");
		}
		/// <summary>
		/// Reads the named GEDCOM test file, generates an XML document from the
		/// resulting database and saves it as <c>XmlOutput/&lt;file&gt;.xml</c> under
		/// the test data directory.
		/// </summary>
		/// <param name="file">File name relative to the test data directory.</param>
		private void DumpXML(string file)
		{
			string dir = "/home/david/Projects/Gedcom.NET/Data/tests";
			string gedcomFile = Path.Combine(dir,file);
			
			_reader = new GedcomRecordReader();
			_reader.ReadGedcom(gedcomFile);
			
			GedcomXMLGenerator gen = new GedcomXMLGenerator();
			gen.Database = _reader.Database;
			
			XmlDocument doc = gen.GenerateXML();
			
			string xmlOutput = Path.Combine(dir, "XmlOutput");
			string xmlFile = Path.Combine(xmlOutput, file + ".xml");

			// XmlDocument.Save does not create missing directories; CreateDirectory
			// is a no-op when the directory already exists.
			Directory.CreateDirectory(xmlOutput);
			
			doc.Save(xmlFile);
		}
		/// <summary>
		/// Round-trips a GEDCOM test file: reads it, writes it back out through
		/// <c>_writer</c> into the <c>Output</c> directory, and compares the written
		/// text against the file of the same name in the <c>Expected</c> directory.
		/// </summary>
		/// <remarks>
		/// When no expected file exists yet, the freshly written output is copied
		/// into place as the baseline, so the first run for a new file always passes.
		/// </remarks>
		/// <param name="file">File name relative to the test data directory.</param>
		private void Write(string file)
		{
			string dir = "/home/david/Projects/Gedcom.NET/Data/tests";
			string gedcomFile = Path.Combine(dir,file);
			
			string outputDir = Path.Combine(dir,"Output");
			string expectedDir = Path.Combine(dir,"Expected");
			
			GedcomRecordReader reader = new GedcomRecordReader();
			reader.ReadGedcom(gedcomFile);
			
			NUnit.Framework.Assert.Greater(reader.Database.Count,0,"No records read");
			
			_writer = new GedcomRecordWriter();
			_writer.Test = true;
			_writer.Database = reader.Database;
			_writer.GedcomFile = Path.Combine(outputDir,file);

			_writer.ApplicationName = "Gedcom.NET";
			_writer.ApplicationSystemID = "Gedcom.NET";
			_writer.ApplicationVersion = "Test Suite";
			_writer.Corporation = "David A Knight";

			// Make sure the output directory exists so a fresh checkout does not
			// fail with an I/O error; this is a no-op when it is already there.
			Directory.CreateDirectory(outputDir);
			
			_writer.WriteGedcom();
			
			string expectedOutput = Path.Combine(expectedDir,file);
			if (!File.Exists(expectedOutput))
			{
				// Bootstrap the baseline from the current output.
				Directory.CreateDirectory(expectedDir);
				File.Copy(_writer.GedcomFile,expectedOutput);	
			}
			
			string written = File.ReadAllText(_writer.GedcomFile);
			string expected = File.ReadAllText(expectedOutput);

			// AreEqual reports where the two strings diverge; IsTrue(a == b) only says "false".
			NUnit.Framework.Assert.AreEqual(expected, written, "Output differs from expected");
		}
Exemple #7
0
		/// <summary>
		/// Searches the web for GEDCOM files matching a query, downloads any that have
		/// not been fetched before, then reads every .ged file under the data folder and
		/// compares the individual/family counts reported by the reader with counts
		/// obtained by scanning the raw text. Files that fail to read, or whose
		/// individual count does not match, are moved to the "Bad" sub-folder.
		/// </summary>
		/// <param name="args">
		/// <c>args[0]</c> is the data folder (created if missing);
		/// <c>args[1]</c> is the search term.
		/// </param>
		public static void Main(string[] args)
		{
			if (args == null || args.Length < 2)
			{
				Console.WriteLine("Usage: <dataFolder> <searchTerm>");
				return;
			}

			string dataFolder = args[0];

			// The search term is escaped so spaces, '&' etc. cannot corrupt the query string.
			string searchUriPrefix = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&rsz=large&start=";
			string searchUriSuffix = "&q=" + Uri.EscapeDataString(args[1]) + "%20filetype:ged";

			// urls resulting in non gedcom files, ignore them when parsing,
			// leave the files there so we don't attempt to download again
			string[] blacklist =
			{
				"www.blickle.org/BLICKLE.ged",  // blank
				"www.louisianacajun.com/gaudet.ged", // html
				"www.suttonfamily.biz/ascendant.ged", // html;
				"www.wildensee.de/wildensee.ged", // blank
				"www.braess.de/Braess_a.ged", // completely broken file, duplicate xrefs
				"www.ferdinandus.com/Limited76.ged", // Kith and Kin Pro seems to completely screw up OCCU, or somehow allowed a newline to get into the name
				"www.scotchman.us/nichols.ged", // blank
				"www.douglasweb.net/index.ged", // blank
				"alohatown.com/Fergie.GED", // Mono bug, StreamReader.ReadLine broken
				"mydouglas.net/index.ged", // blank
				"www.volny.cz/rodokmen.ged", // duplicate xref S1 on SUBM and SOUR, can we handle this somehow?
			};
			
			// Manual switch: set to false to skip the download phase and only re-read local files.
			bool doSearch = true;

			// CreateDirectory is a no-op for directories that already exist.
			Directory.CreateDirectory(dataFolder);
			string badFolder = Path.Combine(dataFolder, "Bad");
			Directory.CreateDirectory(badFolder);
			
			if (doSearch)
			{
				DownloadSearchResults(searchUriPrefix, searchUriSuffix, dataFolder, badFolder);
			}

			// *.*, should only be .ged files.  GetFiles is case sensitive though
			// we don't want to miss out on .GED .Ged etc. so use *.*
			// handle filtering out others  in the foreach with an EndsWith
			// NOTE(review): badFolder lives inside dataFolder, so with AllDirectories the
			// files already moved to "Bad" are enumerated again - confirm this is intended.
			string[] files = Directory.GetFiles(dataFolder, "*.*", SearchOption.AllDirectories);

			Console.WriteLine("Reading {0} GEDCOM files", files.Length);
			
			GedcomRecordReader reader = new GedcomRecordReader();
			foreach (string gedcomFile in files)
			{
				if (!gedcomFile.EndsWith(".ged", StringComparison.OrdinalIgnoreCase))
				{
					continue;
				}
				if (IsBlacklisted(gedcomFile, blacklist))
				{
					continue;
				}

				int expectedIndividuals;
				int expectedFamilies;

				Console.WriteLine("-------------------------------");
				Console.WriteLine("Scanning: " + gedcomFile);

				CountExpectedRecords(gedcomFile, out expectedIndividuals, out expectedFamilies);

				Console.WriteLine("Reading: " + gedcomFile);
				if (!reader.ReadGedcom(gedcomFile))
				{
					Console.WriteLine("\tFailed to read: " + gedcomFile);
					MoveToBadFolder(gedcomFile, badFolder);
				}
				else
				{
					Console.WriteLine("\tRead: " + gedcomFile);
					Console.WriteLine("\t\tIndividuals: " + reader.Database.Individuals.Count + " Expected: " + expectedIndividuals);
					Console.WriteLine("\t\tFamilies: " + reader.Database.Families.Count + " Expected: " + expectedFamilies);

					// Only the individual count decides whether the file is treated as bad.
					if (reader.Database.Individuals.Count != expectedIndividuals)
					{
						// Previously the bad folder itself was passed as the destination
						// file name, which makes File.Move throw.
						MoveToBadFolder(gedcomFile, badFolder);
					}
				}
			}

			Console.WriteLine("-------------------------------");
			Console.WriteLine("Done!");
		}

		/// <summary>
		/// Pages through the search results and downloads every result that is not
		/// already present in the data folder or the bad folder.
		/// </summary>
		private static void DownloadSearchResults(string searchUriPrefix, string searchUriSuffix, string dataFolder, string badFolder)
		{
			JavaScriptSerializer serializer = new JavaScriptSerializer();

			// One client for the whole run, disposed deterministically.
			using (WebClient client = new WebClient())
			{
				int start = 0;

				while (start < 1280)
				{
					string searchResult = client.DownloadString(searchUriPrefix + start + searchUriSuffix);
					GoogleSearchResponse results = serializer.Deserialize<GoogleSearchResponse>(searchResult);

					// Guard against a response without a data payload instead of
					// failing with a NullReferenceException.
					GoogleResponseData data = (results != null) ? results.responseData : null;
					if (data == null || data.results == null)
					{
						Console.WriteLine("Search returned no data at start={0}, stopping", start);
						break;
					}

					foreach (GoogleResult result in data.results)
					{
						Uri url = new Uri(result.url);

						// Files are stored flat in the data folder as "<authority>_<file name>".
						string path = url.AbsolutePath;
						string filename = url.Authority + "_" + path.Substring(path.LastIndexOf('/') + 1);
						string localFilename = Path.Combine(dataFolder, filename);

						if (!File.Exists(localFilename) && !File.Exists(Path.Combine(badFolder, filename)))
						{
							Console.WriteLine("Downloading {0} to {1}", result.visibleUrl, filename);
							try
							{
								client.DownloadFile(url, localFilename);
							}
							catch (Exception ex)
							{
								// Best effort: one failed download must not stop the run,
								// but the reason is reported rather than discarded.
								Console.WriteLine("Failed to download {1} from {0}: {2}", result.visibleUrl, filename, ex.Message);
							}
						}
						else
						{
							Console.WriteLine("Already downloaded {0}", result.visibleUrl);
						}
					}

					// Advance to the first page that starts after the current offset;
					// when there is none, int.MaxValue ends the loop.
					int nextStart = int.MaxValue;
					if (data.cursor != null && data.cursor.pages != null)
					{
						foreach (GooglePage page in data.cursor.pages)
						{
							if (page.start > start)
							{
								nextStart = page.start;
								break;
							}
						}
					}
					start = nextStart;
				}
			}
		}

		/// <summary>
		/// Returns true when the file path ends with a blacklist entry, either as
		/// written ("domain/file") or in the flattened "domain_file" form that the
		/// downloader uses for local file names.
		/// </summary>
		private static bool IsBlacklisted(string gedcomFile, string[] blacklist)
		{
			foreach (string entry in blacklist)
			{
				if (gedcomFile.EndsWith(entry, StringComparison.Ordinal) ||
					gedcomFile.EndsWith(entry.Replace('/', '_'), StringComparison.Ordinal))
				{
					return true;
				}
			}
			return false;
		}

		/// <summary>
		/// Counts the level lines in the raw file that declare an INDI or FAM record.
		/// </summary>
		private static void CountExpectedRecords(string gedcomFile, out int individuals, out int families)
		{
			individuals = 0;
			families = 0;

			// Built once per file rather than once per line.
			Regex individualLine = new Regex("^[0-9]+[ \t]@[^@]+@[ \t]INDI[ \t]*$");
			Regex familyLine = new Regex("^[0-9]+[ \t]@[^@]+@[ \t]FAM[ \t]*$");

			using (StreamReader sr = new StreamReader(gedcomFile))
			{
				string line;
				while ((line = sr.ReadLine()) != null)
				{
					if (individualLine.IsMatch(line))
					{
						individuals ++;
					}
					else if (familyLine.IsMatch(line))
					{
						families ++;
					}
				}
			}
		}

		/// <summary>
		/// Moves a file into the bad folder under its own file name. Nothing is moved
		/// when a file of that name is already there (which includes the case where
		/// the file being processed is itself inside the bad folder).
		/// </summary>
		private static void MoveToBadFolder(string gedcomFile, string badFolder)
		{
			string destination = Path.Combine(badFolder, Path.GetFileName(gedcomFile));
			if (File.Exists(destination))
			{
				Console.WriteLine("\tNot moving, already present in bad folder: " + destination);
				return;
			}
			File.Move(gedcomFile, destination);
		}