/// <summary>
/// Builds the (record object, parser) pair for a raw record by inspecting its first line.
/// </summary>
/// <param name="rec">The accumulated lines of one record.</param>
/// <returns>
/// An <c>Unknown</c> record paired with a null parser when the first line's level is not '0';
/// otherwise whatever <c>GedRecFactory</c> produces for that line's ident, tag and remainder.
/// </returns>
private Tuple<object, GedParse> Make(GedRecord rec)
{
    var firstLine = rec.FirstLine();
    gs.Split(firstLine, ' ');
    char level = gs.Level(firstLine);

    // The first line of a record is expected to be at level '0'.
    if (level == '0')
    {
        // Hand the pieces of the first line to the factory, which picks the concrete record type.
        string remainder = new string(gs.Remain(firstLine));
        return GedRecFactory(rec, gs.Ident(firstLine), gs.Tag(firstLine), remainder);
    }

    // Any other level: wrap the lines in an Unknown record (no ident, no parser).
    // TODO should this be an error record instead?
    var unknown = new Unknown(rec, null, gs.Tag(firstLine));
    return new Tuple<object, GedParse>(unknown, null);
}
/// <summary>
/// Drops this object's references to the parser, the parsed data, the collected errors
/// and the record currently being accumulated.
/// </summary>
/// <param name="disposing">Not consulted here; the same references are cleared either way.</param>
protected virtual void Dispose(bool disposing)
{
    // TODO any explicit disposal required?
    Parser = null;
    Data = null;
    Errors = null;
    _currRec = null;
}
// Common method for unit testing (instream != null) or file reading (gedPath != null)
/// <summary>
/// Read GEDCOM data into memory.
/// </summary>
/// <remarks>
/// Resets <c>Errors</c>, <c>Data</c> and <c>Parser</c>, feeds every line of the input through
/// <c>ProcessLine</c>, then finishes the parser and gathers records and errors. An exception
/// thrown while reading is not propagated; it is recorded in <c>Errors</c> with error code
/// <c>UnkRec.ErrorCode.Exception</c>.
/// </remarks>
/// <param name="gedPath">The path to the file. If null, will attempt to read from instream instead.</param>
/// <param name="instream">An input stream to read from instead of a file path.</param>
public void ReadGed(string gedPath, StreamReader instream = null)
{
    _emptyLineSeen = 0;
    FilePath = gedPath;
    Errors = new List<UnkRec>();

    GedReader reader = new GedReader();
    reader.BufferSize = _bufferSize;
    reader.ProcessALine = ProcessLine;
    reader.ErrorTracker = DoError;

    // Processor context
    Parser = new GedParser(FilePath);
    Data = new List<GEDCommon>();
    _currRec = new GedRecord();

    // The locals below are never read; they only touch the corresponding Instance property.
#if SQLITE
    var foo = SQLite.Instance;
#elif LITEDB
    var foo = SharpGEDParser.Parser.LiteDB.Instance;
#elif NOTESTREAM
    var foo = NoteStream.Instance;
#endif
#if XREFTRACK
    var bar = XrefTrack.Instance;
#endif

    try
    {
        if (gedPath == null)
        {
            reader.ReadFile(instream);
        }
        else
        {
            reader.ReadFile(gedPath);
        }
        EndOfFile();
    }
    catch (Exception)
    {
        // Deliberately best-effort: record the failure and carry on with whatever was read.
        // The line number comes from the reader because, within this method, the _lineNum
        // field is only refreshed after the read has completed (see below), so it would not
        // reflect where the failure happened.
        UnkRec err = new UnkRec
        {
            Error = UnkRec.ErrorCode.Exception,
            Beg = reader._lineNum
        };
        // TODO err.Error = string.Format("Exception: {0} line {1} | {2}", ex.Message, _lineNum, ex.StackTrace);
        Errors.Add(err);
    }

    Parser.FinishUp();
    GatherRecords();
    GatherErrors();

    _lineNum = reader._lineNum;
    _currRec = null;
}
/// <summary>
/// Creates the record object for a top-level tag and pairs it with the parser for its sub-lines.
/// </summary>
/// <param name="rec">The raw lines of the record.</param>
/// <param name="ident">The ident taken from the record's first line.</param>
/// <param name="tag">The tag taken from the record's first line; matched case-insensitively.</param>
/// <param name="remain">The text following the tag on the first line.</param>
/// <returns>
/// A tuple of (record, parser). The parser is null for records with a missing tag, for
/// SUBM/SUBN and for unrecognized tags. Returns null (no tuple at all) for TRLR.
/// </returns>
private Tuple<object, GedParse> GedRecFactory(GedRecord rec, string ident, string tag, string remain)
{
    if (string.IsNullOrWhiteSpace(tag))
    {
        var missing = new Unknown(rec, ident, "");
        missing.Errors.Add(new UnkRec { Error = UnkRec.ErrorCode.MissTag });
        return new Tuple<object, GedParse>(missing, null);
    }

    // Parse 'top level' records. Parsing of some record types (e.g. NOTE, SOUR, etc) are likely to be in 'common' with sub-record parsing
    // TODO Very much brute force. If/until this is found to be optimizable

    // Upper-case with the invariant culture: the culture-sensitive ToUpper() maps 'i' to a
    // dotted capital under e.g. Turkish cultures, so "indi" would not match "INDI".
    switch (tag.ToUpperInvariant())
    {
        case "INDI":
            return new Tuple<object, GedParse>(new IndiRecord(rec, ident, remain), _IndiParseSingleton);
        case "FAM":
            return new Tuple<object, GedParse>(new FamRecord(rec, ident, remain), _FamParseSingleton);
        case "SOUR":
            return new Tuple<object, GedParse>(new SourceRecord(rec, ident, remain), _SourParseSingleton);
        case "REPO":
            return new Tuple<object, GedParse>(new Repository(rec, ident, remain), _RepoParseSingleton);
        case "NOTE":
        {
            // NOTE re-derives its remainder from the first line via RemainLS instead of using
            // 'remain' (presumably to keep leading whitespace - confirm against RemainLS).
            string remainLS = gs.RemainLS(rec.FirstLine());
            return new Tuple<object, GedParse>(new NoteRecord(rec, ident, remainLS), _NoteParseSingleton);
        }
        case "OBJE":
            return new Tuple<object, GedParse>(new MediaRecord(rec, ident, remain), _MediaParseSingleton);
        case "HEAD":
            return new Tuple<object, GedParse>(new HeadRecord(rec), _HeadParseSingleton);
        case "TRLR":
            // No record is created for the trailer; callers receive null.
            return null;
        case "SUBM": // TODO temp ignore
        case "SUBN": // TODO temp ignore
            return new Tuple<object, GedParse>(new DontCare(rec, tag), null);
        default:
        {
            // Custom or unsupported tag: keep the original tag text on the Unknown record.
            var unknown = new Unknown(rec, ident, tag);
            return new Tuple<object, GedParse>(unknown, null);
        }
    }
}
/// <summary>
/// Handles one input line. A line whose first character is '0' closes the record being
/// accumulated (if it has any lines) and starts a new one; any other line is appended to
/// the current record.
/// </summary>
/// <param name="line">The characters of the line.</param>
/// <param name="lineNum">The line's number; used in error reports and as the start of a new record.</param>
/// <returns>
/// <c>true</c> when the line was consumed (including empty lines); <c>false</c> when the
/// line does not begin with a digit or when parsing the previous record returned null.
/// </returns>
private bool ProcessLine(char[] line, int lineNum)
{
    int length = line.Length;
    int first = LineUtil.FirstChar(line, 0, length);

    if (first < 0)
    {
        // Empty line: only the first one seen is reported, the rest are just counted.
        if (_emptyLineSeen == 0)
        {
            DoError(UnkRec.ErrorCode.EmptyLine, lineNum);
        }
        _emptyLineSeen++;
        return true;
    }

    if (length > 255) // TODO anything special for UTF-16?
    {
        // Reported, but the line is still processed.
        DoError(UnkRec.ErrorCode.LineTooLong, lineNum);
    }

    char level = line[first];
    bool isAsciiDigit = level >= '0' && level <= '9';
    if (!isAsciiDigit)
    {
        DoError(UnkRec.ErrorCode.InvLevel, lineNum);
        return false; // cannot proceed
    }

    // NOTE: do NOT warn about leading spaces "GEDCOM readers should ignore it when it occurs".

    bool startsNewRecord = level == '0' && _currRec.LineCount > 0;
    if (!startsNewRecord)
    {
        _currRec.AddLine(line);
        return true;
    }

    // Start of a new record: deal with the previous record first.
    // TODO records should go into a 'to parse' list and asynchronously turned into head/indi/fam/etc
    var parsed = Parser.Parse(_currRec);
    if (parsed == null)
    {
        return false;
    }

    Data.Add(parsed);
    _currRec = new GedRecord(lineNum, line);
    return true;
}
/// <summary>
/// Parses the sub-lines of a record (every line after the first) into <paramref name="rec"/>.
/// </summary>
/// <remarks>
/// Each line is split into level/tag/remainder and dispatched to the handler registered for
/// its tag. Lines with no registered handler are recorded in <c>rec.Unknowns</c>. A handler
/// may consume several lines by advancing <c>ctx.Endline</c>.
/// </remarks>
/// <param name="rec">The record object being populated.</param>
/// <param name="Lines">The raw lines of the record.</param>
/// <param name="gsfact">Factory from which a splitter is obtained and to which it is handed back.</param>
public void Parse(GEDCommon rec, GedRecord Lines, GSFactory gsfact)
{
    ParseContext2 ctx = new ParseContext2();
    ctx.gs = gsfact.Alloc(); // new GEDSplitter(GedParser._masterTagCache);

    // try/finally so the splitter is always handed back to the factory, even when a tag
    // handler or the post-check throws (presumably the factory pools splitters - confirm).
    try
    {
        ctx.tagCache = GedParser._masterTagCache;
        ctx.Lines = Lines;
        ctx.Parent = rec;

        int max = Lines.Max;

        // Index 0 is skipped: the loop starts with the record's second line.
        for (int i = 1; i < max; i++)
        {
            var line = Lines.GetLine(i);
            ctx.Begline = i;
            ctx.Endline = i; // assume it is one line long, parser might change it
            ctx.gs.LevelTagAndRemain(line, ctx);

            TagProc2 tagProc;
            if (ctx.Tag != null && _tagSet2.TryGetValue(ctx.Tag, out tagProc))
            {
                tagProc(ctx);
            }
            else
            {
                // Custom and invalid treated as 'unknowns': let the consumer figure it out
                // TODO gedr5419_blood_type_events.ged has garbage characters in SOUR/ABBR tags: incorrect line terminator, blank lines etc.
                LookAhead(ctx);
                rec.Unknowns.Add(new UnkRec(ctx.Tag, Lines.Beg + ctx.Begline, Lines.Beg + ctx.Endline));
            }

            // Resume after the last line the handler (or LookAhead) consumed.
            i = ctx.Endline;
        }

        // TODO post parse error checking on sub-structures
        PostCheck(ctx.Parent); // post parse error checking
    }
    finally
    {
        gsfact.Free(ctx.gs);
        ctx.gs = null;
    }
}
/// <summary>
/// Turns the raw lines of one record into GED data (INDI/FAM/NOTE/OBJE/REPO/SOUR/etc).
/// </summary>
/// <param name="rec">The raw lines of the record.</param>
/// <returns>
/// The created record viewed as <c>GEDCommon</c>, or null when <c>Make</c> returned null
/// (treated as EOF).
/// </returns>
public GEDCommon Parse(GedRecord rec)
{
    Tuple<object, GedParse> made = Make(rec);
    if (made == null)
    {
        return null; // EOF
    }

    GEDCommon result = made.Item1 as GEDCommon;
    GedParse subParser = made.Item2;

    // No sub-parser means an unknown or NYI record type: the record is returned as created.
    if (subParser != null)
    {
#if PARALLEL
        _allTasks.Add(Task.Run(() => subParser.Parse(result, rec, _GedSplitFactory)));
#else
        subParser.Parse(result, rec, _GedSplitFactory);
#endif
    }

    return result;
}