Пример #1
0
        /// <summary>
        /// Create the record object, and the parser to run on it, for one group of record lines.
        /// </summary>
        /// <param name="rec">The accumulated lines of a single record.</param>
        /// <returns>
        /// When the first line's level is not '0': an <c>Unknown</c> paired with a null parser.
        /// Otherwise whatever <c>GedRecFactory</c> produces for the first line's ident, tag and remainder.
        /// </returns>
        private Tuple <object, GedParse> Make(GedRecord rec)
        {
            var firstLine = rec.FirstLine();

            // Split the first line on spaces before asking for its level, tag, ident and remainder.
            gs.Split(firstLine, ' ');
            char level = gs.Level(firstLine);

            if (level != '0')
            {
                // The head of a record is expected at level 0; anything else is handed back
                // as an Unknown with no ident and no parser.
                var unknown = new Unknown(rec, null, gs.Tag(firstLine));
                return new Tuple <object, GedParse>(unknown, null);
            }

            // Level is fine: let the factory pick the record class from the tag.
            var remainder = new string(gs.Remain(firstLine));
            return GedRecFactory(rec, gs.Ident(firstLine), gs.Tag(firstLine), remainder);
        }
Пример #2
0
 /// <summary>
 /// Release this object's references by setting the Parser, Data, Errors and
 /// _currRec members to null.
 /// </summary>
 /// <param name="disposing">Not consulted by this implementation; the same members are cleared either way.</param>
 protected virtual void Dispose(bool disposing)
 {
     // TODO any explicit disposal required?
     // Only references are dropped here; nothing is explicitly disposed or closed.
     Parser   = null;
     Data     = null;
     Errors   = null;
     _currRec = null;
 }
Пример #3
0
        /// <summary>
        /// Read GEDCOM data into memory.
        /// </summary>
        /// <remarks>
        /// Common method for unit testing (<paramref name="instream"/> != null) or file reading
        /// (<paramref name="gedPath"/> != null). An exception raised while reading is not
        /// rethrown: it is recorded in <c>Errors</c> as an <c>UnkRec</c> with
        /// <c>UnkRec.ErrorCode.Exception</c>, and the post-read steps still run.
        /// </remarks>
        /// <param name="gedPath">The path to the file. If null, will attempt to read from instream instead.</param>
        /// <param name="instream">An input stream to read from instead of a file path.</param>
        public void ReadGed(string gedPath, StreamReader instream = null)
        {
            // Reset per-read state. Errors is created here, so no later null check is needed.
            _emptyLineSeen = 0;
            FilePath       = gedPath;
            Errors         = new List <UnkRec>();

            GedReader reader = new GedReader
            {
                BufferSize   = _bufferSize,
                ProcessALine = ProcessLine,
                ErrorTracker = DoError
            };

            // Processor context
            Parser   = new GedParser(FilePath);
            Data     = new List <GEDCommon>();
            _currRec = new GedRecord();

            // NOTE(review): the locals below are never read; presumably they exist only to
            // touch the singletons before reading starts - confirm before removing.
#if SQLITE
            var foo = SQLite.Instance;
#elif LITEDB
            var foo = SharpGEDParser.Parser.LiteDB.Instance;
#elif NOTESTREAM
            var foo = NoteStream.Instance;
#endif
#if XREFTRACK
            var bar = XrefTrack.Instance;
#endif
            try
            {
                if (gedPath == null)
                {
                    reader.ReadFile(instream);
                }
                else
                {
                    reader.ReadFile(gedPath);
                }
                EndOfFile();
            }
            catch (Exception)
            {
                // Deliberate best effort: a failure becomes an error record rather than
                // propagating to the caller.
                UnkRec err = new UnkRec();
                err.Error = UnkRec.ErrorCode.Exception;
                // Use the reader's own counter: within this method the _lineNum field is only
                // refreshed from the reader after the read completes (below), so it would not
                // reflect where the failure happened.
                err.Beg = reader._lineNum;
                // TODO err.Error = string.Format("Exception: {0} line {1} | {2}", ex.Message, _lineNum, ex.StackTrace);
                Errors.Add(err);
            }

            Parser.FinishUp();
            GatherRecords();
            GatherErrors();

            _lineNum = reader._lineNum;
            _currRec = null;
        }
Пример #4
0
        /// <summary>
        /// Create the record object for a top-level tag, paired with the parser that handles it.
        /// </summary>
        /// <param name="rec">The lines which make up the record.</param>
        /// <param name="ident">The identifier from the record's first line.</param>
        /// <param name="tag">The tag from the record's first line; matched case-insensitively.</param>
        /// <param name="remain">The remainder of the first line after the tag.</param>
        /// <returns>
        /// null for TRLR. For a missing/blank tag, SUBM, SUBN or an unrecognized tag, a record
        /// object with a null parser. Otherwise the record object and its parser.
        /// </returns>
        private Tuple <object, GedParse> GedRecFactory(GedRecord rec, string ident, string tag, string remain)
        {
            if (string.IsNullOrWhiteSpace(tag))
            {
                var missingTag = new Unknown(rec, ident, "");
                missingTag.Errors.Add(new UnkRec {
                    Error = UnkRec.ErrorCode.MissTag
                });
                return(new Tuple <object, GedParse>(missingTag, null));
            }

            // Parse 'top level' records. Parsing of some record types (e.g. NOTE, SOUR, etc) are likely to be in 'common' with sub-record parsing

            // TODO Very much brute force. If/until this is found to be optimizable
            // Upper-case with the invariant culture: the tags are fixed ASCII keywords, and a
            // culture-sensitive ToUpper() (e.g. Turkish 'i' -> dotted capital I) would stop
            // "indi" from matching "INDI".
            switch (tag.ToUpperInvariant())
            {
            case "INDI":
                return(new Tuple <object, GedParse>(new IndiRecord(rec, ident, remain), _IndiParseSingleton));

            case "FAM":
                return(new Tuple <object, GedParse>(new FamRecord(rec, ident, remain), _FamParseSingleton));

            case "SOUR":
                return(new Tuple <object, GedParse>(new SourceRecord(rec, ident, remain), _SourParseSingleton));

            case "REPO":
                return(new Tuple <object, GedParse>(new Repository(rec, ident, remain), _RepoParseSingleton));

            case "NOTE":
            {
                // NOTE records take the remainder from gs.RemainLS on the first line rather
                // than the 'remain' argument.
                string remainLS = gs.RemainLS(rec.FirstLine());
                return(new Tuple <object, GedParse>(new NoteRecord(rec, ident, remainLS), _NoteParseSingleton));
            }

            case "OBJE":
                return(new Tuple <object, GedParse>(new MediaRecord(rec, ident, remain), _MediaParseSingleton));

            case "HEAD":
                return(new Tuple <object, GedParse>(new HeadRecord(rec), _HeadParseSingleton));

            case "TRLR":
                // No record is produced for the trailer.
                return(null);

            case "SUBM":     // TODO temp ignore
            case "SUBN":     // TODO temp ignore
                return(new Tuple <object, GedParse>(new DontCare(rec, tag), null));

            default:
            {
                // Unrecognized tag: keep the original tag text and supply no parser.
                var unknown = new Unknown(rec, ident, tag);
                return(new Tuple <object, GedParse>(unknown, null));
            }
            }
        }
Пример #5
0
        /// <summary>
        /// Handle one input line. A line at level '0' arriving while the current record already
        /// holds lines closes that record (it is parsed and stored) and opens a new one; any
        /// other line is appended to the current record.
        /// </summary>
        /// <param name="line">The characters of the line.</param>
        /// <param name="lineNum">The number of the line; used in error reports and when starting a new record.</param>
        /// <returns>
        /// true when the line was consumed (empty lines included); false when the first
        /// non-blank character is not a digit, or when parsing the finished record yields null.
        /// </returns>
        private bool ProcessLine(char [] line, int lineNum)
        {
            int length = line.Length;
            int first  = LineUtil.FirstChar(line, 0, length);

            if (first < 0)
            {
                // Empty line: only the first one seen is reported, but all are counted.
                if (_emptyLineSeen == 0)
                {
                    DoError(UnkRec.ErrorCode.EmptyLine, lineNum);
                }
                _emptyLineSeen++;
                return true;
            }

            if (length > 255)    // TODO anything special for UTF-16?
            {
                // Reported, but the line is still processed.
                DoError(UnkRec.ErrorCode.LineTooLong, lineNum);
            }

            char level   = line[first];
            bool isDigit = level >= '0' && level <= '9';
            if (!isDigit)
            {
                DoError(UnkRec.ErrorCode.InvLevel, lineNum);
                return false; // cannot proceed
            }

            // NOTE: do NOT warn about leading spaces "GEDCOM readers should ignore it when it occurs".

            bool startsNewRecord = level == '0' && _currRec.LineCount > 0;
            if (!startsNewRecord)
            {
                // Continuation line (or the very first line): accumulate into the current record.
                _currRec.AddLine(line);
                return true;
            }

            // Start of a new record: deal with the previous record first.
            // TODO records should go into a 'to parse' list and asynchronously turned into head/indi/fam/etc
            var finished = Parser.Parse(_currRec);
            if (finished == null)
            {
                return false;
            }

            Data.Add(finished);
            _currRec = new GedRecord(lineNum, line);
            return true;
        }
Пример #6
0
        /// <summary>
        /// Parse the lines of a record after its first line, dispatching each tag to its
        /// registered handler and recording unhandled tags as unknowns on the record.
        /// </summary>
        /// <param name="rec">The record object being filled in; also receives unknown entries.</param>
        /// <param name="Lines">The lines which make up the record.</param>
        /// <param name="gsfact">Factory from which a splitter is allocated and to which it is returned.</param>
        public void Parse(GEDCommon rec, GedRecord Lines, GSFactory gsfact)
        {
            ParseContext2 ctx = new ParseContext2();

            ctx.gs       = gsfact.Alloc(); // new GEDSplitter(GedParser._masterTagCache);
            ctx.tagCache = GedParser._masterTagCache;

            // try/finally so the splitter is handed back to the factory even when a tag
            // handler, LookAhead or PostCheck throws.
            try
            {
                ctx.Lines  = Lines;
                ctx.Parent = rec;
                int max = Lines.Max;

                // Line 0 is skipped here; the loop starts at the second line of the record.
                for (int i = 1; i < max; i++)
                {
                    var line = Lines.GetLine(i);
                    ctx.Begline = i;
                    ctx.Endline = i; // assume it is one line long, parser might change it

                    ctx.gs.LevelTagAndRemain(line, ctx);
                    TagProc2 tagProc;
                    if (ctx.Tag != null && _tagSet2.TryGetValue(ctx.Tag, out tagProc))
                    {
                        tagProc(ctx);
                    }
                    else
                    {
                        // Custom and invalid treated as 'unknowns': let the consumer figure it out
                        // TODO gedr5419_blood_type_events.ged has garbage characters in SOUR/ABBR tags: incorrect line terminator, blank lines etc.
                        LookAhead(ctx);
                        rec.Unknowns.Add(new UnkRec(ctx.Tag, Lines.Beg + ctx.Begline, Lines.Beg + ctx.Endline));
                    }

                    // Resume after the last line consumed for this tag (the loop's i++ moves past it).
                    i = ctx.Endline;
                }

                // TODO post parse error checking on sub-structures
                PostCheck(ctx.Parent); // post parse error checking
            }
            finally
            {
                gsfact.Free(ctx.gs);
                ctx.gs = null;
            }
        }
Пример #7
0
        /// <summary>
        /// Given a glop of lines which represent a 'record', parse it into GED data
        /// (INDI/FAM/NOTE/OBJE/REPO/SOUR/etc).
        /// </summary>
        /// <param name="rec">The lines which make up the record.</param>
        /// <returns>
        /// null when <c>Make</c> produces nothing (EOF); otherwise the created object as a
        /// <c>GEDCommon</c> (null if it is not one). When no parser accompanies the object
        /// (unknown or NYI record type) it is returned without further parsing.
        /// </returns>
        public GEDCommon Parse(GedRecord rec)
        {
            Tuple <object, GedParse> made = Make(rec);
            if (made == null)
            {
                return null; // EOF
            }

            GEDCommon result = made.Item1 as GEDCommon;
            GedParse  parser = made.Item2;

            // Only record types that come with a parser get their sub-lines processed.
            if (parser != null)
            {
#if PARALLEL
                _allTasks.Add(Task.Run(() => parser.Parse(result, rec, _GedSplitFactory)));
#else
                parser.Parse(result, rec, _GedSplitFactory);
#endif
            }

            return result;
        }