/// <summary>
        /// Reads bytes from <paramref name="progress"/> into the current line until a complete
        /// line-feed sequence (as given by <c>lineFeedString</c>) has been consumed, an
        /// inconsistent line ending is detected, or the end of the data is reached.
        /// </summary>
        /// <param name="progress">
        /// Read cursor over the file data; <c>progress.position</c> is advanced past everything
        /// that was consumed, including the line-end characters.
        /// </param>
        /// <returns>
        /// The number of characters handed to <c>AddChar</c>; line-end characters are not counted.
        /// </returns>
        public uint ReadLine(ref GedcomParserProgress progress)
        {
            ResetLine();

            bool endOfLine      = false;
            uint noOfChars      = 0;
            int  lfCnt          = 0;      // index of the next expected character in lineFeedString
            bool lineEndStarted = false;  // true once the first line-end character has been seen

            while ((progress.position < progress.size) && !endOfLine)
            {
                // Each byte is treated as one character; position now points at the NEXT unread byte.
                char ch = (char)progress.data[progress.position++];

                if (lineEndStarted || parser.IsNewLine(ch))
                {
                    if (lineFeedString[lfCnt++] != ch)
                    {
                        DebugStringAdd("Line:" + lineNo + " Inconsistent line feeds!");

                        if (!parser.IsNewLine(ch))
                        {
                            // A line-feed sequence was started but was interrupted by ordinary data.
                            // Hand the character back so that it becomes the first character of the
                            // next line instead of being silently dropped.
                            progress.position--;
                        }
                        else if ((progress.position < progress.size) && parser.IsNewLine((char)progress.data[progress.position]))
                        {
                            // The unexpected line-end character is followed by another one (e.g. CR LF
                            // where only LF was expected): swallow the partner as well. The lookahead
                            // uses 'position' (already the next unread byte) and is bounded by 'size',
                            // so it can neither skip a byte nor read past the end of the data.
                            progress.position++;
                        }
                        endOfLine = true;
                    }
                    if (lfCnt == lineFeedString.Length)
                    {
                        // The whole expected line-feed sequence has been consumed.
                        endOfLine = true;
                    }
                    lineEndStarted = true;
                }
                else
                {
                    AddChar(ch);
                    noOfChars++;
                }
            }

            return(noOfChars);
        }
Пример #2
0
        /// <summary>
        /// Parses the complete file buffer line by line. Valid lines are collected into a tree of
        /// <c>GedcomLineObject</c>s according to their level; each time the parser climbs back to a
        /// level-0 object that object is handed to the tree decoder and cleared.
        /// </summary>
        /// <param name="importResult">
        /// Receives the diagnostics collected while parsing; on return it is replaced by the
        /// tree decoder's import result.
        /// </param>
        private void Parse(ref FileImportResult importResult)
        {
            GedcomLineObject rootLineObject = new GedcomLineObject(0);

            GedcomParserProgress progress = new GedcomParserProgress(fileBuffer.GetBuffer(), fileBuffer.GetSize());

            double           lastPrintPercent  = 0.0;
            GedcomLineObject currentLineObject = rootLineObject;
            GedcomLineData   prevLineData      = null;
            int              lineDiff          = 0;

            bool bomFound = CheckBomMark(ref progress, ref importResult);

            GedcomTreeDecoderClass treeDecoder = new GedcomTreeDecoderClass(ref familyTree, ref importResult);

            treeDecoder.SetCharacterSet(characterSet);

            while (!progress.IsEndOfFile())
            {
                GedcomLineData lineData;

                // The line reader appends its own diagnostics to the import result.
                line.SetDebugString(importResult);
                line.ReadLine(ref progress);
                lineData     = line.DecodeLine();
                importResult = line.GetDebugString();
                parsedLines++;

                if ((lineData != null) && lineData.valid)
                {
                    if (lineData.level == (currentLineObject.GetLevel() + 1))
                    {
                        // One level deeper: open a child object and hang it on the previous line.
                        GedcomLineObject subLineObject = new GedcomLineObject(lineData.level);

                        subLineObject.parent = currentLineObject;

                        if (prevLineData != null)
                        {
                            prevLineData.child = subLineObject;
                        }

                        currentLineObject = subLineObject;
                    }
                    else if (lineData.level < currentLineObject.GetLevel())
                    {
                        // Back to a shallower level: report progress, then climb up the parent chain.
                        bool   decodeDone   = false;
                        double printPercent = 100.0 * (double)progress.position / (double)progress.size;

                        if ((printPercent - lastPrintPercent) > 0.10)
                        {
                            if (backgroundWorker != null)
                            {
                                backgroundWorker.ReportProgress((int)printPercent, "Working...");
                            }
                            lastPrintPercent = printPercent;

                            if (trace.Switch.Level.HasFlag(SourceLevels.Information))
                            {
                                trace.TraceInformation("Decode position 1 " + progress.position + " (" + progress.size + ") " + DateTime.Now.ToString());
                                trace.TraceInformation("Lines " + parsedLines + " (" + treeDecoder.GetDecodedLines() + ") " + printPercent.ToString("F") + "%");
                                trace.TraceData(TraceEventType.Verbose, 0, familyTree.GetShortTreeInfo());
                            }
                        }
                        do
                        {
                            if (currentLineObject.parent != null)
                            {
                                currentLineObject = currentLineObject.parent;

                                if ((currentLineObject.GetLevel() == 0) && (parsedLines > 0))
                                {
                                    // A complete level-0 object has been collected: decode and recycle it.
                                    treeDecoder.DecodeObject(currentLineObject);

                                    currentLineObject.Clear();

                                    AdoptDecoderCharacterSet(treeDecoder, bomFound);
                                }
                            }
                            else
                            {
                                // No parent left to climb to: log any newly undecoded lines and stop.
                                if ((parsedLines - 1) - treeDecoder.GetDecodedLines() - 1 > lineDiff)
                                {
                                    treeDecoder.DebugStringAdd("Decode position " + progress.position + " (" + progress.size + ")");
                                    treeDecoder.DebugStringAdd("Lines " + parsedLines + " (" + treeDecoder.GetDecodedLines() + "," + lineDiff + ")");
                                    treeDecoder.DebugStringAdd("New undecoded lines: " + (parsedLines - treeDecoder.GetDecodedLines() - lineDiff) + "!");
                                    lineDiff = parsedLines - treeDecoder.GetDecodedLines();
                                }
                                if (trace.Switch.Level.HasFlag(SourceLevels.Information))
                                {
                                    trace.TraceInformation("Decode position 2 " + progress.position + " (" + progress.size + ")");
                                    trace.TraceInformation("Lines " + parsedLines + " (" + treeDecoder.GetDecodedLines() + ")");
                                    trace.TraceData(TraceEventType.Information, 0, familyTree.GetShortTreeInfo());
                                }

                                currentLineObject.gedcomLines.Clear();
                                decodeDone = true;
                            }
                        } while ((currentLineObject.GetLevel() > lineData.level) && !decodeDone);
                    }

                    prevLineData = lineData;

                    currentLineObject.gedcomLines.Add(lineData);
                }
                else
                {
                    if (lineData != null)
                    {
                        treeDecoder.DebugStringAdd("Line:" + lineData.lineNo + " Error bad gedcom line [" + lineData + "]");
                    }
                    else
                    {
                        treeDecoder.DebugStringAdd("Line: " + parsedLines + ": Error bad gedcom line:no data found ");
                    }
                }

                if (progress.IsEndOfFile())
                {
                    treeDecoder.DecodeObject(currentLineObject);
                    AdoptDecoderCharacterSet(treeDecoder, bomFound);

                    // The last line may have produced no data at all (lineData == null, see the
                    // error branch above); fall back to the parsed-line counter for the message
                    // prefix instead of dereferencing null.
                    string linePrefix = (lineData != null) ? ("Line:" + lineData.lineNo) : ("Line:" + parsedLines);

                    treeDecoder.DebugStringAdd(linePrefix + " end of file " + parsedLines);

                    // Without line data the level of the last line is unknown, so only the
                    // trailer check below can report an incomplete file in that case.
                    if ((lineData != null) && (lineData.level != 0))
                    {
                        treeDecoder.DebugStringAdd(linePrefix + "Error: The Gedcom file did not end correctly! Was it inomplete? = " + currentLineObject.gedcomLines.Count);
                    }
                    if (!treeDecoder.DecodingCompleted())
                    {
                        treeDecoder.DebugStringAdd(linePrefix + "Error: The Gedcom file did not end correctly! No trailer detected = " + currentLineObject.gedcomLines.Count);
                    }

                    treeDecoder.ShowUnknownTags();
                }
            }
            backgroundWorker = null;
            treeDecoder.DebugStringAdd("Gedcom file parsing finished at " + currentLineObject.gedcomLines.Count);
            importResult = treeDecoder.GetImportResult();
        }

        /// <summary>
        /// Switches this parser to the character set reported by the tree decoder when the two
        /// differ, and logs a warning first if a BOM had been found in the file.
        /// </summary>
        /// <param name="treeDecoder">Decoder whose current character set is compared and adopted.</param>
        /// <param name="bomFound">True when a byte order mark was detected at the start of the file.</param>
        private void AdoptDecoderCharacterSet(GedcomTreeDecoderClass treeDecoder, bool bomFound)
        {
            if (treeDecoder.GetCharacterSet() != characterSet)
            {
                if (bomFound)
                {
                    treeDecoder.DebugStringAdd("Warning! BOM and character set in Gedcom part mismatches! " + treeDecoder.GetCharacterSet() + "," + characterSet);
                }
                SetCharacterSet(treeDecoder.GetCharacterSet());
            }
        }
Пример #3
0
        /// <summary>
        /// Checks whether the file buffer starts with a UTF-8, UTF-16 big-endian or UTF-16
        /// little-endian byte order mark (tested in that order). On a match the corresponding
        /// character set is selected and the read position is moved just past the BOM.
        /// </summary>
        /// <param name="progress">Read cursor; its position is set to the BOM length when a BOM is found.</param>
        /// <param name="importResult">Receives a message describing the outcome of the check.</param>
        /// <returns>True if one of the three BOMs was found, otherwise false.</returns>
        private bool CheckBomMark(ref GedcomParserProgress progress, ref FileImportResult importResult)
        {
            Byte[] fileDataBuffer = fileBuffer.GetBuffer();

            Byte[] UTF_8_BOM    = { 0xEF, 0xBB, 0xBF };
            Byte[] UTF_16BE_BOM = { 0xFE, 0xFF };
            Byte[] UTF_16LE_BOM = { 0xFF, 0xFE };

            // Never compare beyond the physical buffer, nor beyond progress.size
            // (presumably the number of valid bytes - it is the read limit used by the line reader).
            long available = fileDataBuffer.Length;
            if ((long)progress.size < available)
            {
                available = (long)progress.size;
            }

            if (BufferStartsWith(fileDataBuffer, available, UTF_8_BOM))
            {
                importResult.AddString("BOM says UTF-8!");
                SetCharacterSet(GedcomFileCharacterSet.Utf8);
                progress.position = UTF_8_BOM.Length;
                return(true);
            }

            if (BufferStartsWith(fileDataBuffer, available, UTF_16BE_BOM))
            {
                // Message corrected: this branch used to report "BOM says UTF-8!".
                importResult.AddString("BOM says UTF-16-BE!");
                SetCharacterSet(GedcomFileCharacterSet.Utf16BE);
                progress.position = UTF_16BE_BOM.Length;
                return(true);
            }

            if (BufferStartsWith(fileDataBuffer, available, UTF_16LE_BOM))
            {
                importResult.AddString("BOM says UTF-16-LE!");
                SetCharacterSet(GedcomFileCharacterSet.Utf16LE);
                progress.position = UTF_16LE_BOM.Length;
                return(true);
            }

            importResult.AddString("BOM not found!");

            return(false);
        }

        /// <summary>
        /// Tells whether the first <paramref name="available"/> bytes of <paramref name="buffer"/>
        /// begin with <paramref name="prefix"/>. Returns false when fewer bytes than the prefix
        /// length are available, so short or empty files cannot cause an out-of-range access.
        /// </summary>
        private static bool BufferStartsWith(Byte[] buffer, long available, Byte[] prefix)
        {
            if (available < prefix.Length)
            {
                return(false);
            }
            for (int i = 0; i < prefix.Length; i++)
            {
                if (buffer[i] != prefix[i])
                {
                    return(false);
                }
            }
            return(true);
        }