/// <summary>
/// Reads one line from <paramref name="progress"/> into the line buffer.
/// The buffer is first cleared with <c>ResetLine()</c>; bytes are then consumed
/// from <c>progress.data</c>, starting at <c>progress.position</c>, until the
/// configured line-feed sequence (<c>lineFeedString</c>) has been seen, an
/// inconsistent line ending is detected, or <c>progress.size</c> is reached.
/// </summary>
/// <param name="progress">Read cursor; <c>position</c> is advanced past the consumed bytes.</param>
/// <returns>The number of non-line-ending characters passed to <c>AddChar</c>.</returns>
public uint ReadLine(ref GedcomParserProgress progress)
{
    ResetLine();

    bool endOfLine = false;
    uint noOfChars = 0;
    int lfCnt = 0;
    bool lineEndStarted = false;

    while ((progress.position < progress.size) && !endOfLine)
    {
        // Each byte is widened to a char; position now points at the NEXT unread byte.
        char ch = (char)progress.data[progress.position++];

        if (lineEndStarted || parser.IsNewLine(ch))
        {
            if (lineFeedString[lfCnt++] != ch)
            {
                DebugStringAdd("Line:" + lineNo + " Inconsistent line feeds!");

                if (!parser.IsNewLine(ch))
                {
                    // The line-feed sequence was cut short and we consumed the first
                    // character of the following line. Put it back so it is not lost.
                    progress.position--;
                }
                else if ((progress.position < progress.size)
                         && parser.IsNewLine((char)progress.data[progress.position]))
                {
                    // A second new-line character follows directly: treat both as one
                    // line ending. The peek uses 'position' (already advanced above),
                    // not 'position + 1', which inspected the wrong byte and could
                    // index past the end of the data.
                    progress.position++;
                }

                endOfLine = true;
            }

            if (lfCnt == lineFeedString.Length)
            {
                // Complete line-feed sequence matched.
                endOfLine = true;
            }

            lineEndStarted = true;
        }
        else
        {
            AddChar(ch);
            noOfChars++;
        }
    }

    return noOfChars;
}
/// <summary>
/// Parses the content of <c>fileBuffer</c> line by line, groups the decoded lines
/// into a tree of <c>GedcomLineObject</c>s according to their level, and hands
/// every object back at level 0 to a <c>GedcomTreeDecoderClass</c> for decoding
/// into <c>familyTree</c>.
/// </summary>
/// <param name="importResult">
/// Import log. It is passed to the BOM check, the line reader and the tree decoder,
/// and is replaced with <c>treeDecoder.GetImportResult()</c> before returning.
/// </param>
private void Parse(ref FileImportResult importResult)
{
    GedcomParserProgress progress = new GedcomParserProgress(fileBuffer.GetBuffer(), fileBuffer.GetSize());
    GedcomLineObject currentLineObject = new GedcomLineObject(0);
    GedcomLineData prevLineData = null;
    double lastPrintPercent = 0.0;
    int lineDiff = 0;

    bool bomFound = CheckBomMark(ref progress, ref importResult);

    GedcomTreeDecoderClass treeDecoder = new GedcomTreeDecoderClass(ref familyTree, ref importResult);
    treeDecoder.SetCharacterSet(characterSet);

    while (!progress.IsEndOfFile())
    {
        line.SetDebugString(importResult);
        line.ReadLine(ref progress);
        GedcomLineData lineData = line.DecodeLine();
        importResult = line.GetDebugString();
        parsedLines++;

        if ((lineData != null) && lineData.valid)
        {
            if (lineData.level == (currentLineObject.GetLevel() + 1))
            {
                // One level deeper: open a child object hanging off the previous line.
                GedcomLineObject subLineObject = new GedcomLineObject(lineData.level);
                subLineObject.parent = currentLineObject;
                if (prevLineData != null)
                {
                    prevLineData.child = subLineObject;
                }
                currentLineObject = subLineObject;
            }
            else if (lineData.level < currentLineObject.GetLevel())
            {
                bool decodeDone = false;
                double printPercent = 100.0 * (double)progress.position / (double)progress.size;

                // Report progress only when it moved by more than 0.1 percentage points.
                if ((printPercent - lastPrintPercent) > 0.10)
                {
                    if (backgroundWorker != null)
                    {
                        backgroundWorker.ReportProgress((int)printPercent, "Working...");
                    }
                    lastPrintPercent = printPercent;

                    if (trace.Switch.Level.HasFlag(SourceLevels.Information))
                    {
                        trace.TraceInformation("Decode position 1 " + progress.position + " (" + progress.size + ") " + DateTime.Now.ToString());
                        trace.TraceInformation("Lines " + parsedLines + " (" + treeDecoder.GetDecodedLines() + ") " + printPercent.ToString("F") + "%");
                        trace.TraceData(TraceEventType.Verbose, 0, familyTree.GetShortTreeInfo());
                    }
                }

                // Walk back up the object chain until we reach the level of the new line.
                do
                {
                    if (currentLineObject.parent != null)
                    {
                        currentLineObject = currentLineObject.parent;

                        if ((currentLineObject.GetLevel() == 0) && (parsedLines > 0))
                        {
                            // Back at the root: the collected record is complete, decode it.
                            treeDecoder.DecodeObject(currentLineObject);
                            currentLineObject.Clear();
                            AdoptDecoderCharacterSet(treeDecoder, bomFound);
                        }
                    }
                    else
                    {
                        // No parent left to climb to; log any growth in undecoded lines and stop.
                        if ((parsedLines - 1) - treeDecoder.GetDecodedLines() - 1 > lineDiff)
                        {
                            treeDecoder.DebugStringAdd("Decode position " + progress.position + " (" + progress.size + ")");
                            treeDecoder.DebugStringAdd("Lines " + parsedLines + " (" + treeDecoder.GetDecodedLines() + "," + lineDiff + ")");
                            treeDecoder.DebugStringAdd("New undecoded lines: " + (parsedLines - treeDecoder.GetDecodedLines() - lineDiff) + "!");
                            lineDiff = parsedLines - treeDecoder.GetDecodedLines();
                        }

                        if (trace.Switch.Level.HasFlag(SourceLevels.Information))
                        {
                            trace.TraceInformation("Decode position 2 " + progress.position + " (" + progress.size + ")");
                            trace.TraceInformation("Lines " + parsedLines + " (" + treeDecoder.GetDecodedLines() + ")");
                            trace.TraceData(TraceEventType.Information, 0, familyTree.GetShortTreeInfo());
                        }

                        currentLineObject.gedcomLines.Clear();
                        decodeDone = true;
                    }
                } while ((currentLineObject.GetLevel() > lineData.level) && !decodeDone);
            }

            prevLineData = lineData;
            currentLineObject.gedcomLines.Add(lineData);
        }
        else
        {
            if (lineData != null)
            {
                treeDecoder.DebugStringAdd("Line:" + lineData.lineNo + " Error bad gedcom line [" + lineData + "]");
            }
            else
            {
                treeDecoder.DebugStringAdd("Line: " + parsedLines + ": Error bad gedcom line:no data found ");
            }
        }

        if (progress.IsEndOfFile())
        {
            // Flush whatever is still collected and emit the end-of-file diagnostics.
            treeDecoder.DecodeObject(currentLineObject);
            AdoptDecoderCharacterSet(treeDecoder, bomFound);

            // lineData is null when the final line could not be decoded (handled above),
            // so it must not be dereferenced unconditionally here. Fall back to the
            // running line count for the message prefix in that case.
            string linePrefix = (lineData != null) ? ("Line:" + lineData.lineNo) : ("Line:" + parsedLines);

            treeDecoder.DebugStringAdd(linePrefix + " end of file " + parsedLines);
            if ((lineData != null) && (lineData.level != 0))
            {
                treeDecoder.DebugStringAdd(linePrefix + "Error: The Gedcom file did not end correctly! Was it inomplete? = " + currentLineObject.gedcomLines.Count);
            }
            if (!treeDecoder.DecodingCompleted())
            {
                treeDecoder.DebugStringAdd(linePrefix + "Error: The Gedcom file did not end correctly! No trailer detected = " + currentLineObject.gedcomLines.Count);
            }
            treeDecoder.ShowUnknownTags();
        }
    }

    backgroundWorker = null;
    treeDecoder.DebugStringAdd("Gedcom file parsing finished at " + currentLineObject.gedcomLines.Count);
    importResult = treeDecoder.GetImportResult();
}

/// <summary>
/// If the tree decoder reports a character set different from <c>characterSet</c>,
/// logs a warning when a BOM was found and passes the decoder's character set to
/// <c>SetCharacterSet</c>.
/// </summary>
private void AdoptDecoderCharacterSet(GedcomTreeDecoderClass treeDecoder, bool bomFound)
{
    if (treeDecoder.GetCharacterSet() != characterSet)
    {
        if (bomFound)
        {
            treeDecoder.DebugStringAdd("Warning! BOM and character set in Gedcom part mismatches! " + treeDecoder.GetCharacterSet() + "," + characterSet);
        }
        SetCharacterSet(treeDecoder.GetCharacterSet());
    }
}
/// <summary>
/// Checks whether the file buffer starts with a UTF-8, UTF-16BE or UTF-16LE byte
/// order mark. On a match the result is logged to <paramref name="importResult"/>,
/// the matching character set is passed to <c>SetCharacterSet</c>, and
/// <c>progress.position</c> is moved just past the BOM.
/// </summary>
/// <param name="progress">Read cursor; its position is set to the BOM length on a match.</param>
/// <param name="importResult">Import log receiving the detection message.</param>
/// <returns><c>true</c> if a BOM was recognised, otherwise <c>false</c>.</returns>
private bool CheckBomMark(ref GedcomParserProgress progress, ref FileImportResult importResult)
{
    Byte[] fileDataBuffer = fileBuffer.GetBuffer();
    Byte[] utf8Bom = { 0xEF, 0xBB, 0xBF };
    Byte[] utf16BeBom = { 0xFE, 0xFF };
    Byte[] utf16LeBom = { 0xFF, 0xFE };

    // Never look at more bytes than the buffer holds, nor beyond progress.size
    // (the limit the read loop uses; presumably the count of valid bytes).
    // This keeps empty and very short files from indexing past the end.
    int available = (fileDataBuffer != null) ? fileDataBuffer.Length : 0;
    if (progress.size < available)
    {
        available = (int)progress.size;
    }

    if (BufferStartsWith(fileDataBuffer, available, utf8Bom))
    {
        importResult.AddString("BOM says UTF-8!");
        SetCharacterSet(GedcomFileCharacterSet.Utf8);
        progress.position = utf8Bom.Length;
        return true;
    }

    if (BufferStartsWith(fileDataBuffer, available, utf16BeBom))
    {
        // Previously this branch logged "BOM says UTF-8!" by mistake.
        importResult.AddString("BOM says UTF-16-BE!");
        SetCharacterSet(GedcomFileCharacterSet.Utf16BE);
        progress.position = utf16BeBom.Length;
        return true;
    }

    if (BufferStartsWith(fileDataBuffer, available, utf16LeBom))
    {
        importResult.AddString("BOM says UTF-16-LE!");
        SetCharacterSet(GedcomFileCharacterSet.Utf16LE);
        progress.position = utf16LeBom.Length;
        return true;
    }

    importResult.AddString("BOM not found!");
    return false;
}

/// <summary>
/// Returns true when the first <paramref name="available"/> bytes of
/// <paramref name="buffer"/> begin with all bytes of <paramref name="prefix"/>.
/// </summary>
private static bool BufferStartsWith(Byte[] buffer, int available, Byte[] prefix)
{
    if ((buffer == null) || (available < prefix.Length))
    {
        return false;
    }

    for (int i = 0; i < prefix.Length; i++)
    {
        if (buffer[i] != prefix[i])
        {
            return false;
        }
    }

    return true;
}