/// <summary>
/// Feeds sixteen four-character tags through the collection indexer and
/// then asserts that <c>Find</c> reports "NAME" at position 11.
/// </summary>
private void Test2()
{
    string[] tags =
    {
        "HEAD", "SOUR", "VERS", "NAME",
        "CORP", "ADDR", "CONT", "PHON",
        "DEST", "DATE", "CHAR", "SUBM",
        "FILE", "GEDC", "FORM", "INDI",
    };

    IndexedKeyCollection keys = new IndexedKeyCollection();
    for (int n = 0; n < tags.Length; n++)
    {
        // Only the indexer call matters here; the returned string is never read.
        string ignored = keys[tags[n], 0, 4];
    }

    System.Console.WriteLine(keys.ToString());

    int index = -1;
    bool located = keys.Find("NAME", 0, 4, out index);
    Assert.True(located, "NOT FOUND!");

    Assert.True(index == 11, "expected 11\tgot " + index);
}
/// <summary>
/// Feeds a handful of xref-style keys of differing lengths through the
/// collection indexer and then asserts that <c>Find</c> reports "I012"
/// at position 3.
/// </summary>
private void Test1()
{
    string[] keysToAdd =
    {
        "I01", "F1", "F2", "I02", "I03", "I04", "NOTE1", "I012",
    };

    IndexedKeyCollection keys = new IndexedKeyCollection();
    for (int n = 0; n < keysToAdd.Length; n++)
    {
        string key = keysToAdd[n];

        // Each key is passed whole: offset 0, length of the key itself.
        // The returned string is never read.
        string ignored = keys[key, 0, key.Length];
    }

    System.Console.WriteLine(keys.ToString());

    int index = -1;
    bool located = keys.Find("I012", 0, 4, out index);
    Assert.True(located, "NOT FOUND!");

    Assert.True(index == 3, "expected 3\tgot " + index);
}
/// <summary>
/// Feeds nine names (some with a trailing space) through the collection
/// indexer and then asserts that <c>Find</c> reports "Grinning" at
/// position 4.
/// </summary>
private void Test4()
{
    // Trailing spaces inside some of the literals are part of the test data.
    string[] names =
    {
        "Grinning",
        "Sydney Mary ",
        "Day",
        "Susan ",
        "Vince",
        "Wayne Oakleigh ",
        "Bone",
        "Anna M ",
        "Annie",
    };

    var keys = new IndexedKeyCollection();
    for (int n = 0; n < names.Length; n++)
    {
        string name = names[n];

        // The returned string is never read; only the indexer call is wanted.
        string ignored = keys[name, 0, name.Length];
    }

    System.Console.WriteLine(keys.ToString());

    int index = -1;
    bool located = keys.Find("Grinning", 0, 8, out index);
    Assert.True(located, "NOT FOUND!");

    Assert.True(index == 4, "expected 4\tgot " + index);
}
/// <summary>
/// Initializes a new instance of the <see cref="GedcomDatabase"/> class
/// with every backing collection created empty.
/// </summary>
public GedcomDatabase()
{
    // Lookup structures.
    Table = new Hashtable();
    surnames = new Dictionary<string, int>();
    placeNameCollection = new IndexedKeyCollection();

    // One list per record kind.
    individuals = new List<GedcomIndividualRecord>();
    families = new List<GedcomFamilyRecord>();
    sources = new List<GedcomSourceRecord>();
    repositories = new List<GedcomRepositoryRecord>();
    media = new List<GedcomMultimediaRecord>();
    notes = new List<GedcomNoteRecord>();
    submitters = new List<GedcomSubmitterRecord>();
}
/// <summary>
/// Feeds a long list of names through the collection indexer and then
/// asserts that <c>Find</c> reports "John" at position 55.
/// </summary>
private void Test3()
{
    string[] names =
    {
        "Abiathar", "Alice Blanch", "Almedia J", "Anna M", "Annie",
        "Anthony Desmond", "Anthony James", "Barber", "Barton William", "Board",
        "Bone", "Burchell", "Caswell", "Collins", "Corrinna",
        "Cullister Anne", "Day", "Deborah", "Dennis", "Duckett",
        "Edward", "Edwin", "Elizabeth", "Eric Gwyn", "Farrington",
        "Fiona M", "Forrester", "Frances C", "Geoffrey", "George",
        "George F", "German", "Gillian", "Grinning", "Groves",
        "Hannah", "Harry", "Hatfield", "Helen Elizabeth", "Henry James",
        "Horton", "Horwood", "Hughes", "Hyde", "Ira Walter",
        "Irene Winifred", "Iris Mary", "Ivor Harding", "Jane Rosemary", "Jean M A",
        "Jean Maud", "Jennifer Nancy", "Jenny", "Jeremy D J", "Joanne",
        "John", "John Werrett", "Kate A", "Kathleen Rose Lucy", "Keziah",
        "Knight", "Locke", "Margaret Mary", "Mary Jane", "Mavis Jean",
        "Michael Bruce", "Neville", "Paul V", "Pete", "Peter Bryan",
        "Phillipa", "Prewett", "Raymond Holloway", "Richard G", "Sarah F",
        "Slim", "Susan", "Suzanne Alison", "Sybil Beatrice", "Sydney Mary",
        "Tewkesbury", "Thomas", "Timothy", "Timothy J", "Toby",
        "Tomlinson", "Tracy Jane", "Valerie C", "Vince", "Walker",
        "Wallace", "Wayne Oakleigh", "Wendy", "Werrett", "Whereatt",
        "Wherrett", "Wherritt", "White", "Wilbur", "William",
        "William John", "Yendell", "Young",
    };

    var keys = new IndexedKeyCollection();
    for (int n = 0; n < names.Length; n++)
    {
        string name = names[n];

        // The returned string is never read; only the indexer call is wanted.
        string ignored = keys[name, 0, name.Length];
    }

    int index = -1;
    bool located = keys.Find("John", 0, 4, out index);
    Assert.True(located, "NOT FOUND!");

    Assert.True(index == 55, "expected 55\tgot " + index);
}
/// <summary>
/// Parses the given data, which should be 1 or more lines; multiple
/// calls can be made with multiple lines.
/// Events are triggered upon reading a line, or on an error.
/// If TagCollection and XrefCollection haven't been set
/// prior to calling, default IndexedKeyCollection objects will be
/// used. To support replacing XRefs you need to set XrefCollection
/// to an instance of XRefIndexedKeyCollection before calling.
/// </summary>
/// <remarks>
/// Input that stops part-way through a line no longer indexes past the
/// end of <paramref name="data"/>: a level with nothing after it yields
/// <c>LevelMissingDelim</c>, and an xref id with no closing '@' or with
/// nothing after the closing '@' yields <c>XrefIDMissingDelim</c>.
/// </remarks>
/// <param name="data">Data to parse, expected to be unicode.</param>
/// <returns>The last error encountered.</returns>
public GedcomErrorState GedcomParse(string data)
{
    ErrorState = GedcomErrorState.NoError;

    int i = 0;
    int len = data.Length;

    // Tags are always the same, data.Substring was allocating lots
    // of memory, instead use a special collection which matches via
    // array index, e.g tagCollection[str, index, length] to avoid
    // the extra allocations, and caches the resulting string for
    // use again without having to substring
    if (TagCollection == null)
    {
        TagCollection = new IndexedKeyCollection();
    }

    // same for Xrefs
    if (XrefCollection == null)
    {
        XrefCollection = new IndexedKeyCollection();
    }

    while (i < len)
    {
        // temp scans ahead of i; i is only advanced once a token is accepted
        int temp = i;

        switch (State)
        {
            case GedcomState.Level:
                // eat up leading white space
                while (temp < len && char.IsWhiteSpace(data[temp]))
                {
                    temp++;
                }

                bool hadLevel = false;
                int lvl = 0;
                while (temp < len && IsDigit(data[temp]))
                {
                    hadLevel = true;
                    lvl *= 10;
                    lvl += data[temp++] - '0';
                }

                // possible we had data after eating white space
                // but that it wasn't a digit
                if (!hadLevel)
                {
                    if (ApplyConcContOnNewLineHack && (previousTag == "CONC" || previousTag == "CONT"))
                    {
                        // treat the line as a continuation of the previous CONC/CONT
                        Level = previousLevel;
                        Tag = "CONC";
                        State = GedcomState.LineValue;
                    }
                    else
                    {
                        ErrorState = GedcomErrorState.LevelExpected;
                    }
                }
                else if (temp == i)
                {
                    // NOTE(review): unreachable as written - hadLevel being true
                    // means at least one digit was consumed, so temp > i.
                    // Kept unchanged to avoid altering the state machine.
                    if (!char.IsWhiteSpace(data[i]))
                    {
                        ErrorState = GedcomErrorState.LevelExpected;
                    }
                    else
                    {
                        i++;
                    }
                }
                else
                {
                    Level = lvl;
                    if (Level > MaxLevel)
                    {
                        ErrorState = GedcomErrorState.LevelInvalid;
                        Level = -1;
                    }
                    else
                    {
                        i = temp;
                    }
                }

                // move to next state if we have a level
                // and we are still in a level state (may not be
                // if we have some hacks active)
                if (Level != -1 && State == GedcomState.Level)
                {
                    // i == len when the input ends right after the level digits;
                    // report the missing delimiter instead of reading data[len]
                    if (i < len && IsDelim(data[i]))
                    {
                        i++;
                        if (IgnoreInvalidDelim)
                        {
                            while (i < len && IsDelim(data[i]))
                            {
                                i++;
                            }

                            State = GedcomState.XrefID;
                        }
                        else if (i < len && IsDelim(data[i]))
                        {
                            ErrorState = GedcomErrorState.InvalidDelim;
                        }
                        else
                        {
                            State = GedcomState.XrefID;
                        }
                    }
                    else
                    {
                        ErrorState = GedcomErrorState.LevelMissingDelim;
                    }
                }

                break;

            case GedcomState.XrefID:
                // no optional xref id just move to next state
                // otherwise extract pointer
                if (IsXrefID(data, temp))
                {
                    // bypass first @
                    i++;
                    temp = i;

                    while (temp < len && data[temp] != '@')
                    {
                        temp++;
                    }

                    if ((temp - i) > MaxXRefLength)
                    {
                        ErrorState = GedcomErrorState.XrefIDTooLong;
                    }
                    else
                    {
                        XrefID = XrefCollection[data, i, temp - i];

                        // step over the closing @; this can land on or past len
                        // when the id is the last thing in the input
                        i = temp + 1;

                        if (i < len && IsDelim(data[i]))
                        {
                            i++;
                            if (IgnoreInvalidDelim)
                            {
                                while (i < len && IsDelim(data[i]))
                                {
                                    i++;
                                }

                                State = GedcomState.Tag;
                            }
                            else if (i < len && IsDelim(data[i]))
                            {
                                ErrorState = GedcomErrorState.InvalidDelim;
                            }
                            else
                            {
                                State = GedcomState.Tag;
                            }
                        }
                        else
                        {
                            ErrorState = GedcomErrorState.XrefIDMissingDelim;
                        }
                    }
                }
                else
                {
                    State = GedcomState.Tag;
                }

                break;

            case GedcomState.Tag:
                while (temp < len &&
                       (IsAlphaNum(data[temp]) ||
                        (AllowHyphenOrUnderscoreInTag && (data[temp] == '-' || data[temp] == '_'))))
                {
                    temp++;
                }

                if (temp == i)
                {
                    ErrorState = GedcomErrorState.TagExpected;
                }
                else
                {
                    Tag = TagCollection[data, i, temp - i];
                    i = temp;
                }

                if (Tag != string.Empty)
                {
                    if (Tag == "TRLR" && i == len)
                    {
                        FoundTag();
                    }
                    else
                    {
                        if (i < len && IsDelim(data[i]))
                        {
                            i++;
                            State = GedcomState.LineValue;
                        }

                        // not else if so we can handle tags with a trailing space but no line value
                        if (i == len || IsTerminator(data[i]))
                        {
                            FoundTag();

                            while (i < len && IsTerminator(data[i]))
                            {
                                i++;
                            }
                        }
                        else if (State != GedcomState.LineValue && !IgnoreMissingTerms)
                        {
                            ErrorState = GedcomErrorState.TagMissingDelimOrTerm;
                        }
                    }
                }

                break;

            case GedcomState.LineValue:
                if (IsPointer(data, temp))
                {
                    // bypass first @
                    i++;
                    temp = i;

                    while (temp < len && data[temp] != '@')
                    {
                        temp++;
                    }

                    if ((temp - i) > 0)
                    {
                        LineValue = XrefCollection[data, i, temp - i];
                        i = temp + 1;
                        LineValueType = GedcomLineValueType.PointerType;
                    }

                    // GEDCOM only allows a single XREF for a pointer
                    // Genopro ignores this and puts a comma separated
                    // list of XREFs in the mess it pretends is GEDCOM.
                    // This causes us to get stuck in the LineValue state
                    // (this could of course happen with anything after the
                    // pointer)
                    if (i < len)
                    {
                        // we will allow white space, but nothing else
                        while (i < len && IsDelim(data[i]))
                        {
                            i++;
                        }

                        if (i < len && !IsTerminator(data[i]))
                        {
                            ErrorState = GedcomErrorState.LineValueInvalid;
                        }
                    }
                }
                else
                {
                    while (ErrorState == GedcomErrorState.NoError && LineValue == string.Empty)
                    {
                        if (temp < len && IsAnyChar(data, temp))
                        {
                            temp++;
                        }
                        else if (temp < len && IsEscape(data, temp))
                        {
                            // bypass @#
                            temp += 2;

                            while (temp < len && data[temp] != '@')
                            {
                                temp++;
                            }

                            temp++;
                        }

                        // hack for presidents.ged, email address
                        // is used in PHON on line 13 with a single @
                        // this isn't valid GEDCOM
                        // Should be escaped as @@ but handle it anyway
                        // Same thing occurs in user supplied file TOUT200801_unicode.ged
                        // with RELA @INDI:BAPM
                        else if (temp < len && data[temp] == '@')
                        {
                            temp++;
                        }
                        else if (temp != i)
                        {
                            if ((temp < len) && !IsTerminator(data[temp]))
                            {
                                ErrorState = GedcomErrorState.LineValueInvalid;
                            }
                            else
                            {
                                // the escape branch above can step temp past len
                                temp = Math.Min(temp, len);
                                string dup = data.Substring(i, temp - i);

                                // unescape @@
                                LineValue = dup.Replace("@@", "@");

                                LineValueType = GedcomLineValueType.DataType;
                            }

                            i = temp;
                        }

                        // TODO: no line value, but have hit the terminator
                        // what should this be allowed for?
                        // Family Tree Maker outputs empty CONT (and CONC?)
                        else if (Tag == "CONT" || Tag == "CONC")
                        {
                            LineValue = " ";
                        }
                        else
                        {
                            // hit a terminator
                            break;
                        }
                    }
                }

                if (ErrorState == GedcomErrorState.NoError)
                {
                    // can't use FoundTag here, may not want to reset
                    previousLevel = Level;
                    previousTag = Tag;

                    TagFound?.Invoke(this, EventArgs.Empty);

                    // >= rather than ==: the pointer branch sets i = temp + 1,
                    // which is len + 1 if no closing @ was found before the end
                    if (i >= len || IsTerminator(data[i]))
                    {
                        while (i < len && IsTerminator(data[i]))
                        {
                            i++;
                        }

                        // reset states
                        ResetParseState(false);
                    }
                    else if (!IgnoreMissingTerms)
                    {
                        ErrorState = GedcomErrorState.LineValueMissingTerm;
                    }
                }

                break;
        }

        // stop at the first error and notify listeners
        if (ErrorState != GedcomErrorState.NoError)
        {
            ParserError?.Invoke(this, EventArgs.Empty);
            break;
        }
    }

    // reset parse status for more input
    ResetParseState(false);

    return ErrorState;
}