/// <summary>
/// Collects the event sub-strings that <c>FindSubString</c> reports for every entry of
/// <c>parserSettings.eventList</c>, sorts them with <c>SubStringCompare</c>, trims the end of an
/// entry that reaches past the start of the entry sorted before it, and traces a usage map
/// (one character per position of <paramref name="data"/>) for diagnostics.
/// </summary>
/// <param name="id">Identifier of the record; only written to the trace output.</param>
/// <param name="data">Text of the record that is searched.</param>
/// <param name="parserSettings">Settings whose <c>eventList</c> supplies the markers to look for.</param>
/// <returns>The sorted list of matches; <c>end</c> of an entry may have been trimmed.</returns>
List<SubStringInstance> CheckSubstrings(string id, string data, TextParserSettings parserSettings)
{
    List<SubStringInstance> subList = new List<SubStringInstance>();

    foreach (EventDataString str in parserSettings.eventList)
    {
        int offset = 0;

        // Keep feeding FindSubString its own result until it signals completion with null.
        string newSubString = FindSubString(data, ref offset, str, ref subList);
        while (newSubString != null)
        {
            newSubString = FindSubString(newSubString, ref offset, str, ref subList);
        }
    }

    trace.TraceInformation("subs:" + subList.Count);

    // Diagnostic map: ' ' = unused position, otherwise the first letter of the event type
    // covering it. A char array avoids rebuilding a string for every marked position.
    char[] usage = new string(' ', data.Length).ToCharArray();

    subList.Sort(new SubStringCompare());

    // NOTE(review): the trimming below only makes sense if SubStringCompare orders entries
    // by descending start position - confirm against the comparer.
    int lastStart = -1;
    foreach (SubStringInstance inst in subList)
    {
        trace.TraceInformation(inst.type + " start:" + inst.start + " end:" + inst.end);

        if ((lastStart >= 0) && (lastStart < inst.end))
        {
            inst.end = lastStart;
        }

        // Clamp only the diagnostic marking range so that an out-of-range match cannot
        // abort parsing from inside trace-only code; the instance itself is not altered.
        int markFrom = (inst.start < 0) ? 0 : inst.start;
        int markTo = (inst.end > usage.Length) ? usage.Length : inst.end;

        if (markFrom < markTo)
        {
            char marker = inst.type.ToString()[0];
            for (int i = markFrom; i < markTo; i++)
            {
                if (usage[i] != ' ')
                {
                    trace.TraceInformation("warning overlap at " + i);
                }
                usage[i] = marker;
            }
        }

        lastStart = inst.start;
    }

    trace.TraceInformation(id);
    trace.TraceInformation(data);
    trace.TraceInformation(new string(usage));

    return subList;
}
/// <summary>
/// Creates a parser that separates text with a <c>NewLineTextSeparator</c>.
/// White space is skipped when <c>settings.WhiteSpaceHandling</c> is
/// <c>WhiteSpaceHandling.Remove</c> and retained for every other value.
/// </summary>
/// <param name="settings">Parser settings; only <c>WhiteSpaceHandling</c> is read here.</param>
public NewLineTextParser(TextParserSettings settings)
{
    if (settings.WhiteSpaceHandling == WhiteSpaceHandling.Remove)
    {
        _textWhiteSpace = new SkipTextWhiteSpace();
    }
    else
    {
        _textWhiteSpace = new RetainTextWhiteSpace();
    }

    _textSeparator = new NewLineTextSeparator();
}
/// <summary>
/// Creates a parser that separates text with a <c>DelimiterTextSeparator</c> using ','.
/// White space is skipped when <c>settings.WhiteSpaceHandling</c> is
/// <c>WhiteSpaceHandling.Remove</c> and retained for every other value.
/// </summary>
/// <param name="settings">Parser settings; only <c>WhiteSpaceHandling</c> is read here.</param>
public CommaDelimitedTextParser(TextParserSettings settings)
{
    bool removeWhiteSpace = settings.WhiteSpaceHandling == WhiteSpaceHandling.Remove;

    ITextWhiteSpace whiteSpaceHandler;
    if (removeWhiteSpace)
    {
        whiteSpaceHandler = new SkipTextWhiteSpace();
    }
    else
    {
        whiteSpaceHandler = new RetainTextWhiteSpace();
    }

    _textWhiteSpace = whiteSpaceHandler;
    _textSeparator = new DelimiterTextSeparator(',');
}
/// <summary>
/// Loads the text-parser settings from <paramref name="filename"/> with a DataContractSerializer.
/// A missing file (FileNotFoundException) or unreadable content (SerializationException) is traced
/// and not rethrown; in that case a default settings object is built - a list of Swedish event
/// keywords plus two page-break strings - and written back to <paramref name="filename"/>.
/// </summary>
/// <param name="filename">Path of the settings file to read; overwritten (FileMode.Create) when defaults are generated.</param>
/// <returns>The deserialized settings, or the newly created defaults; never null.</returns>
// NOTE(review): both streams are released with Close() on the normal path only; an exception
// other than the two caught here would skip the Close() call and leave the file open.
// NOTE(review): the end-marker literal of the ChildFamily entry continues across a line break
// in this file; confirm which character is intended at that position.
TextParserSettings GetParserSettings(string filename) { TextParserSettings settings = null; FileStream readSettings; try { readSettings = new FileStream(filename, FileMode.Open); } catch (FileNotFoundException e) { trace.TraceInformation("FileNotFoundException:" + e.ToString()); readSettings = null; } if (readSettings != null) { DataContractSerializer serializer = new DataContractSerializer(typeof(TextParserSettings)); try { settings = (TextParserSettings)serializer.ReadObject(readSettings); } catch (SerializationException e) { trace.TraceInformation("SerializationException:" + e.ToString()); } readSettings.Close(); } if (settings == null) { settings = new TextParserSettings(); settings.eventList = new List <EventDataString>(); settings.eventList.Add(new EventDataString(ParsePersonState.Birth, "Född ", "")); settings.eventList.Add(new EventDataString(ParsePersonState.Baptism, "Döpt ", "")); settings.eventList.Add(new EventDataString(ParsePersonState.Move, "Flyttade ", ". ")); settings.eventList.Add(new EventDataString(ParsePersonState.Lived, "Levde ", "")); settings.eventList.Add(new EventDataString(ParsePersonState.Occupation, "(Yrke) ", "")); settings.eventList.Add(new EventDataString(ParsePersonState.Death, "Död ", "")); settings.eventList.Add(new EventDataString(ParsePersonState.Burial, "Begravd ", ". ")); settings.eventList.Add(new EventDataString(ParsePersonState.Source, "Källa", "")); //settings.eventList.Add(new EventDataString(ParsePersonState.Fosterchild, "Fosterbarn ", ". ")); settings.eventList.Add(new EventDataString(ParsePersonState.SpouseFamily, "[Partner i gifte ", ".] ")); settings.eventList.Add(new EventDataString(ParsePersonState.ChildFamily, "[Barn i gifte ", ".] 
")); settings.eventList.Add(new EventDataString(ParsePersonState.Changed, "Ändrad ", "")); settings.pageBreakStrings = new List <string>(); settings.pageBreakStrings.Add(" "); settings.pageBreakStrings.Add("------------"); FileStream storeSettings = new FileStream(filename, FileMode.Create); DataContractSerializer serializer = new DataContractSerializer(typeof(TextParserSettings)); serializer.WriteObject(storeSettings, (TextParserSettings)settings); storeSettings.Close(); } return(settings); }
/// <summary>
/// Walks the buffered text file character by character, splits it into person records and
/// hands each record to <c>ParsePerson</c>. Every record is also written as one line to
/// <paramref name="decodeFilename"/> (UTF-8).
/// </summary>
/// <remarks>
/// The scanner is a small state machine:
/// Start1 waits for at least three '-' (spaces in between do not reset the count) followed by a space;
/// Start2 collects digits, '.' and ':' into the record id until a space follows a non-empty id;
/// Start3 waits until three more '-' have been seen and a '\r' arrives;
/// ReadPerson accumulates the record text until three consecutive '-' are read;
/// DecodePerson strips that trailing "---", rewinds the reader to the previous line start and
/// processes the record.
/// </remarks>
/// <param name="decodeFilename">Path of the dump file that receives one line per record; overwritten.</param>
private void Parse(string decodeFilename)
{
    TextParseState state = TextParseState.Start1;
    int dashCount = 0;
    int lineFeedCount = 0;
    string parsedId = "";

    // Record text is appended one character at a time; a builder avoids copying the whole
    // string for every character.
    StringBuilder personText = new StringBuilder();

    FamilyUtility utility = new FamilyUtility();
    TextParserSettings parserSettings = GetParserSettings(utility.GetCurrentDirectory() + "\\TextDecoderSettings.xml");
    BufferParseState parseState = new BufferParseState(fileBuffer, parserSettings.pageBreakStrings);

    // 'using' guarantees the dump file is flushed and closed even when GetXRef or
    // ParsePerson throws part-way through the import.
    using (System.IO.StreamWriter personFile = new System.IO.StreamWriter(decodeFilename, false, Encoding.UTF8, 4096))
    {
        while (!parseState.EndOfFile())
        {
            char ch = parseState.GetNextChar();

            switch (state)
            {
                case TextParseState.Start1:
                    if (ch == '-')
                    {
                        dashCount++;
                    }
                    else if (ch != ' ')
                    {
                        dashCount = 0;
                    }
                    if ((dashCount >= 3) && (ch == ' '))
                    {
                        state = TextParseState.Start2;
                        dashCount = 0;
                    }
                    break;

                case TextParseState.Start2:
                    if (((ch >= '0') && (ch <= '9')) || (ch == '.') || (ch == ':'))
                    {
                        parsedId += ch;
                    }
                    if ((parsedId != "") && (ch == ' '))
                    {
                        state = TextParseState.Start3;
                    }
                    break;

                case TextParseState.Start3:
                    if (ch == '-')
                    {
                        dashCount++;
                    }
                    if ((dashCount >= 3) && (ch == '\r'))
                    {
                        state = TextParseState.ReadPerson;
                        dashCount = 0;
                    }
                    break;

                case TextParseState.ReadPerson:
                    if ((ch != '\r') && (ch != '\n'))
                    {
                        personText.Append(ch);
                        lineFeedCount = 0;
                    }
                    else if (ch == '\r')
                    {
                        // Only the first '\r' of a run of line breaks becomes a space.
                        if (lineFeedCount++ == 0)
                        {
                            personText.Append(" ");
                        }
                    }

                    if (ch == '-')
                    {
                        dashCount++;
                    }
                    else
                    {
                        dashCount = 0;
                    }
                    if (dashCount >= 3)
                    {
                        dashCount = 0;
                        state = TextParseState.DecodePerson;
                    }
                    break;

                case TextParseState.DecodePerson:
                {
                    string personData = personText.ToString();

                    // Ordinal EndsWith replaces "LastIndexOf == Length - 3", which was culture
                    // sensitive and also true for a 2-character string without any match.
                    if (personData.EndsWith("---", System.StringComparison.Ordinal))
                    {
                        personData = personData.Substring(0, personData.Length - 3);
                        parseState.MoveToPreviousLineStart();
                    }

                    // NOTE(review): this line omits the xref id that the end-of-file flush
                    // below includes - confirm which dump format is intended.
                    personFile.WriteLine(parsedId + ":" + personData);
                    string newXrefId = xrefMapLists.GetMapper(XrefType.Individual).GetXRef(parsedId, true);
                    ParsePerson(newXrefId, personData, parserSettings);

                    parsedId = "";
                    personText.Length = 0;
                    state = TextParseState.Start1;
                    break;
                }
            }

            if (parseState.UpdateProgress())
            {
                backgroundWorker.ReportProgress((int)parseState.GetProgress(), "Importing...");
            }
        }

        // Flush a record that was still being read when the input ended.
        if ((parsedId.Length > 0) && (personText.Length > 0))
        {
            string remainingData = personText.ToString();
            string newXrefId = xrefMapLists.GetMapper(XrefType.Individual).GetXRef(parsedId, true);
            personFile.WriteLine(parsedId + ":" + newXrefId + ":" + remainingData);
            ParsePerson(newXrefId, remainingData, parserSettings);
            parsedId = "";
            personText.Length = 0;
        }
    }

    backgroundWorker = null;
    trace.TraceInformation("Text file parsing finished at ");
}
/// <summary>
/// Builds one individual from a person record: decodes every event sub-string reported by
/// <c>CheckSubstrings</c>, takes the text before the first '.' in <paramref name="data"/> as
/// the name, then prints the individual and adds it to the family tree.
/// </summary>
/// <param name="id">Cross-reference id assigned to the new individual.</param>
/// <param name="data">Text of the person record.</param>
/// <param name="parserSettings">Settings whose <c>eventList</c> describes the event markers.</param>
void ParsePerson(string id, string data, TextParserSettings parserSettings)
{
    IndividualClass person = new IndividualClass();
    person.SetXrefName(id);

    PersonalNameClass name = new PersonalNameClass();

    List<SubStringInstance> matches = CheckSubstrings(id, data, parserSettings);
    foreach (SubStringInstance match in matches)
    {
        // Scan the whole list without stopping: the last entry of a matching type is used.
        EventDataString marker = null;
        foreach (EventDataString candidate in parserSettings.eventList)
        {
            if (candidate.type == match.type)
            {
                marker = candidate;
            }
        }

        if (marker == null)
        {
            continue;
        }

        // The payload is the match without its start and end marker texts.
        int payloadStart = match.start + marker.start.Length;
        int payloadLength = match.end - match.start - marker.start.Length - marker.end.Length;
        DecodeEvent(ref person, match.type, data.Substring(payloadStart, payloadLength));
    }

    // A name is only set when the record contains a '.'; everything before the first one
    // is the name text.
    int nameEnd = data.IndexOf('.');
    if (nameEnd >= 0)
    {
        string nameStr = data.Substring(0, nameEnd);
        string givenName;
        string surname;

        int commaPos = nameStr.IndexOf(',');
        if (commaPos >= 0)
        {
            // "Surname, Given names": skip the blanks that follow the comma.
            int givenStart = commaPos + 1;
            while ((givenStart < nameStr.Length) && (nameStr[givenStart] == ' '))
            {
                givenStart++;
            }
            givenName = nameStr.Substring(givenStart);
            surname = nameStr.Substring(0, commaPos);
        }
        else
        {
            // "Given names Surname": the last blank separates the surname.
            int lastSpace = nameStr.LastIndexOf(' ');
            if (lastSpace >= 0)
            {
                givenName = nameStr.Substring(0, lastSpace);
                surname = nameStr.Substring(lastSpace + 1);
            }
            else
            {
                givenName = nameStr;
                surname = "";
            }
        }

        if (givenName.Length > 0)
        {
            name.SetName(PersonalNameClass.PartialNameType.GivenName, givenName);
        }
        if (surname.Length > 0)
        {
            name.SetName(PersonalNameClass.PartialNameType.BirthSurname, surname);
        }
        person.SetPersonalName(name);
    }

    person.Print();
    familyTree.AddIndividual(person);
}