/// <summary>
/// Creates an XML file from Numenera items that were extracted from a PDF.
/// </summary>
/// <remarks>
/// The items file is read and passed through <c>PDFTextParser.FixBrokenWords</c>,
/// <c>PDFTextParser.SplitLinesByKeywords</c> and <c>PDFTextParser.SplitItemsToObjects</c>.
/// The flattened result is also written to "&lt;ItemsFileName&gt;_Cleared.txt".
/// The XML file is named "OUTPUT_&lt;objectsName&gt;_&lt;sourcebookName&gt;.xml" and is placed
/// in the directory of the items file.
/// </remarks>
/// <param name="info">Items file name, keyword list and table keyword to use.</param>
/// <param name="sourcebookName">Source name stored in the XML info and used in the output file name.</param>
/// <param name="objectsName">Name for the collection of objects; also used in the output file name.</param>
/// <param name="objectName">Name for a single object.</param>
private static void NumeneraCreateXML(PDFTextFileInfo info, string sourcebookName, string objectsName, string objectName)
{
    List<string> itemsLines = new List<string>(File.ReadAllLines(info.ItemsFileName));
    itemsLines = PDFTextParser.FixBrokenWords(itemsLines);
    itemsLines = PDFTextParser.SplitLinesByKeywords(itemsLines, info.KeywordsList);

    var objects = PDFTextParser.SplitItemsToObjects(info, itemsLines.ToArray());

    // Dump the cleaned-up lines, one object after another.
    List<string> clearedLines = objects.SelectMany(obj => obj).ToList();
    File.WriteAllLines(info.ItemsFileName + "_Cleared.txt", clearedLines);

    var parsedDic = PDFTextParser.CreateDictionariesFromObjects(objects, info.KeywordsList);

    // Build the output path with Path.Combine instead of a hard-coded "\":
    // when the items file name has no directory part, the old concatenation
    // produced "\OUTPUT_..." (the drive root) instead of a path next to the input.
    string outputDirectory = Path.GetDirectoryName(info.ItemsFileName) ?? string.Empty;
    string outputFileName = "OUTPUT_" + objectsName + "_" + sourcebookName + ".xml";

    // and make XML from it
    var xmlInfo = new PDFTextXmlInfo()
    {
        XmlFileName = Path.Combine(outputDirectory, outputFileName),
        ObjectsName = objectsName,
        ObjectName = objectName,
        Source = sourcebookName
    };

    PDFTextXmlCreator.CreateXMLWithRollTable(xmlInfo, parsedDic, info.TableKeyword);
}
/// <summary>
/// Loads keywords from a file (one keyword per line) into <paramref name="info"/> and,
/// when no table keyword is set yet, looks for keywords marked with a leading '#'.
/// </summary>
/// <remarks>
/// Blank lines in the keywords file are ignored. A marked keyword is stored without its
/// '#' prefix and becomes <c>info.TableKeyword</c>; if several keywords are marked, the
/// last one wins. When <c>info.TableKeyword</c> is already set, the keywords are stored
/// unchanged.
/// </remarks>
/// <param name="info">Object that receives <c>KeywordsList</c> and, possibly, <c>TableKeyword</c>.</param>
/// <param name="keywordsFilename">Path of the text file containing the keywords.</param>
/// <exception cref="ArgumentException"><paramref name="keywordsFilename"/> is null or empty.</exception>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
public static void LoadKeywordsFromFile(PDFTextFileInfo info, string keywordsFilename)
{
    if (string.IsNullOrEmpty(keywordsFilename))
    {
        throw new ArgumentException("Argument is null or empty", nameof(keywordsFilename));
    }

    if (info == null)
    {
        throw new ArgumentNullException(nameof(info));
    }

    string[] keywords;
    try
    {
        keywords = File.ReadAllLines(keywordsFilename);
    }
    catch (Exception ex)
    {
        ex.Data["UserMessage"] += "There is an error with loading keywords from file;";
        throw;
    }

    // Skip blank lines: an empty keyword used to crash the '#' check below
    // (First() on an empty string) and would match every line via Contains("").
    info.KeywordsList = keywords.Where(k => !string.IsNullOrWhiteSpace(k)).ToList();

    if (info.TableKeyword != null)
    {
        return;
    }

    // check for # and note table keyword
    for (int i = 0; i < info.KeywordsList.Count; i++)
    {
        string keyword = info.KeywordsList[i];
        if (keyword[0] == '#')
        {
            string unmarked = keyword.Substring(1);
            info.KeywordsList[i] = unmarked;
            info.TableKeyword = unmarked;
        }
    }
}
/// <summary>
/// Converts a text file with Numenera items to XML, taking the keywords from a separate file.
/// </summary>
/// <param name="itemsFileName">Path of the text file containing the items.</param>
/// <param name="keywordsFileName">Path of the file the keywords are loaded from.</param>
/// <param name="sourcebookName">Sourcebook name passed on to the XML creation step.</param>
/// <param name="itemsNames">Name for the collection of items.</param>
/// <param name="itemName">Name for a single item.</param>
public static void NumeneraItemParseFileToXML(string itemsFileName, string keywordsFileName, string sourcebookName, string itemsNames, string itemName)
{
    // The table keyword starts out unset so that the keyword loader may pick one up.
    var fileInfo = new PDFTextFileInfo()
    {
        ItemsFileName = itemsFileName,
        TableKeyword = null
    };

    PDFTextParser.LoadKeywordsFromFile(info: fileInfo, keywordsFilename: keywordsFileName);

    NumeneraCreateXML(
        info: fileInfo,
        sourcebookName: sourcebookName,
        objectsName: itemsNames,
        objectName: itemName);
}
/// <summary>
/// Converts a text file with Artefacts copied from a PDF to XML, using a fixed keyword set.
/// </summary>
/// <param name="itemsFileName">Path of the text file containing the artefacts.</param>
/// <param name="sourcebookName">Sourcebook name passed on to the XML creation step.</param>
public static void ArtefactsParseFileToXML(string itemsFileName, string sourcebookName)
{
    const string tableKeyword = "Table:";

    // Keywords recognised in an artefact description.
    var artefactKeywords = new List<string>
    {
        "Level:",
        "Form:",
        "Effect:",
        tableKeyword,
        "Depletion:"
    };

    var fileInfo = new PDFTextFileInfo()
    {
        ItemsFileName = itemsFileName,
        KeywordsList = artefactKeywords,
        TableKeyword = tableKeyword
    };

    NumeneraCreateXML(fileInfo, sourcebookName, "Artefacts", "Artefact");
}
/// <summary>
/// Converts a text file with Cyphers copied from a PDF to XML, using a fixed keyword set.
/// </summary>
/// <param name="itemsFileName">Path of the text file containing the cyphers.</param>
/// <param name="sourcebookName">Sourcebook name passed on to the XML creation step.</param>
public static void CyphersParseFileToXML(string itemsFileName, string sourcebookName)
{
    const string tableKeyword = "Table:";

    // Keywords recognised in a cypher description.
    var cypherKeywords = new List<string>
    {
        "Level:",
        "Internal:",
        "Wearable:",
        "Usable:",
        "Effect:",
        tableKeyword
    };

    var fileInfo = new PDFTextFileInfo()
    {
        ItemsFileName = itemsFileName,
        KeywordsList = cypherKeywords,
        TableKeyword = tableKeyword
    };

    NumeneraCreateXML(fileInfo, sourcebookName, "Cyphers", "Cypher");
}
/// <summary>
/// Splits lines into separate Numenera objects. Each object is a list of strings whose
/// first entry is the title line (prefixed with <paramref name="nameKeyWord"/>); the
/// remaining entries are filled in by <c>BuildCurrentObject</c>.
/// </summary>
/// <param name="info">Keyword information handed to <c>BuildCurrentObject</c>.</param>
/// <param name="lines">Lines to split.</param>
/// <param name="nameKeyWord">Optional text put in front of every title line.</param>
/// <returns>One list of strings per detected object, in input order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="lines"/> is null.</exception>
public static List<List<string>> SplitItemsToObjects(PDFTextFileInfo info, string[] lines, string nameKeyWord = "")
{
    if (lines == null)
    {
        throw new ArgumentNullException(nameof(lines));
    }

    var result = new List<List<string>>();

    int i = 0;
    while (i < lines.Length)
    {
        // start with the title
        var curObj = new List<string> { nameKeyWord + lines[i] };

        int lastConsumed = BuildCurrentObject(lines, i, curObj, info);

        // add current object to the result
        result.Add(curObj);

        // BuildCurrentObject returns -1 when it runs off the end of the input
        // (e.g. the title is the very last line). Using that value as the next
        // index would restart the scan from line 0 and never terminate, so any
        // index that does not move forward is treated as "input exhausted".
        if (lastConsumed < i)
        {
            break;
        }

        i = lastConsumed + 1;
    }

    return result;
}
/// <summary>
/// Usage example: runs the clean-up steps on the items file and writes the result of
/// every step to its own text file next to the input
/// ("_FixedWords.txt", "_SplitByKeyWords.txt" and "_Cleared.txt").
/// </summary>
/// <param name="info">Items file name and keyword list to use.</param>
/// <param name="outputFilename">Not used by this method.</param>
public static void CreateClearedFileFromMessedUp(PDFTextFileInfo info, string outputFilename)
{
    string[] rawLines = File.ReadAllLines(info.ItemsFileName);

    // Step 1: repair broken words.
    List<string> fixedWords = PDFTextParser.FixBrokenWords(new List<string>(rawLines));
    File.WriteAllLines(info.ItemsFileName + "_FixedWords.txt", fixedWords);

    // Step 2: split lines by keywords.
    List<string> splitByKeywords = PDFTextParser.SplitLinesByKeywords(fixedWords, info.KeywordsList);
    File.WriteAllLines(info.ItemsFileName + "_SplitByKeyWords.txt", splitByKeywords);

    // Step 3: group lines into objects and write them out one after another.
    var objects = PDFTextParser.SplitItemsToObjects(info, splitByKeywords.ToArray());

    var clearedLines = new List<string>();
    foreach (var obj in objects)
    {
        foreach (var line in obj)
        {
            clearedLines.Add(line);
        }
    }

    File.WriteAllLines(info.ItemsFileName + "_Cleared.txt", clearedLines);
}
/// <summary>
/// Helper: collects the features of the current object (everything except its title)
/// into <paramref name="curObj"/>, starting at the line after <paramref name="index"/>.
/// </summary>
/// <param name="lines">All lines being split into objects.</param>
/// <param name="index">Index of the current object's title line.</param>
/// <param name="curObj">Current object; its last entry is extended or new entries are added.</param>
/// <param name="info">Provides the keyword list and the optional table keyword.</param>
/// <returns>
/// The index of the last line that belongs to the current object, or -1 when the scan
/// runs past the end of <paramref name="lines"/> without reaching one of the explicit exits.
/// </returns>
private static int BuildCurrentObject(string[] lines, int index, List<string> curObj, PDFTextFileInfo info)
{
    // read current object body starting from the next string
    int cursor = index + 1;
    while (cursor < lines.Length)
    {
        string current = lines[cursor];
        bool isLastLine = cursor + 1 == lines.Length;

        // Empty lines are skipped; an empty last line ends the object.
        if (string.IsNullOrEmpty(current))
        {
            if (isLastLine)
            {
                return cursor;
            }

            cursor++;
            continue;
        }

        if (isLastLine)
        {
            // Last line: it either starts a new entry (keyword found)
            // or is appended to the existing one; then we are done.
            if (info.KeywordsList.Any(keyword => current.Contains(keyword)))
            {
                curObj.Add(current);
            }
            else
            {
                curObj[curObj.Count - 1] += current + " ";
            }

            return cursor;
        }

        // If the following line holds the first keyword, this line is the
        // title of the next object, so the current object ended one line earlier.
        if (lines[cursor + 1].Contains(info.KeywordsList.First()))
        {
            return cursor - 1;
        }

        // A line containing a keyword opens a new entry.
        if (info.KeywordsList.Any(keyword => current.Contains(keyword)))
        {
            curObj.Add("");
        }

        // The line is always appended to the latest entry; a table line
        // additionally hands control to BuildTable, which moves the cursor.
        bool startsTable = info.TableKeyword != null && current.Contains(info.TableKeyword);
        curObj[curObj.Count - 1] += current + " ";
        if (startsTable)
        {
            cursor = BuildTable(lines, cursor, curObj, info.KeywordsList);
        }

        cursor++;
    }

    return -1;
}