// ==================================== METHODS =======================================
/* -------------------------------------------------------------------------------------
 * Name:    getNextLinkGrp
 * Goal:    Advance the class-level reader [rdFileXml] to the next <linkGrp> element,
 *          load that element into the class-level document [pdxThis] and return it
 * Return:  The <linkGrp> node, or null when the reader is not initialised, when there
 *          are no more <linkGrp> elements, or when an error occurred (which is logged)
 * Note:    The returned node lives inside [pdxThis], which is reloaded on every call.
 *          A node returned by an earlier call therefore no longer belongs to the
 *          current content of that document.
 * ------------------------------------------------------------------------------------- */
public XmlNode getNextLinkGrp() {
    try {
        // Validate: nothing can be read before initialisation has succeeded
        if (!bInit) { return null; }

        // Walk through the input, looking at the CURRENT node before moving on.
        // ReadOuterXml() leaves the reader on the node that follows the element, so
        // calling Read() first (as was done before) would step over a <linkGrp> that
        // directly follows the previous one without whitespace in between.
        while (!rdFileXml.EOF) {
            if (rdFileXml.IsStartElement("linkGrp")) {
                // Read this element (including its children) as a string
                String sWholeS = rdFileXml.ReadOuterXml();
                // Place this into the xml document and return the <linkGrp> node
                pdxThis.LoadXml(sWholeS);
                return pdxThis.SelectSingleNode("./descendant-or-self::linkGrp");
            }
            // Not a <linkGrp>: advance; stop when the input is exhausted
            if (!rdFileXml.Read()) { break; }
        }

        // Getting here means: there are no more <linkGrp> elements
        return null;
    } catch (Exception ex) {
        errHandle.DoError("getNextLinkGrp", ex); // Provide standard error message
        return null;
    }
}
// ------------------------------------------------------------------------------------
// Name:    DoLike
// Goal:    Perform the "Like" function using the pattern (or patterns) stored in [strPattern]
//          There can be more than 1 pattern in [strPattern], which must be separated
//          by a vertical bar: |
// Params:  strText    - the text to be tested
//          strPattern - one pattern, or several patterns separated by '|'
// Return:  true as soon as [strText] is "like" one of the patterns; false when none
//          matches or when an error occurs (the error is logged)
// History:
// 17-06-2010  ERK Created
// ------------------------------------------------------------------------------------
private bool DoLike(string strText, string strPattern) {
    try {
        // Strip surrounding whitespace from the combined pattern, then split it into
        // the individual alternatives
        string[] arPattern = strPattern.Trim().Split(new string[] { "|" }, StringSplitOptions.None);

        // Test each alternative on its own. Testing the complete, unsplit
        // [strPattern] would make the '|' separator part of the pattern.
        foreach (string strOne in arPattern) {
            if (strText.IsLike(strOne)) { return true; }
        }

        // No match has happened, so return false
        return false;
    } catch (Exception ex) {
        // Show error
        errHandle.DoError("XpathExt/DoLike", ex);
        // Return failure
        return false;
    }
}
// ----------------------------------------------------------------------------------------------------------
// Name :   AddEtreeChild
// Goal :   Add an <eTree> child under [ndxParent]
// Params:  ndxParent - node that receives the new <eTree> child
//          intId     - value for the "Id" attribute
//          strLabel  - value for the "Label" attribute
//          intSt     - value for the "from" attribute
//          intEn     - value for the "to" attribute
// Return:  whatever oXmlTools.AddXmlChild() returns, or null when an exception occurred
// History:
// 26-04-2011  ERK Created
// ----------------------------------------------------------------------------------------------------------
public XmlNode AddEtreeChild(ref XmlNode ndxParent, int intId, string strLabel, int intSt, int intEn) {
    XmlNode ndxNew;

    try {
        // Convert the numeric values into attribute text
        string strId = intId.ToString();
        string strFrom = intSt.ToString();
        string strTo = intEn.ToString();

        // Create the <eTree> element with its four attributes
        ndxNew = oXmlTools.AddXmlChild(ndxParent, "eTree",
            "Id", strId, "attribute",
            "Label", strLabel, "attribute",
            "from", strFrom, "attribute",
            "to", strTo, "attribute");
    } catch (Exception ex) {
        // Warn the user and signal failure
        errHandle.DoError("modXmlNode/AddEtreeChild", ex);
        ndxNew = null;
    }

    return ndxNew;
}
/* -------------------------------------------------------------------------------------
 * Name:        getInfo
 * Goal:        Use the 'omdbapi' site to retrieve information on the movie with the indicated @imdbId
 * Parameters:  sImdbId     - The ImdbId of the movie we are interested in.
 *                            It must be convertible to an integer (so without the "tt" prefix)
 * Return:      A MovieInfo object filled from the attributes of the <movie> element in the
 *              reply, or null when the reply has no <movie> element or when an error
 *              occurred (which is logged)
 * History:
 * 24/feb/2016 ERK Created
   ------------------------------------------------------------------------------------- */
public MovieInfo getInfo(String sImdbId) {
    try {
        // Create the object we will return
        MovieInfo oBack = new MovieInfo();

        // Create the imdbid string: "tt" + at least 7 digits, left-padded with '0'
        int iImdbId = Convert.ToInt32(sImdbId);
        sImdbId = "tt" + iImdbId.ToString("D7");

        // Create the request string: fill the id into the "@tt" placeholder
        String sRequest = sApiStart.Replace("@tt", sImdbId);

        // Make the request
        WebRequest request = WebRequest.Create(sRequest);
        request.Method = "GET";
        StringBuilder sbReply = new StringBuilder();

        // Process the result. The response itself is disposed as well as its stream,
        // so the underlying connection is released even when reading fails.
        using (WebResponse response = request.GetResponse())
        using (Stream strResponse = response.GetResponseStream())
        using (StreamReader rdThis = new StreamReader(strResponse)) {
            Char[] readBuff = new Char[iBufSize];
            int iCount = rdThis.Read(readBuff, 0, iBufSize);
            while (iCount > 0) {
                // Append the information to the stringbuilder
                sbReply.Append(readBuff, 0, iCount);
                // Read the next chunk of the reply
                iCount = rdThis.Read(readBuff, 0, iBufSize);
            }
        }

        // Convert the XML reply to a processable object
        XmlDocument pdxReply = new XmlDocument();
        pdxReply.LoadXml(sbReply.ToString());

        // Get to the information
        XmlNode ndxInfo = pdxReply.SelectSingleNode("./descendant-or-self::movie");
        if (ndxInfo == null) { return null; }

        // Fill the object we will return
        // Iterate over all the properties of object 'MovieInfo'
        // NOTE: they have to be implemented as 'properties' of the class...
        // NOTE(review): a property without an attribute of the same name in the reply
        //               makes the lookup below throw; the catch then returns null for
        //               the whole movie. Confirm that this is intended.
        foreach (PropertyInfo prop in typeof(MovieInfo).GetProperties()) {
            // Set the value of oBack's property using the information in the Xml node
            prop.SetValue(oBack, ndxInfo.Attributes[prop.Name].Value);
        }

        // Return the filled object
        return oBack;
    } catch (Exception ex) {
        errHandle.DoError("oprConv/getInfo", ex);
        return null;
    }
}
// ================= Methods for this class ==================================================
/* -------------------------------------------------------------------------------------
 * Name:        getFoliaHeader
 * Goal:        Get the <metadata> header of the .folia.xml file @sFile
 * Parameters:  sFile       - File to be processed
 *              ndxHeader   - Returned XmlNode to the <metadata> header
 *                            (stays null when the file has no <metadata> element)
 *              nsFolia     - Returned namespace manager that maps prefix "f" to the
 *                            namespace of the <metadata> element; only assigned when
 *                            such an element is found
 * Return:      false when the file does not exist or an error occurred (logged);
 *              true otherwise, also when no <metadata> element was found
 * History:
 * 1/feb/2016 ERK Created
   ------------------------------------------------------------------------------------- */
public bool getFoliaHeader(String sFile, ref XmlNode ndxHeader, ref XmlNamespaceManager nsFolia) {
    try {
        // Without an input file there is nothing to do
        if (!File.Exists(sFile)) {
            return false;
        }

        // Start with a fresh class-level document and an empty result
        pdxThis = new XmlDocument();
        ndxHeader = null;

        using (StreamReader rdText = new StreamReader(sFile)) {
            using (XmlReader rdXml = XmlReader.Create(rdText)) {
                bool bHeaderFound = false;

                // Step through the file until the first <metadata> element has been handled
                while (!bHeaderFound && !rdXml.EOF && rdXml.Read()) {
                    // Skip everything that is not the start of <metadata>
                    if (!rdXml.IsStartElement("metadata")) {
                        continue;
                    }

                    // Load the complete <metadata> element into the document
                    String sMetadata = rdXml.ReadOuterXml();
                    pdxThis.LoadXml(sMetadata);

                    // Map prefix "f" to the namespace of the loaded element
                    nsFolia = new XmlNamespaceManager(pdxThis.NameTable);
                    nsFolia.AddNamespace("f", pdxThis.DocumentElement.NamespaceURI);

                    // Hand back the header node; this ends the loop
                    ndxHeader = pdxThis.SelectSingleNode("./descendant-or-self::f:metadata", nsFolia);
                    bHeaderFound = true;
                }
            }
        }

        // Return success
        return true;
    } catch (Exception ex) {
        errHandle.DoError("getFoliaHeader", ex); // Provide standard error message
        return false;
    }
}
// ================================ METHODS ===================================================
/* -------------------------------------------------------------------------------------
 * Name:        repairOneFolia
 * Goal:        Check and repair one folia file
 *              The .gz file is decompressed, every line is offered to adaptTime() for
 *              its "begintime=" and "endtime=" attributes, and - only when at least one
 *              line changed - the result is written back and compressed again
 * Parameters:  sFileFoliaGz - Name of the compressed file; it must end in ".gz"
 * Return:      true when the file was processed (changed or not); false when the name
 *              does not end in ".gz", when (de)compression failed or when an error
 *              occurred. All failures are logged.
 * History:
 * 21/mar/2016 ERK Created
   ------------------------------------------------------------------------------------- */
public bool repairOneFolia(String sFileFoliaGz) {
    const String sGzExt = ".gz";

    try {
        // Get the unzipped file name by removing the ".gz" SUFFIX only.
        // (Replacing every ".gz" would also damage a path that contains ".gz" elsewhere.)
        if (!sFileFoliaGz.EndsWith(sGzExt, StringComparison.OrdinalIgnoreCase)) {
            errHandle.DoError("cmbConv/repairOneFolia", "File name does not end in .gz: [" + sFileFoliaGz + "]");
            return false;
        }
        String sFileFolia = sFileFoliaGz.Substring(0, sFileFoliaGz.Length - sGzExt.Length);

        // Unzip the file
        if (!General.DecompressFile(sFileFoliaGz, sFileFolia)) {
            errHandle.DoError("cmbConv/repairOneFolia", "Could not decompress");
            return false;
        }

        // Check the syntax of the begintime and the endtime on every line
        String[] arLine = File.ReadAllLines(sFileFolia);
        int iChanges = 0;   // Number of LINES that have been changed
        for (int i = 0; i < arLine.Length; i++) {
            String sLine = arLine[i];
            // Both checks must always run; the second one sees the result of the first
            bool bBegin = adaptTime(ref sLine, sLine.IndexOf("begintime="));
            bool bEnd = adaptTime(ref sLine, sLine.IndexOf("endtime="));
            if (bBegin || bEnd) {
                arLine[i] = sLine;
                iChanges++;
            }
        }

        // Only save results if something changed
        if (iChanges > 0) {
            File.WriteAllLines(sFileFolia, arLine);
            // Show repair log
            errHandle.Status("Repaired file: [" + sFileFoliaGz + "] (" + iChanges + " repairs)");
            // And compress into .gz
            if (!General.CompressFile(sFileFolia, sFileFoliaGz)) {
                // The unzipped (repaired) file is deliberately left in place here
                errHandle.DoError("cmbConv/repairOneFolia", "Could not compress");
                return false;
            }
        } else {
            // Show repair log
            errHandle.Status("Unchanged: [" + sFileFoliaGz + "]");
        }

        // Remove the unzipped file again
        File.Delete(sFileFolia);

        // Return positively
        return true;
    } catch (Exception ex) {
        errHandle.DoError("cmbConv/repairOneFolia", ex);
        return false;
    }
}
// ------------------------------------------------------------------------------------
// Name:    AddXmlChild
// Goal:    Make a new XmlNode element of type [strTag] using the [arValue] values
//          These values come in groups of three:
//            (a) itemname
//            (b) itemvalue
//            (c) itemtype: "attribute", "child" or "text"
//                          ("text" sets the inner text of the new node; its itemname is not used)
//          Append this node as child under [ndxParent]; when [ndxParent] is null the
//          node is appended to the document [pdxDoc] itself
// Return:  The XmlNode element that has been made is returned.
//          Null is returned - and NOTHING is added to the parent - when [strTag] is
//          empty, when there is no document, when [arValue] is not made up of complete
//          groups of three, when an itemtype is unknown or when an exception occurred
// History:
// 22-09-2010  ERK Created
// ------------------------------------------------------------------------------------
public XmlNode AddXmlChild(XmlNode ndxParent, string strTag, params string[] arValue) {
    XmlNode ndxThis = null;         // Working node
    XmlNode ndxChild = null;        // Child node
    XmlAttribute atxChild = null;   // The attribute we are looking for
    int intI = 0;                   // Counter

    try {
        // Validate (NB: we DO allow empty parents)
        if ((string.IsNullOrEmpty(strTag)) || (pdxDoc == null)) { return null; }
        // The values must form complete (name, value, type) groups
        if (arValue == null || arValue.Length % 3 != 0) {
            errHandle.DoError("modXmlNode/AddXmlChild",
                new ArgumentException("Values must be given as (name, value, type) groups of three", "arValue"));
            return null;
        }

        // Make a new XmlNode in the local XML document
        if (string.IsNullOrEmpty(strNs)) {
            ndxThis = pdxDoc.CreateNode(XmlNodeType.Element, strTag, null);
        } else {
            ndxThis = pdxDoc.CreateNode(XmlNodeType.Element, strTag, strNs);
        }
        // Validate
        if (ndxThis == null) { return null; }

        // Walk through the values; the node is still detached at this point, so a
        // failure below cannot leave a half-built node behind in the tree
        for (intI = 0; intI < arValue.Length; intI += 3) {
            // Action depends on the type of value
            switch (arValue[intI + 2]) {
                case "attribute":
                    // Is the attribute there already?
                    atxChild = ndxThis.Attributes[arValue[intI]];
                    if (atxChild == null) {
                        // Create attribute and append it to this node
                        atxChild = pdxDoc.CreateAttribute(arValue[intI]);
                        ndxThis.Attributes.Append(atxChild);
                    }
                    // Fill in value of this attribute
                    atxChild.Value = arValue[intI + 1];
                    break;
                case "child":
                    // Create this node
                    if (string.IsNullOrEmpty(strNs)) {
                        ndxChild = pdxDoc.CreateNode(XmlNodeType.Element, arValue[intI], null);
                    } else {
                        ndxChild = pdxDoc.CreateNode(XmlNodeType.Element, arValue[intI], strNs);
                    }
                    // Append this node as child
                    ndxThis.AppendChild(ndxChild);
                    // Fill in the value of this node
                    ndxChild.InnerText = arValue[intI + 1];
                    break;
                case "text":
                    // Add the text as inner text
                    ndxThis.InnerText = arValue[intI + 1];
                    break;
                default:
                    // There is no other option yet, so return failure
                    return null;
            }
        }

        // The node is complete: now attach it
        if (ndxParent == null) {
            // No parent: take the document as starting point
            pdxDoc.AppendChild(ndxThis);
        } else {
            // Just append it
            ndxParent.AppendChild(ndxThis);
        }

        // Return the new node
        return ndxThis;
    } catch (Exception ex) {
        // Warn user
        errHandle.DoError("modXmlNode/AddXmlChild", ex);
        // Return failure
        return null;
    }
}
static List <String> lstCmdi = null;    // List of .cmdi.xml files in the [sCmdi] directory

/* -------------------------------------------------------------------------------------
 * Name:        Main
 * Goal:        Command-line entry point + argument handling
 * Options:     -f <dir>    Root directory under which the .folia.xml.gz files are located
 *              -c <dir>    Root directory where the CMDI files are located
 *              -o <dir>    Root directory where the output files are going to be stored
 *              -l <code>   Language code
 *              -d          Debugging
 *              -r          Perform the 'repair' action instead of the default 'combi'
 *              Options that are not listed here are ignored.
 * Note:        An exception is logged and then re-thrown.
 * ------------------------------------------------------------------------------------- */
static void Main(string[] args) {
    // NOTE(review): the language is described as a two-letter code here but as a
    //               three-letter code at option -l; it is not used further in this method
    String sLanguage = "";      // Which language to take (two-letter code 'en' or 'nl')
    String sFolia = "";         // Base directory to take (e.g. /vol/tensusers/ekomen)
    String sSubtiel = "";       // Base directory for the output (e.g: /vol/tensusers/ekomen/subtiel)
    String sCmdi = "";          // Base directory for CMDI files
    String sAction = "combi";   // The particular type of action expected. Default "combi"
    bool bIsDebug = false;      // Debugging (set by -d; not used further in this method)

    try {
        // Check command-line options
        for (int i = 0; i < args.Length; i++) {
            // get this argument
            String sArg = args[i];
            if (!sArg.StartsWith("-")) {
                // Throw syntax error and leave
                SyntaxError("1 - i=" + i + " args=" + args.Length + " argCurrent=[" + sArg + "]");
                return;
            }

            // Options that take a value must not be the last argument; without this
            // check args[++i] below would run past the end of the array
            String sOption = sArg.Substring(1);
            bool bNeedsValue = (sOption == "f" || sOption == "c" || sOption == "o" || sOption == "l");
            if (bNeedsValue && i + 1 >= args.Length) {
                SyntaxError("Missing value for option [" + sArg + "]");
                return;
            }

            // Check out the arguments
            switch (sOption) {
                case "f": // Root directory under which the .folia.xml.gz files are located
                    sFolia = Path.GetFullPath(args[++i]);
                    break;
                case "c": // Root directory where the CMDI files are located
                    sCmdi = Path.GetFullPath(args[++i]);
                    break;
                case "o": // Root directory where the output files are going to be stored
                    sSubtiel = Path.GetFullPath(args[++i]);
                    break;
                case "d": // Debugging
                    bIsDebug = true;
                    break;
                case "l": // Language (three letter code)
                    sLanguage = args[++i];
                    break;
                case "r": // INSTEAD of 'combi', perform 'REPAIR' action
                    sAction = "repair";
                    break;
            }
        }

        // Check presence of input/output
        if (sFolia == "" || !Directory.Exists(sFolia)) { SyntaxError("No (valid) base directory for FoLiA input"); return; }

        // Initialise the Treebank Xpath functions, which may make use of tb:matches()
        XPathFunctions.conTb.AddNamespace("tb", XPathFunctions.TREEBANK_EXTENSIONS);

        // Create a new instance of the combination class
        cmbConv oConv = new cmbConv(errHandle);

        // Other directories depend on ACTION
        switch (sAction) {
            case "repair":
                // No further directories need be present: repairs are done in the input tree
                oConv.output = sFolia;
                // Process all the directories with this action
                WalkDirectoryTree(sFolia, "*.folia.xml.gz", sAction, ref oConv);
                break;
            default:
                if (sCmdi == "" || !Directory.Exists(sCmdi)) { SyntaxError("No (valid) cmdi directory"); return; }
                if (sSubtiel == "") { SyntaxError("No subtiel directory"); return; }

                // If the target directory is not there, create it
                if (!Directory.Exists(sSubtiel)) {
                    Directory.CreateDirectory(sSubtiel);
                }
                oConv.output = sSubtiel;

                // find .cmdi.xml files
                errHandle.Status("Finding .cmdi.xml files...");
                lstCmdi = Directory.GetFiles(sCmdi, "*.cmdi.xml", SearchOption.AllDirectories).ToList();
                // Hand the list of files over to the converter
                oConv.cmdi(lstCmdi);

                // Find .folia.xml.gz files
                errHandle.Status("Finding directories in " + sFolia);

                // Walk all directories
                errHandle.Status("Processing .folia.xml.gz files...");
                // Output the header
                oConv.doHeaderCsv();
                // Recursive call for each subdirectory.
                WalkDirectoryTree(sFolia, "*.folia.xml.gz", sAction, ref oConv);

                // Save the harvested information to an xml file
                if (!oConv.harvestSaveXml(sSubtiel + "/harvest.xml")) {
                    errHandle.DoError("Main", "Could not create harvest summary xml file");
                    return;
                }
                break;
        }

        // Exit the program
        errHandle.Status("Ready");
    } catch (Exception ex) {
        errHandle.DoError("Main", ex); // Provide standard error message
        throw;
    }
}
// =================== Local variables ===============================================

/* -------------------------------------------------------------------------------------
 * Name:        Main
 * Goal:        Command-line entry point + argument handling
 * Options:     -p <file>   Xml file containing the parallels between Eng and NL
 *              -n <dir>    Input directory for Dutch: .folia.xml and .cmdi.xml files
 *              -e <dir>    Input directory for English: .folia.xml and .cmdi.xml files
 *              -o <dir>    Top output directory
 *              -a <action> Action; only 'english' is accepted
 *              -f          Force
 *              -d          Debugging
 *              Any other option is a syntax error.
 * Note:        An exception is logged and then re-thrown.
 * ------------------------------------------------------------------------------------- */
static void Main(string[] args) {
    String sDutch = "";           // Input directory for DUTCH subtitles
    String sEnglish = "";         // Input directory for ENGLISH subtitles
    String sOutput = "";          // Output directory
    String sParallel = "";        // File in .xml format that contains the parallel between EN-NL
    bool bIsDebug = false;        // Debugging
    bool bForce = false;          // Force
    String sAction = "english";   // Type of action to be taken

    try {
        // Check command-line options
        for (int i = 0; i < args.Length; i++) {
            // get this argument
            String sArg = args[i];
            if (!sArg.StartsWith("-")) {
                // Throw syntax error and leave
                SyntaxError("1 - i=" + i + " args=" + args.Length + " argCurrent=[" + sArg + "]");
                return;
            }

            // Options that take a value must not be the last argument; without this
            // check args[++i] below would run past the end of the array
            String sOption = sArg.Substring(1);
            bool bNeedsValue = (sOption == "p" || sOption == "n" || sOption == "e" ||
                                sOption == "o" || sOption == "a");
            if (bNeedsValue && i + 1 >= args.Length) {
                SyntaxError("Missing value for option [" + sArg + "]");
                return;
            }

            // Check out the arguments
            switch (sOption) {
                case "p": // Xml file containing the parallels between Eng and NL
                    sParallel = args[++i];
                    break;
                case "n": // Input directory for Dutch: .folia.xml and .cmdi.xml files
                    sDutch = args[++i];
                    break;
                case "e": // Input directory for English: .folia.xml and .cmdi.xml files
                    sEnglish = args[++i];
                    break;
                case "o": // Top output directory
                    sOutput = args[++i];
                    break;
                case "f": // Force
                    bForce = true;
                    break;
                case "a": // Action
                    // Culture-independent lowering: under e.g. a Turkish locale
                    // "ENGLISH".ToLower() does not yield "english"
                    sAction = args[++i].ToLowerInvariant();
                    if (sAction != "english") {
                        SyntaxError("The only action right now is 'english'");
                        return;
                    }
                    break;
                case "d": // Debugging
                    bIsDebug = true;
                    break;
                default:
                    // Throw syntax error and leave
                    SyntaxError("Unknown option: [" + sArg + "]");
                    return;
            }
        }

        // Check presence of input/output
        // NOTE(review): the output directory [sOutput] is not checked here - confirm
        //               that an empty value is acceptable to engConv
        if (sDutch == "" || sEnglish == "") { SyntaxError("Both dutch and english input must be specified"); return; }
        if (sParallel == "") { SyntaxError("The XML file containing the Dutch-English parallels must be specified"); return; }

        // Check input directory and parallels file
        if (!Directory.Exists(sDutch)) { errHandle.DoError("Main", "Cannot find Dutch input file(s) in: " + sDutch); return; }
        if (!Directory.Exists(sEnglish)) { errHandle.DoError("Main", "Cannot find English input file(s) in: " + sEnglish); return; }
        if (!File.Exists(sParallel)) { errHandle.DoError("Main", "Cannot find parallel file in: " + sParallel); return; }

        // Initialize the main entry point for the conversion
        engConv objConv = new engConv(errHandle);

        // Set directories where input is situated and output should come
        objConv.dutch = sDutch;
        objConv.english = sEnglish;
        objConv.output = sOutput;

        // Initialise the Treebank Xpath functions, which may make use of tb:matches()
        opsub.util.XPathFunctions.conTb.AddNamespace("tb", opsub.util.XPathFunctions.TREEBANK_EXTENSIONS);

        // Start reading the parallels XML
        ParReader oParallel = new ParReader(sParallel, errHandle);

        // Walk all the <linkGrp> elements; stop at the first one that cannot be converted
        XmlReader rdLinkGrp = null;
        while (oParallel.getNextLinkGrp(ref rdLinkGrp)) {
            // Process this one
            if (!objConv.ConvertOneEngToFolia(rdLinkGrp, bForce, bIsDebug)) {
                errHandle.DoError("Main", "Could not convert English");
                return;
            }
        }

        // Exit the program
        errHandle.Status("Ready");
    } catch (Exception ex) {
        errHandle.DoError("Main", ex); // Provide standard error message
        throw;
    }
}