/* -------------------------------------------------------------------------------------
 * Name:  ConvertOneOpsToCmdi
 * Goal:  Create a .cmdi file with metadata for the indicated input file
 * Note:  the .cmdi file will be placed in the same directory as FileIn
 * Parameters:  sFileIn  - File to be processed (expected to end in ".folia.xml.gz")
 *              objMovie - Object that is queried for the movie/subtitle information
 *              bForce   - Create result, even though it exists already
 *              bIsDebug - Debugging mode on or off (not used inside this method)
 * Returns:     true  - the .cmdi file was written, or the file was skipped
 *                      (symbolic link, unreadable, already done, no information available)
 *              false - input is missing, decompression or header reading failed,
 *                      or an exception occurred
 * History:
 *  1/feb/2016  ERK Created
   ------------------------------------------------------------------------------------- */
public bool ConvertOneOpsToCmdi(String sFileIn, ref osrMovie objMovie, bool bForce, bool bIsDebug) {
    String sFileInXml = "";         // Temporary, decompressed copy of the input
    bool bDecompressTried = false;  // Set as soon as the temporary file may exist
    bool bResult = false;

    try {
        // Validate
        if (!File.Exists(sFileIn)) return false;
        // Check if this is a symbolic link
        if (File.GetAttributes(sFileIn).HasFlag(FileAttributes.ReparsePoint) || !util.General.CanReadFile(sFileIn)) {
            // The input file is a symbolic link --> skip it, because it should already have been done
            return true;
        }

        // Determine file names
        // NOTE(review): Replace() acts on every occurrence in the path, not only on the suffix
        sFileInXml = sFileIn.Replace(".gz", "");
        String sFileCmdi = sFileIn.Replace(".folia.xml.gz", ".cmdi.xml");

        // Do we need to continue?
        if (!bForce && File.Exists(sFileCmdi)) {
            errHandle.Status("OpsToCmdi skips: " + sFileInXml);
            return true;
        }

        // Decompress input .gz file; from here on the temporary file must be cleaned up
        bDecompressTried = true;
        if (util.General.DecompressFile(sFileIn, sFileInXml)) {
            bResult = writeCmdiFromFoliaXml(sFileInXml, sFileCmdi, ref objMovie);
        }
    } catch (Exception ex) {
        errHandle.DoError("oprConv/ConvertOneOpsToCmdi", ex);
        bResult = false;
    }

    // Remove the xml file again -- on success AND on failure.
    // Never delete when the "temporary" name equals the input name (input without ".gz").
    if (bDecompressTried && sFileInXml != sFileIn) {
        try {
            if (File.Exists(sFileInXml)) File.Delete(sFileInXml);
        } catch (Exception ex) {
            errHandle.DoError("oprConv/ConvertOneOpsToCmdi", ex);
            bResult = false;
        }
    }
    return bResult;
}

/* -------------------------------------------------------------------------------------
 * Name:  writeCmdiFromFoliaXml
 * Goal:  Read the header of the decompressed folia file, look up the matching
 *        subtitle information in objMovie and serialize it as CMD into sFileCmdi
 * Note:  Helper of ConvertOneOpsToCmdi; exceptions are handled by the caller
 * Returns: false when the header cannot be read or the idsubtitle/idmovie meta is absent;
 *          true otherwise (also when there is no information for this subtitle)
   ------------------------------------------------------------------------------------- */
private bool writeCmdiFromFoliaXml(String sFileInXml, String sFileCmdi, ref osrMovie objMovie) {
    // Read the input file's header as xml
    conv.XmlConv oConv = new conv.XmlConv(this.errHandle);
    XmlNode ndxHeader = null;
    XmlNamespaceManager nsFolia = null;
    if (!oConv.getFoliaHeader(sFileInXml, ref ndxHeader, ref nsFolia)) return false;
    // No header: nothing to do
    if (ndxHeader == null) return true;

    // Get the subtitle id and the idmovie; SelectSingleNode() returns null when there is no match
    XmlNode ndxMetaSub = ndxHeader.SelectSingleNode("./child::f:meta[@id = 'idsubtitle']", nsFolia);
    XmlNode ndxMetaMovie = ndxHeader.SelectSingleNode("./child::f:meta[@id = 'idmovie']", nsFolia);
    if (ndxMetaSub == null || ndxMetaMovie == null) {
        errHandle.DoError("oprConv/ConvertOneOpsToCmdi", "Missing idsubtitle or idmovie meta in: " + sFileInXml);
        return false;
    }
    String sIdSubtitle = ndxMetaSub.InnerText;
    String sIdMovie = ndxMetaMovie.InnerText;

    // Check if we already have information from this idmovie
    XmlNodeList ndxList = null;
    XmlNode ndxMovie = null;
    if (!objMovie.getInformation(sIdMovie, ref ndxList, ref ndxMovie)) return true;

    // Walk the list to get the correct subtitle production for this movie
    XmlNode ndxSubtitle = null;
    for (int i = 0; i < ndxList.Count; i++) {
        // Access the id of this <subtitle> object
        XmlNode ndxSubThis = ndxList[i].SelectSingleNode("./child::IDSubtitle");
        if (ndxSubThis != null && ndxSubThis.InnerText == sIdSubtitle) {
            // We have the correct one
            ndxSubtitle = ndxList[i];
            break;
        }
    }

    // Validate
    if (ndxSubtitle == null) {
        errHandle.Status("OpsToCmdi no information for: " + sFileInXml);
        return true;
    }

    // Get the information we need from the SUBTITLE
    String sUserId = oTools.getXmlChildValue(ref ndxSubtitle, "UserID");
    String sUserNickName = oTools.getXmlChildValue(ref ndxSubtitle, "UserNickName");
    String sUserClass = oTools.getXmlChildValue(ref ndxSubtitle, "UserClass");
    String sMovieName = oTools.getXmlChildValue(ref ndxSubtitle, "MovieName");
    String sMovieYear = oTools.getXmlChildValue(ref ndxSubtitle, "MovieYear");
    String sMovieImdbId = oTools.getXmlChildValue(ref ndxSubtitle, "MovieImdbID");
    String sMovieReleaseName = oTools.getXmlChildValue(ref ndxSubtitle, "MovieReleaseName");
    String sSubDate = oTools.getXmlChildValue(ref ndxSubtitle, "SubDate");
    String sSeriesSeason = oTools.getXmlChildValue(ref ndxSubtitle, "SeriesSeason");
    String sSeriesEpisode = oTools.getXmlChildValue(ref ndxSubtitle, "SeriesEpisode");
    String sSeriesImdbParent = oTools.getXmlChildValue(ref ndxSubtitle, "SeriesIMDBParent");
    String sMovieKind = oTools.getXmlChildValue(ref ndxSubtitle, "MovieKind");
    String sSubTranslator = oTools.getXmlChildValue(ref ndxSubtitle, "SubTranslator");
    String sSubLanguage = oTools.getXmlChildValue(ref ndxSubtitle, "ISO639");
    // Additional information from the MOVIE
    String sSeriesName = oTools.getXmlChildValue(ref ndxMovie, "SeriesName");
    String sSeriesRootName = oTools.getXmlChildValue(ref ndxMovie, "SeriesRootName");
    String sEpisodeName = oTools.getXmlChildValue(ref ndxMovie, "EpisodeName");
    String sMoviePlot = oTools.getXmlChildValue(ref ndxMovie, "MoviePlot");
    XmlNode ndxAKA = ndxMovie.SelectSingleNode("./descendant::MovieAKA");
    // Values that are empty in the subtitle are taken from the movie instead
    if (sMovieYear == "") sMovieYear = oTools.getXmlChildValue(ref ndxMovie, "MovieYear");
    if (sMovieKind == "") sMovieKind = oTools.getXmlChildValue(ref ndxMovie, "MovieKind");
    if (sSeriesSeason == "") sSeriesSeason = oTools.getXmlChildValue(ref ndxMovie, "SeriesSeason");
    if (sSeriesEpisode == "") sSeriesEpisode = oTools.getXmlChildValue(ref ndxMovie, "SeriesEpisode");

    // Progress
    errHandle.Status("Processing movie " + sIdMovie + " subtitle " + sIdSubtitle);

    // (1) Create the .cmdi skeleton: header, resources and main component
    var oSubtiel = new CMD();
    oSubtiel.Header = new CMDHeader();
    oSubtiel.Resources = new CMDResources();
    oSubtiel.Resources.JournalFileProxyList = new CMDResourcesJournalFileProxyList();
    oSubtiel.Resources.ResourceProxyList = new CMDResourcesResourceProxyList();
    oSubtiel.Resources.ResourceRelationList = new CMDResourcesResourceRelationList();
    oSubtiel.Components = new CMDComponents();
    oSubtiel.Components.SUBTIEL = new CMDComponentsSUBTIEL();
    CMDComponentsSUBTIEL oSubt = oSubtiel.Components.SUBTIEL;

    // (2a) Populate the MOVIE part
    oSubt.Movie = new CMDComponentsSUBTIELMovie();
    oSubt.Movie.MovieId = sIdMovie;
    oSubt.Movie.Name = sMovieName;
    oSubt.Movie.Year = sMovieYear;
    oSubt.Movie.ImdbId = sMovieImdbId;
    oSubt.Movie.Kind = sMovieKind;
    oSubt.Movie.Plot = sMoviePlot;
    // (2a') add one or more alternative name parts
    oSubt.Movie.AltNameList = new CMDComponentsSUBTIELMovieAltNameList();
    if (ndxAKA != null) {
        // Collect this <MovieAKA> and each following sibling <MovieAKA>
        List<String> lstAlt = new List<string>();
        while (ndxAKA != null) {
            lstAlt.Add(ndxAKA.InnerText);
            ndxAKA = ndxAKA.SelectSingleNode("./following-sibling::MovieAKA");
        }
        oSubt.Movie.AltNameList.AltName = lstAlt.ToArray();
    }

    // (2b) Add a Series part only when there is any series information
    if (sSeriesSeason != "" || sSeriesEpisode != "" || sSeriesImdbParent != "") {
        oSubt.Movie.Series = new CMDComponentsSUBTIELMovieSeries();
        oSubt.Movie.Series.Name = sSeriesName;
        oSubt.Movie.Series.RootName = sSeriesRootName;
        oSubt.Movie.Series.Season = new CMDComponentsSUBTIELMovieSeriesSeason();
        oSubt.Movie.Series.Season.Value = sSeriesSeason;
        oSubt.Movie.Series.Season.Name = "";
        oSubt.Movie.Series.Episode = new CMDComponentsSUBTIELMovieSeriesEpisode();
        oSubt.Movie.Series.Episode.Value = sSeriesEpisode;
        oSubt.Movie.Series.Episode.Name = sEpisodeName;
        oSubt.Movie.Series.ParentImdbId = sSeriesImdbParent;
    }

    // (2c) Add a Release part
    oSubt.Release = new CMDComponentsSUBTIELRelease();
    oSubt.Release.Name = sMovieReleaseName;
    oSubt.Release.countryCode = "";             // To be filled in later

    // (2d) Build the Subtitle part
    oSubt.Subtitle = new CMDComponentsSUBTIELSubtitle();
    oSubt.Subtitle.SubtitleId = sIdSubtitle;
    oSubt.Subtitle.languageCode = sSubLanguage;
    oSubt.Subtitle.targetCountry = "";          // To be determined later
    oSubt.Subtitle.Date = sSubDate;
    oSubt.Subtitle.textHash = "";               // similarity hash

    // (2e) Create a licence part
    oSubt.Subtitle.License = new CMDComponentsSUBTIELSubtitleLicense();
    oSubt.Subtitle.License.LicenseCode = "";    // To be determined
    oSubt.Subtitle.License.LicenseDate = "";    // To be determined
    oSubt.Subtitle.License.LicenseDetails = sSubTranslator;
    String sSubLicense = (sSubTranslator == "") ? "" : "subtranslator";
    oSubt.Subtitle.License.LicenseType = sSubLicense;

    // (2f) Create a Subtitler/Author part
    oSubt.Subtitle.Author = new CMDComponentsSUBTIELSubtitleAuthor();
    oSubt.Subtitle.Author.Age = "";
    oSubt.Subtitle.Author.Pseudonym = sUserNickName;
    oSubt.Subtitle.Author.Name = "";
    oSubt.Subtitle.Author.UserClass = sUserClass;
    oSubt.Subtitle.Author.UserID = sUserId;

    // (2g) Create a residence place for the author
    oSubt.Subtitle.Author.ResidencePlace = new CMDComponentsSUBTIELSubtitleAuthorResidencePlace();
    oSubt.Subtitle.Author.ResidencePlace.countryCode = "";
    oSubt.Subtitle.Author.ResidencePlace.Town = "";

    // TODO: calculate hash and statistics...
    oSubt.Subtitle.Statistics = new CMDComponentsSUBTIELSubtitleStatistics();
    oSubt.Subtitle.Statistics.nSentences = 0;
    oSubt.Subtitle.Statistics.nWords = 0;
    oSubt.Subtitle.StatusInfo = new CMDComponentsSUBTIELSubtitleStatusInfo();
    oSubt.Subtitle.StatusInfo.link = "none";

    // (3) Serialize into output
    var serializer = new System.Xml.Serialization.XmlSerializer(typeof(CMD));
    using (var stream = new StreamWriter(sFileCmdi))
        serializer.Serialize(stream, oSubtiel);

    return true;
}
/// <summary>
/// Main --
///     Command-line entry point + argument handling
///     Options taking a value: -i (input), -m (movie dictionary), -o (output dir), -l (language)
///     Flags: -f (force), -s (skip), -h (action "hash"), -v (overview), -d (debug)
/// </summary>
/// <param name="args">Command-line arguments</param>
static void Main(string[] args) {
    String sInput = "";       // Input file or dir
    String sOutput = "/scratch/ekomen/out/";      // Output directory, if specified
    String sLanguage = "dut"; // This is the language abbreviation used in [osrMovie.cs] for sBaseUrl
    String sDict = "";        // Movie dictionary
    bool bIsDebug = false;    // Debugging
    bool bForce = false;      // Force
    bool bOview = false;      // Make overview or not
    bool bSkip = false;       // Skip everything that has *not* been made
    String sAction = "cmdi";  // Type of action to be taken

    try {
        // Check command-line options
        for (int i = 0; i < args.Length; i++) {
            // get this argument
            String sArg = args[i].Trim();
            if (sArg.StartsWith("-")) {
                errHandle.Status("Processing argument [" + sArg + "]");
                String sOption = sArg.Substring(1);
                // Options that consume the next argument must not be the last argument
                bool bNeedsValue = (sOption == "i" || sOption == "m" || sOption == "o" || sOption == "l");
                if (bNeedsValue && i + 1 >= args.Length) {
                    SyntaxError("3 - option [" + sArg + "] requires a value");
                    return;
                }
                // Check out the arguments
                switch (sOption) {
                    case "i": // Input file or directory with .folia.xml files
                        sInput = args[++i];
                        break;
                    case "f": // Force
                        bForce = true;
                        break;
                    case "s": // Skip
                        bSkip = true;
                        break;
                    case "m": // Movie dictionary -- Tab-separated list from opensubtitles.org
                        sDict = args[++i];
                        break;
                    case "o": // Output directory
                        sOutput = args[++i];
                        break;
                    case "h": // Calculate hashes and add them to existing .cmdi.xml files
                        sAction = "hash";
                        break;
                    case "v": // Make an overview
                        bOview = true;
                        break;
                    case "d": // Debugging
                        bIsDebug = true;
                        break;
                    case "l": // Language (three letter code)
                        sLanguage = args[++i];
                        break;
                    default:  // Unknown options are still ignored, but no longer silently
                        errHandle.Status("Ignoring unknown option [" + sArg + "]");
                        break;
                }
            } else if (sArg == "" || sArg == "\r") {
                // Do nothing
            } else {
                // Throw syntax error and leave
                SyntaxError("1 - i=" + i + " args=" + args.Length + " argCurrent=[" + sArg + "]");
                return;
            }
        }
        // Check presence of input/output
        if (sInput == "") { SyntaxError("2"); return; }

        // Initialize the main entry point for the conversion
        oprConv objConv = new oprConv(errHandle);
        osrMovie objMovie = new osrMovie(errHandle, sLanguage);
        omdbapi objOmdb = new omdbapi(errHandle);

        // Set directory for conversion
        objConv.dirRoot(sOutput);

        // Load the movie dictionary
        if (!objConv.loadMovieDictionary(sDict)) {
            errHandle.DoError("Main", "Could not load movie dictionary from [" + sDict + "]");
            return;
        }

        // Initialise the Treebank Xpath functions, which may make use of tb:matches()
        util.XPathFunctions.conTb.AddNamespace("tb", util.XPathFunctions.TREEBANK_EXTENSIONS);

        // Only a directory is accepted as input here
        if (Directory.Exists(sInput)) {
            WalkDirectoryTree(sInput, "*.folia.xml.gz", sInput, bForce, bSkip, bIsDebug, sAction,
                ref objConv, ref objMovie);
        } else {
            // Show we don't have input file
            errHandle.DoError("Main", "Cannot find input file(s) in: " + sInput);
        }

        // Calculate for each file which others are close to it
        // - try to determine the license information for the best matching .cmdi.xml files
        // - add some more meta-information to the .cmdi.xml files
        objConv.findDuplicates(ref lSubInst, 3, ref objOmdb);

        // Create an overview - if required
        if (bOview) {
            String sOview = objConv.getDistanceOview();
            // Save it in a standard file
            String sFileCsv = Path.GetDirectoryName(sInput) + "/oview.csv";
            File.WriteAllText(sFileCsv, sOview);
        }

        // Exit the program
        Console.WriteLine("Ready");
    } catch (Exception ex) {
        errHandle.DoError("Main", ex); // Provide standard error message
        throw;
    }
}
/// <summary>
/// Main --
///     Command-line entry point + argument handling
///     Options taking a value: -i (input), -m (movie dictionary), -o (output dir), -l (language)
///     Flags: -f (force), -s (skip), -h (action "hash"), -v (overview), -d (debug)
/// </summary>
/// <param name="args">Command-line arguments</param>
static void Main(string[] args) {
    String sInput = "";       // Input file or dir
    String sOutput = "/scratch/ekomen/out/";      // Output directory, if specified
    String sLanguage = "dut"; // This is the language abbreviation used in [osrMovie.cs] for sBaseUrl
    String sDict = "";        // Movie dictionary
    bool bIsDebug = false;    // Debugging
    bool bForce = false;      // Force
    bool bOview = false;      // Make overview or not
    bool bSkip = false;       // Skip everything that has *not* been made
    String sAction = "cmdi";  // Type of action to be taken

    try {
        // Check command-line options
        for (int i = 0; i < args.Length; i++) {
            // get this argument
            String sArg = args[i].Trim();
            if (sArg.StartsWith("-")) {
                errHandle.Status("Processing argument ["+sArg+"]");
                String sOption = sArg.Substring(1);
                // Options that consume the next argument must not be the last argument
                bool bNeedsValue = (sOption == "i" || sOption == "m" || sOption == "o" || sOption == "l");
                if (bNeedsValue && i + 1 >= args.Length) {
                    SyntaxError("3 - option [" + sArg + "] requires a value");
                    return;
                }
                // Check out the arguments
                switch (sOption) {
                    case "i": // Input file or directory with .folia.xml files
                        sInput = args[++i];
                        break;
                    case "f": // Force
                        bForce = true;
                        break;
                    case "s": // Skip
                        bSkip = true;
                        break;
                    case "m": // Movie dictionary -- Tab-separated list from opensubtitles.org
                        sDict = args[++i];
                        break;
                    case "o": // Output directory
                        sOutput = args[++i];
                        break;
                    case "h": // Calculate hashes and add them to existing .cmdi.xml files
                        sAction = "hash";
                        break;
                    case "v": // Make an overview
                        bOview = true;
                        break;
                    case "d": // Debugging
                        bIsDebug = true;
                        break;
                    case "l": // Language (three letter code)
                        sLanguage = args[++i];
                        break;
                    default:  // Unknown options are still ignored, but no longer silently
                        errHandle.Status("Ignoring unknown option [" + sArg + "]");
                        break;
                }
            } else if (sArg == "" || sArg == "\r") {
                // Do nothing
            } else {
                // Throw syntax error and leave
                SyntaxError("1 - i=" + i + " args=" + args.Length + " argCurrent=[" + sArg + "]");
                return;
            }
        }
        // Check presence of input/output
        if (sInput == "" ) { SyntaxError("2"); return; }

        // Initialize the main entry point for the conversion
        oprConv objConv = new oprConv(errHandle);
        osrMovie objMovie = new osrMovie(errHandle, sLanguage);
        omdbapi objOmdb = new omdbapi(errHandle);

        // Set directory for conversion
        objConv.dirRoot(sOutput);

        // Load the movie dictionary
        if (!objConv.loadMovieDictionary(sDict)) {
            errHandle.DoError("Main", "Could not load movie dictionary from [" + sDict + "]");
            return;
        }

        // Initialise the Treebank Xpath functions, which may make use of tb:matches()
        util.XPathFunctions.conTb.AddNamespace("tb", util.XPathFunctions.TREEBANK_EXTENSIONS);

        // Only a directory is accepted as input here
        if (Directory.Exists(sInput)) {
            WalkDirectoryTree(sInput, "*.folia.xml.gz", sInput, bForce, bSkip, bIsDebug, sAction,
                ref objConv, ref objMovie);
        } else {
            // Show we don't have input file
            errHandle.DoError("Main", "Cannot find input file(s) in: " + sInput);
        }

        // Calculate for each file which others are close to it
        // - try to determine the license information for the best matching .cmdi.xml files
        // - add some more meta-information to the .cmdi.xml files
        objConv.findDuplicates(ref lSubInst, 3, ref objOmdb);

        // Create an overview - if required
        if (bOview) {
            String sOview = objConv.getDistanceOview();
            // Save it in a standard file
            String sFileCsv = Path.GetDirectoryName(sInput) + "/oview.csv";
            File.WriteAllText(sFileCsv, sOview);
        }

        // Exit the program
        Console.WriteLine("Ready");
    } catch (Exception ex) {
        errHandle.DoError("Main", ex); // Provide standard error message
        throw;
    }
}
/// <summary>
/// WalkDirectoryTree --
///     Recursively walk the directory starting with @sStartDir
///     Execute the requested action on any file matching @sFilter, using @objConv
///     Directories whose path contains a "raw" segment followed by a separator are skipped.
///     The walk of a directory stops at the first file that fails.
/// </summary>
/// <param name="sStartDir">Directory to process</param>
/// <param name="sFilter">Search pattern handed to Directory.GetFiles</param>
/// <param name="sInput">Original input location; only passed on to the recursive calls</param>
/// <param name="bForce">Passed to ConvertOneOpsToCmdi</param>
/// <param name="bSkip">For action "hash": harvest existing hashes instead of calculating them</param>
/// <param name="bIsDebug">Debugging flag passed to the conversion methods</param>
/// <param name="sAction">The action to be taken: "cmdi", "hash"</param>
/// <param name="objConv">Converter that performs the action</param>
/// <param name="objMovie">Movie information source, used by action "cmdi"</param>
static void WalkDirectoryTree(String sStartDir, String sFilter, String sInput, bool bForce, bool bSkip,
    bool bIsDebug, String sAction, ref oprConv objConv, ref osrMovie objMovie) {
    String[] arFiles = null;
    String[] arSubDirs = null;

    // Exclude 'raw'
    // NOTE(review): a start directory that itself ends in "/raw" (no trailing separator) is not excluded
    if (sStartDir.Contains("/raw/") || sStartDir.Contains("\\raw\\")) {
        return;
    }

    // First, process all the files directly under this folder
    try {
        arFiles = Directory.GetFiles(sStartDir, sFilter);
    } catch (UnauthorizedAccessException e) {
        // Only give warning
        errHandle.Status(e.Message);
    } catch (System.IO.DirectoryNotFoundException e) {
        errHandle.Status(e.Message);
    }

    // Without a file list this directory (and everything below it) is skipped
    if (arFiles == null) return;

    // Walk all files in this directory
    foreach (String sFile in arFiles) {
        // What we do here depends on the action identified
        switch (sAction) {
            case "cmdi":
                // Parse this input file to the output directory
                if (!objConv.ConvertOneOpsToCmdi(sFile, ref objMovie, bForce, bIsDebug)) {
                    errHandle.DoError("Main", "Could not convert file [" + sFile + "]");
                    return;
                }
                break;
            case "hash":
                if (bSkip) {
                    if (!objConv.HarvestHashFromCmdi(sFile, ref lSubInst)) {
                        errHandle.DoError("Main", "Could not harvest hash for file [" + sFile + "]");
                        return;
                    }
                } else {
                    // Calculate the HASH of this .folia.xml file, and put it into the existing CMDI
                    if (!objConv.CalculateHashToCmdi(sFile, ref lSubInst, bIsDebug)) {
                        errHandle.DoError("Main", "Could not calculate hash for file [" + sFile + "]");
                        return;
                    }
                }
                break;
        }
    }

    // Now find all the subdirectories under this directory.
    // Guarded in the same way as GetFiles, so one problematic directory does not abort the whole walk
    try {
        arSubDirs = Directory.GetDirectories(sStartDir);
    } catch (UnauthorizedAccessException e) {
        errHandle.Status(e.Message);
        return;
    } catch (System.IO.DirectoryNotFoundException e) {
        errHandle.Status(e.Message);
        return;
    }

    // Recursive call for each subdirectory
    foreach (String sDirName in arSubDirs) {
        WalkDirectoryTree(sDirName, sFilter, sInput, bForce, bSkip, bIsDebug, sAction, ref objConv, ref objMovie);
    }
}
/// <summary>
/// WalkDirectoryTree --
///     Recursively walk the directory starting with @sStartDir
///     Execute the requested action on any file matching @sFilter, using @objConv
///     Directories whose path contains a "raw" segment followed by a separator are skipped.
///     The walk of a directory stops at the first file that fails.
/// </summary>
/// <param name="sStartDir">Directory to process</param>
/// <param name="sFilter">Search pattern handed to Directory.GetFiles</param>
/// <param name="sInput">Original input location; only passed on to the recursive calls</param>
/// <param name="bForce">Passed to ConvertOneOpsToCmdi</param>
/// <param name="bSkip">For action "hash": harvest existing hashes instead of calculating them</param>
/// <param name="bIsDebug">Debugging flag passed to the conversion methods</param>
/// <param name="sAction">The action to be taken: "cmdi", "hash"</param>
/// <param name="objConv">Converter that performs the action</param>
/// <param name="objMovie">Movie information source, used by action "cmdi"</param>
static void WalkDirectoryTree(String sStartDir, String sFilter, String sInput, bool bForce, bool bSkip,
    bool bIsDebug, String sAction, ref oprConv objConv, ref osrMovie objMovie) {
    String[] arFiles = null;
    String[] arSubDirs = null;

    // Exclude 'raw'
    // NOTE(review): a start directory that itself ends in "/raw" (no trailing separator) is not excluded
    if (sStartDir.Contains("/raw/") || sStartDir.Contains("\\raw\\")) return;

    // First, process all the files directly under this folder
    try {
        arFiles = Directory.GetFiles(sStartDir, sFilter);
    } catch (UnauthorizedAccessException e) {
        // Only give warning
        errHandle.Status(e.Message);
    } catch (System.IO.DirectoryNotFoundException e) {
        errHandle.Status(e.Message);
    }

    // Without a file list this directory (and everything below it) is skipped
    if (arFiles == null) return;

    // Walk all files in this directory
    foreach (String sFile in arFiles) {
        // What we do here depends on the action identified
        switch(sAction) {
            case "cmdi":
                // Parse this input file to the output directory
                if (!objConv.ConvertOneOpsToCmdi(sFile, ref objMovie, bForce, bIsDebug)) {
                    errHandle.DoError("Main", "Could not convert file [" + sFile + "]");
                    return;
                }
                break;
            case "hash":
                if (bSkip) {
                    if (!objConv.HarvestHashFromCmdi(sFile, ref lSubInst)) {
                        errHandle.DoError("Main", "Could not harvest hash for file [" + sFile + "]");
                        return;
                    }
                } else {
                    // Calculate the HASH of this .folia.xml file, and put it into the existing CMDI
                    if (!objConv.CalculateHashToCmdi(sFile, ref lSubInst, bIsDebug)) {
                        errHandle.DoError("Main", "Could not calculate hash for file [" + sFile + "]");
                        return;
                    }
                }
                break;
        }
    }

    // Now find all the subdirectories under this directory.
    // Guarded in the same way as GetFiles, so one problematic directory does not abort the whole walk
    try {
        arSubDirs = Directory.GetDirectories(sStartDir);
    } catch (UnauthorizedAccessException e) {
        errHandle.Status(e.Message);
        return;
    } catch (System.IO.DirectoryNotFoundException e) {
        errHandle.Status(e.Message);
        return;
    }

    // Recursive call for each subdirectory
    foreach (String sDirName in arSubDirs) {
        WalkDirectoryTree(sDirName, sFilter, sInput, bForce, bSkip, bIsDebug, sAction, ref objConv, ref objMovie);
    }
}