/// <summary>
/// Extracts the primary and secondary standards from an item's metadata.
/// </summary>
/// <param name="ii">Identifier of the item; used as the context for reported errors.</param>
/// <param name="metadata">Metadata document to search.</param>
/// <returns>
/// The standards found under "PrimaryStandard" followed by those found under
/// "SecondaryStandard". Never empty: a blank <c>ItemStandard</c> is added when
/// nothing was found.
/// </returns>
public static IReadOnlyList<ItemStandard> Extract(ItemIdentifier ii, IXPathNavigable metadata)
{
    var result = new List<ItemStandard>();

    // Get the primary standard
    Extract(ii, metadata, "PrimaryStandard", result);
    if (result.Count == 0)
    {
        ReportingUtility.ReportError(ii, ErrorCategory.Metadata, ErrorSeverity.Tolerable,
            "No PrimaryStandard found in metadata.");
    }
    else if (result.Count > 1)
    {
        // Only report "more than one" when there really is more than one.
        // (A count of zero is already covered by the message above.)
        ReportingUtility.ReportError(ii, ErrorCategory.Metadata, ErrorSeverity.Tolerable,
            "Found more than one PrimaryStandard.", $"count='{result.Count}'");
    }

    // Get any secondary standards
    Extract(ii, metadata, "SecondaryStandard", result);

    // Do not return an empty result - make a blank one if necessary
    if (result.Count == 0)
    {
        result.Add(new ItemStandard());
    }

    return result;
}
/// <summary>
/// Stores <paramref name="value"/> in <paramref name="rDest"/> when the destination
/// is still null or empty; otherwise reports an error if the existing value differs
/// (ordinal comparison) from the new one. An existing value is never overwritten.
/// </summary>
/// <param name="ii">Identifier of the item; used as the context for reported errors.</param>
/// <param name="fieldName">Name of the property being set (included in the error detail).</param>
/// <param name="standard">Standard string(s) being processed (included in the error detail).</param>
/// <param name="rDest">Destination field.</param>
/// <param name="value">Candidate value.</param>
private static void SetCheckMatch(ItemIdentifier ii, string fieldName, string standard, ref string rDest, string value)
{
    if (string.IsNullOrEmpty(rDest))
    {
        rDest = value;
        return;
    }

    if (!string.Equals(rDest, value, System.StringComparison.Ordinal))
    {
        // val1 is the value already stored, val2 the conflicting new value.
        ReportingUtility.ReportError(ii, ErrorCategory.Metadata, ErrorSeverity.Tolerable,
            "Standard publications specify conflicting metadata.",
            $"property='{fieldName}' val1='{rDest}' val2='{value}' standards='{standard}'");
    }
}
/// <summary>
/// Validates the "ASL" attachment of an item: checks the attachment filename,
/// measures the video duration and reports an error when the ratio of video
/// seconds to English character count falls outside
/// mean +/- (standard deviation * tolerance) as configured in <c>TabulatorSettings</c>.
/// The ratio is also recorded in <paramref name="accumulator"/>.
/// </summary>
/// <param name="it">Item context; used to locate the file and as error context.</param>
/// <param name="xml">Item XML from which the attachment filename is read.</param>
/// <param name="englishCharacterCount">Number of characters of English text in the item.</param>
/// <param name="accumulator">Receives the seconds-per-character ratio.</param>
public static void Validate(ItemContext it, IXPathNavigable xml, int englishCharacterCount, StatAccumulator accumulator)
{
    var attachmentFilename = FileUtility.GetAttachmentFilename(it, xml, "ASL");
    if (string.IsNullOrEmpty(attachmentFilename))
    {
        return;
    }

    ValidateFilename(attachmentFilename, it);

    if (!it.FfItem.TryGetFile(attachmentFilename, out FileFile file))
    {
        return;
    }

    double videoSeconds;
    using (var stream = file.Open())
    {
        // NOTE(review): the division by 1000 suggests GetDuration returns milliseconds - confirm.
        videoSeconds = Mp4VideoUtility.GetDuration(stream) / 1000.0;
    }
    if (videoSeconds <= 0.0)
    {
        return;
    }

    // With a zero character count this is +Infinity (floating-point division),
    // which is still reported below as a mismatch.
    double secondToCountRatio = videoSeconds / englishCharacterCount;

    var tolerance = TabulatorSettings.AslStandardDeviation * TabulatorSettings.AslToleranceInStdev;
    var highStandard = TabulatorSettings.AslMean + tolerance;
    var lowStandard = TabulatorSettings.AslMean - tolerance;

    if (secondToCountRatio > highStandard || secondToCountRatio < lowStandard)
    {
        ReportingUtility.ReportError(it, ErrorCategory.Item, ErrorSeverity.Degraded,
            "ASL video length doesn't correlate with text length; possible mismatch.",
            $"videoSeconds={videoSeconds:F3} characterCount={englishCharacterCount} ratio={secondToCountRatio:F3} meanRatio={TabulatorSettings.AslMean} tolerance={tolerance:F3}");
    }

    // Only record meaningful ratios: a non-positive character count yields an
    // infinite or negative ratio that would corrupt the accumulated statistics.
    if (englishCharacterCount > 0)
    {
        accumulator.AddDatum(secondToCountRatio);
    }
}
/// <summary>
/// Checks an ASL video filename against the expected
/// "(stim|passage|item)_{id}_ASL*.mp4" convention (case-insensitive) and reports
/// an error when the pattern is not found, when the embedded ID differs from the
/// item ID, or when the stim/passage/item prefix disagrees with
/// <c>itemContext.IsStimulus</c>.
/// </summary>
/// <param name="fileName">Attachment filename to check.</param>
/// <param name="itemContext">Item the attachment belongs to; also the error context.</param>
private static void ValidateFilename(string fileName, ItemContext itemContext)
{
    const string pattern = @"((stim)|(passage)|(item))_(\d+)_ASL.*\.mp4";

    Match nameMatch = Regex.Match(fileName, pattern, RegexOptions.IgnoreCase);
    if (!nameMatch.Success)
    {
        ReportingUtility.ReportError(itemContext, ErrorCategory.Item, ErrorSeverity.Degraded,
            "ASL video filename does not match expected pattern",
            $"Filename: {fileName} Pattern: {pattern}");
        return;
    }

    // Group 1 is the stim/passage/item prefix, group 5 the numeric ID.
    string prefix = nameMatch.Groups[1].Value;
    string idInName = nameMatch.Groups[5].Value;
    bool saysStim = prefix.Equals("stim", StringComparison.OrdinalIgnoreCase);
    bool saysPassage = prefix.Equals("passage", StringComparison.OrdinalIgnoreCase);
    bool saysItem = prefix.Equals("item", StringComparison.OrdinalIgnoreCase);

    // Should be stim, but is passage
    if (itemContext.IsStimulus && saysPassage)
    {
        ReportingUtility.ReportError(itemContext, ErrorCategory.Item, ErrorSeverity.Benign,
            "ASL video filename for stim is titled as 'passsage' instead of 'stim'",
            $"Filename: {fileName}");
    }

    // Incorrect ItemId
    if (!idInName.Equals(itemContext.ItemId.ToString(), StringComparison.OrdinalIgnoreCase))
    {
        ReportingUtility.ReportError(itemContext, ErrorCategory.Item, ErrorSeverity.Severe,
            "ASL video filename contains an incorrect ID",
            $"Filename: {fileName} Expected ID: {itemContext.ItemId}");
    }

    if (itemContext.IsStimulus)
    {
        // Item video in stim
        if (saysItem)
        {
            ReportingUtility.ReportError(itemContext, ErrorCategory.Item, ErrorSeverity.Severe,
                "ASL video filename indicates item, but base folder is a stim",
                $"Filename: {fileName}");
        }
    }
    else if (saysStim || saysPassage)
    {
        // Stim video in an item
        ReportingUtility.ReportError(itemContext, ErrorCategory.Item, ErrorSeverity.Severe,
            "ASL video filename indicates stim, but base folder is a item",
            $"Filename: {fileName}");
    }
}
/// <summary>
/// Determines whether an &lt;img&gt; element (or, failing that, its parent element)
/// carries the id, readAloud and brailleText references as detected by
/// <c>CheckAltReference</c>, and reports an error for each missing piece.
/// </summary>
/// <param name="it">Item context used when reporting errors.</param>
/// <param name="contentElement">Content element passed through to <c>CheckAltReference</c>.</param>
/// <param name="imgEle">The &lt;img&gt; element to be validated.</param>
/// <param name="brailleSupported">
/// When false, the braille check is treated as already satisfied so no braille
/// error is reported.
/// </param>
/// <returns>True when id, readAloud and brailleText were all found.</returns>
private static bool ImgElementHasValidAltReference(ItemContext it, XPathNavigator contentElement, XPathNavigator imgEle, bool brailleSupported)
{
    bool hasId = false;
    bool hasReadAloud = false;
    bool hasBraille = !brailleSupported; // Suppress errors if braille not supported

    CheckAltReference(contentElement, imgEle, ref hasId, ref hasReadAloud, ref hasBraille);

    // If anything is still missing on the image element itself, check its parent
    bool allFound = hasId && hasReadAloud && hasBraille;
    if (!allFound)
    {
        XPathNavigator parent = imgEle.Clone();
        if (parent.MoveToParent())
        {
            CheckAltReference(contentElement, parent, ref hasId, ref hasReadAloud, ref hasBraille);
        }
    }

    // Without an id the other two references cannot be reported meaningfully
    if (!hasId)
    {
        ReportingUtility.ReportError(it, ErrorCategory.Item, ErrorSeverity.Degraded,
            "Img element does not contain an id attribute necessary to provide alt text.",
            $"Value: {StartTagXml(imgEle)}");
        return false;
    }

    if (!hasReadAloud)
    {
        ReportingUtility.ReportError(it, ErrorCategory.Item, ErrorSeverity.Degraded,
            "Img element does not reference alt text for text-to-speech (no corresponding readAloud element).",
            $"Value: {StartTagXml(imgEle)}");
    }

    if (!hasBraille)
    {
        ReportingUtility.ReportError(it, ErrorCategory.Item, ErrorSeverity.Degraded,
            "Img element does not reference alt text for braille presentation (no corresponding brailleText element).",
            $"Value: {StartTagXml(imgEle)}");
    }

    return hasReadAloud && hasBraille;
}
/// <summary>
/// Walks every element that follows <paramref name="root"/> and reports an error
/// for each prohibited element, prohibited attribute and problematic inline style
/// property (background colors other than transparent, prohibited style
/// properties, and size values using prohibited units).
/// </summary>
/// <param name="it">Item context used when reporting errors.</param>
/// <param name="root">Navigator positioned at the node where the scan starts; it is cloned, not moved.</param>
/// <returns>
/// False when a prohibited element, prohibited attribute or prohibited style
/// property was found; true otherwise.
/// </returns>
static bool ElementsFreeOfProhibitedAttributes(ItemContext it, XPathNavigator root)
{
    bool valid = true;
    XPathNavigator ele = root.Clone();
    while (ele.MoveToFollowing(XPathNodeType.Element))
    {
        // Check for prohibited element
        if (s_prohibitedElements.TryGetValue(ele.Name, out string interferesWith))
        {
            ReportingUtility.ReportError(it, ErrorCategory.Item, ErrorSeverity.Degraded,
                $"Item content has element that may interfere with {interferesWith}.",
                $"element='{StartTagXml(ele)}'");
            valid = false;
        }

        var attribute = ele.Clone();
        if (attribute.MoveToFirstAttribute())
        {
            do
            {
                // Check for prohibited attribute
                if (s_prohibitedAttributes.TryGetValue(attribute.Name, out interferesWith))
                {
                    ReportingUtility.ReportError(it, ErrorCategory.Item, ErrorSeverity.Degraded,
                        $"Item content has attribute that may interfere with {interferesWith}.",
                        $"attribute='{attribute.Name}' element='{StartTagXml(ele)}'");
                    valid = false;
                }

                // Check for prohibited style properties
                else if (string.Equals(attribute.Name, "style", StringComparison.Ordinal))
                {
                    string[] styleProps = attribute.Value.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
                    foreach (string prop in styleProps)
                    {
                        // Split "name: value". Names are lower-cased with the invariant
                        // culture so they match the lookup tables regardless of the
                        // current culture's casing rules (e.g. Turkish dotless i).
                        int ieq = prop.IndexOf(':');
                        string name;
                        string value;
                        if (ieq >= 0)
                        {
                            name = prop.Substring(0, ieq).Trim().ToLowerInvariant();
                            value = prop.Substring(ieq + 1).Trim();
                        }
                        else
                        {
                            name = prop.Trim().ToLowerInvariant();
                            value = string.Empty;
                        }

                        // NOTE(review): the background-color, font and size-unit branches
                        // below report errors but leave 'valid' untouched, unlike the
                        // prohibited-property branch. Confirm whether that is intended.

                        // Special case for "background-color". Transparent is acceptable.
                        if (name.Equals("background-color", StringComparison.Ordinal))
                        {
                            if (!value.Equals("transparent", StringComparison.OrdinalIgnoreCase))
                            {
                                ReportingUtility.ReportError(it, ErrorCategory.Item, ErrorSeverity.Degraded,
                                    "Item content has style property that may interfere with color contrast.",
                                    $"style='{name}' element='{StartTagXml(ele)}'");
                            }
                        }

                        // Special handling for "font". Look for any component with a prohibited suffix
                        else if (name.Equals("font", StringComparison.Ordinal))
                        {
                            foreach (string part in value.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
                            {
                                if (HasProhibitedUnitSuffix(part))
                                {
                                    ReportingUtility.ReportError(it, ErrorCategory.Item, ErrorSeverity.Degraded,
                                        "Item content has style property that may interfere with zoom.",
                                        $"style='{name}' element='{StartTagXml(ele)}'");
                                }
                            }
                        }

                        // Check for prohibited style properties
                        else if (s_prohibitedStyleProperties.TryGetValue(name, out interferesWith))
                        {
                            ReportingUtility.ReportError(it, ErrorCategory.Item, ErrorSeverity.Degraded,
                                $"Item content has style property that may interfere with {interferesWith}.",
                                $"style='{name}' element='{StartTagXml(ele)}'");
                            valid = false;
                        }

                        // Check whether size properties use prohibited units
                        else if (s_styleSizeProperties.Contains(name))
                        {
                            if (HasProhibitedUnitSuffix(value))
                            {
                                ReportingUtility.ReportError(it, ErrorCategory.Item, ErrorSeverity.Degraded,
                                    "Item content has style property that may interfere with zoom.",
                                    $"style='{name}' element='{StartTagXml(ele)}'");
                            }
                        }
                    }
                }
            }
            while (attribute.MoveToNextAttribute());
        }
    }

    return valid;
}
/// <summary>
/// Reads every <c>sa:{standard}</c> node under the metadata root and appends the
/// resulting <c>ItemStandard</c> values to <paramref name="result"/>.
/// Values from different publications are merged into one standard; when a
/// publication is encountered a second time the standard built so far is emitted
/// and a new one is started.
/// </summary>
/// <param name="ii">Identifier of the item; used as the context for reported errors.</param>
/// <param name="metadata">Metadata document to search.</param>
/// <param name="standard">Element name to look for ("PrimaryStandard" or "SecondaryStandard").</param>
/// <param name="result">List that receives the extracted standards.</param>
private static void Extract(ItemIdentifier ii, IXPathNavigable metadata, string standard, List<ItemStandard> result)
{
    XPathNavigator navigator = metadata.CreateNavigator();
    var current = new ItemStandard(); // A new standard has empty string for all values
    var seenStandards = new HashSet<string>();
    var seenPublications = new HashSet<string>();

    foreach (XPathNavigator node in navigator.Select($".//sa:{standard}", s_nsMetadata))
    {
        // Each distinct standard string is processed only once
        if (!seenStandards.Add(node.Value))
        {
            continue;
        }

        string[] parts = node.Value.Split(c_standardDelimiters);
        if (parts.Length < 2)
        {
            ReportingUtility.ReportError(ii, ErrorCategory.Metadata, ErrorSeverity.Tolerable,
                $"{standard} metadata does not match expected format.",
                $"standard='{node.Value}'");
            continue;
        }

        string publication = parts[0];

        // A repeated publication closes the standard being built (if it holds
        // anything) and restarts publication tracking.
        if (seenPublications.Contains(publication))
        {
            if (!current.IsEmpty)
            {
                result.Add(current);
                current = new ItemStandard();
            }
            seenPublications.Clear();
        }

        // Set the common field: standard strings are accumulated, separated by ';'
        current.Standard = string.IsNullOrEmpty(current.Standard)
            ? node.Value
            : string.Concat(current.Standard, ";", node.Value);

        // Parse out the standard according to which publication
        switch (publication)
        {
            case "SBAC-MA-v4":
            case "SBAC-MA-v5":
                current.Subject = cSubjectMath;
                SetCheckMatch(ii, "Claim", current.Standard, ref current.Claim, parts, 1);
                SetCheckMatch(ii, "ContentDomain", current.Standard, ref current.ContentDomain, parts, 2);
                SetTargetCheckMatch(ii, current.Standard, current, parts, 3);
                SetCheckMatch(ii, "Emphasis", current.Standard, ref current.Emphasis, parts, 4);
                SetCheckMatch(ii, "CCSS", current.Standard, ref current.CCSS, parts, 5);
                break;

            case "SBAC-MA-v6":
                current.Subject = cSubjectMath;
                SetCheckMatch(ii, "Claim", current.Standard, ref current.Claim, parts, 1);
                SetCheckMatch(ii, "ContentCategory", current.Standard, ref current.ContentCategory, parts, 2);
                SetCheckMatch(ii, "TargetSet", current.Standard, ref current.TargetSet, parts, 3);
                SetTargetCheckMatch(ii, current.Standard, current, parts, 4);
                break;

            case "SBAC-ELA-v1":
                current.Subject = cSubjectEla;
                SetCheckMatch(ii, "Claim", current.Standard, ref current.Claim, parts, 1);
                SetTargetCheckMatch(ii, current.Standard, current, parts, 2);
                SetCheckMatch(ii, "CCSS", current.Standard, ref current.CCSS, parts, 3);
                break;
        }

        seenPublications.Add(publication);
    }

    // Emit whatever is left over
    if (!current.IsEmpty)
    {
        result.Add(current);
    }
}
/// <summary>
/// Validates each standard (claim, subject and grade) against the expected values
/// and summarizes the list into a <c>ReportingStandard</c> holding the primary
/// CCSS, the secondary CCSS values (joined with ';') and the combined
/// claim/content/target strings.
/// </summary>
/// <param name="ii">Identifier of the item; used as the context for reported errors.</param>
/// <param name="standards">Standards to validate. The first entry is treated as the primary standard, so the list must not be empty.</param>
/// <param name="expectedSubject">Subject from the item attributes.</param>
/// <param name="expectedGrade">Grade from the item attributes.</param>
/// <returns>The summary of the supplied standards.</returns>
public static ReportingStandard ValidateAndSummarize(ItemIdentifier ii, IReadOnlyList<ItemStandard> standards, string expectedSubject, string expectedGrade)
{
    // Validate each of the standards in the list
    foreach (var standard in standards)
    {
        // Validate claim
        if (!sValidClaims.Contains(standard.Claim))
        {
            ReportingUtility.ReportError(ii, ErrorCategory.Metadata, ErrorSeverity.Degraded,
                "Unexpected claim value (should be 1, 2, 3, or 4 with possible suffix).",
                $"Claim='{standard.Claim}'");
        }

        // Validate subject
        if (!string.Equals(standard.Subject, expectedSubject, StringComparison.OrdinalIgnoreCase))
        {
            ReportingUtility.ReportError(ii, ErrorCategory.Metadata, ErrorSeverity.Tolerable,
                "Metadata standard publication indicates subject different from item.",
                $"ItemAttributeSubject='{expectedSubject}' MetadataSubject='{standard.Subject}'");
        }

        // Validate grade (derived from target suffix)
        if (!string.Equals(standard.Grade, expectedGrade, StringComparison.Ordinal)
            && Program.gValidationOptions.IsEnabled("tgs"))
        {
            if (string.IsNullOrEmpty(standard.Grade))
            {
                ReportingUtility.ReportError(ii, ErrorCategory.Metadata, ErrorSeverity.Tolerable,
                    "Grade level target suffix not included in standard reference.",
                    $"ItemAttributeGrade='{expectedGrade}' StandardString='{standard.Standard}'");
            }
            else
            {
                ReportingUtility.ReportError(ii, ErrorCategory.Metadata, ErrorSeverity.Tolerable,
                    "Target suffix indicates a different grade from item attribute.",
                    $"ItemAttributeGrade='{expectedGrade}' TargetSuffixGrade='{standard.Grade}' StandardString='{standard.Standard}'");
            }
        }
    }

    // === Extract the Primary CCSS ===
    // Special case for Math claims 2,3,4. In those cases the primary CCSS is
    // supplied on a secondary standard string with claim 1.
    ItemStandard primary = standards[0];
    bool primaryHasCcss = !string.IsNullOrEmpty(primary.CCSS)
        && !primary.CCSS.Equals(cValueNA, StringComparison.OrdinalIgnoreCase);

    string primaryCCSS = string.Empty;
    int primaryCcssIndex = -1;

    if (primary.Subject.Equals(cSubjectMath, StringComparison.OrdinalIgnoreCase)
        && primary.Claim.Length > 0
        && primary.Claim[0] >= '2' && primary.Claim[0] <= '4')
    {
        if (!primaryHasCcss)
        {
            // Empty CCSS (which should be the case): find the CCSS on a claim 1 standard
            for (int i = 1; i < standards.Count; ++i)
            {
                if (standards[i].Claim.StartsWith("1", StringComparison.Ordinal)
                    && !string.IsNullOrEmpty(standards[i].CCSS)
                    && !standards[i].CCSS.Equals(cValueNA, StringComparison.OrdinalIgnoreCase))
                {
                    primaryCCSS = standards[i].CCSS;
                    primaryCcssIndex = i;
                    break;
                }
            }

            // A match above always carries a usable CCSS, so "not found" is the
            // only failure that can occur here.
            if (primaryCcssIndex < 0)
            {
                ReportingUtility.ReportError(ii, ErrorCategory.Metadata, ErrorSeverity.Tolerable,
                    "Math Claim 2, 3, 4 primary alignment should be paired with a claim 1 secondary alignment.",
                    $"claim='{primary.Claim}'");
            }
        }
        else
        {
            ReportingUtility.ReportError(ii, ErrorCategory.Metadata, ErrorSeverity.Tolerable,
                "Expected blank CCSS for Math Claim 2, 3, or 4",
                $"claim='{primary.Claim}' CCSS='{primary.CCSS}'");
        }
    }

    // Only accept value if it's non-empty and not NA
    else if (primaryHasCcss)
    {
        primaryCCSS = primary.CCSS;
        primaryCcssIndex = 0;
    }

    // Otherwise empty (primaryCCSS is already string.Empty)
    else
    {
        ReportingUtility.ReportError(ii, ErrorCategory.Metadata, ErrorSeverity.Tolerable,
            "CCSS standard is missing from item.",
            $"claim='{primary.Claim}' standard='{primary.Standard}'");
    }

    // === Extract the Secondary CCSS ===
    // Every usable CCSS other than the one chosen as primary, separated by ';'
    var secondaryCcss = new StringBuilder();
    for (int i = 0; i < standards.Count; ++i)
    {
        if (i == primaryCcssIndex)
        {
            continue;
        }

        if (!string.IsNullOrEmpty(standards[i].CCSS)
            && !standards[i].CCSS.Equals(cValueNA, StringComparison.OrdinalIgnoreCase))
        {
            if (secondaryCcss.Length > 0)
            {
                secondaryCcss.Append(';');
            }
            secondaryCcss.Append(standards[i].CCSS);
        }
    }

    // Return the summary value
    return new ReportingStandard(
        primaryCCSS,
        CombineClaimsContentTargets(standards, 0, 1),
        secondaryCcss.ToString(),
        CombineClaimsContentTargets(standards, 1));
}
/// <summary>
/// Tabulates a single wordlist: loads its XML, updates the wordlist, glossary-term
/// and per-term counters, reports an unreferenced wordlist, and writes one summary
/// row to the wordlist report.
/// </summary>
/// <param name="ii">Identifier of the wordlist item within the package.</param>
private void TabulateWordList(ItemIdentifier ii)
{
    // Get the item context
    ItemContext it;
    if (!ItemContext.TryCreate(mPackage, ii, out it))
    {
        ReportingUtility.ReportError(ii, ErrorCategory.Item, ErrorSeverity.Severe, "WordList not found in package.");
        return;
    }

    // Read the item XML
    XmlDocument xml = new XmlDocument(sXmlNt);
    if (!TryLoadXml(it.FfItem, it.FfItem.Name + ".xml", xml))
    {
        ReportingUtility.ReportError(it, ErrorCategory.Item, ErrorSeverity.Severe, "Invalid wordlist file.", LoadXmlErrorDetail);
        return;
    }

    // Count this wordlist
    ++mWordlistCount;

    // See if the wordlist has been referenced
    int refCount = mWordlistRefCounts.Count(it.ToString());
    if (refCount == 0 && !(mPackage is SingleItemPackage))
    {
        ReportingUtility.ReportError(it, ErrorCategory.Wordlist, ErrorSeverity.Benign, "Wordlist is not referenced by any item.");
    }

    // Zero the counts
    int termcount = 0;
    int maxgloss = 0;
    int mingloss = int.MaxValue;
    int totalgloss = 0;

    // Enumerate all terms and count glossary entries
    foreach (XmlNode kwNode in xml.SelectNodes("itemrelease/item/keywordList/keyword"))
    {
        ++mGlossaryTermCount;
        ++termcount;

        // Count this instance of the term
        string term = kwNode.XpEval("@text");
        mTermCounts.Increment(term);

        // Each <html> child of the keyword is one glossary entry
        int glosscount = kwNode.SelectNodes("html").Count;

        maxgloss = Math.Max(maxgloss, glosscount);
        mingloss = Math.Min(mingloss, glosscount);
        totalgloss += glosscount;
    }

    // No terms at all: report a minimum of zero rather than int.MaxValue
    if (mingloss == int.MaxValue)
    {
        mingloss = 0;
    }

    // The average is formatted with the invariant culture so that the decimal
    // separator is always '.' and never a ',' that would split the CSV field.
    string avgGloss = (termcount > 0)
        ? (((double)totalgloss) / ((double)termcount)).ToString("f2", System.Globalization.CultureInfo.InvariantCulture)
        : "0";

    // Folder,BankKey,ItemId,RefCount,TermCount,MaxGloss,MinGloss,AvgGloss
    mWordlistReport.WriteLine(string.Join(",",
        CsvEncode(it.FolderDescription),
        it.BankKey.ToString(),
        it.ItemId.ToString(),
        refCount.ToString(),
        termcount.ToString(),
        maxgloss.ToString(),
        mingloss.ToString(),
        avgGloss));
}
/// <summary>
/// Processes one attachment (audio or image) referenced by a wordlist glossary
/// entry: reports a missing file or a filename that differs only in
/// capitalization, reports attachments shared between different terms or list
/// types, and accumulates the attachment's size and extension.
/// </summary>
/// <remarks>
/// This is kind of ugly with so many parameters but it's the cleanest way to
/// handle this task that's repeated multiple times.
/// </remarks>
/// <param name="filename">Attachment filename as referenced by the glossary entry.</param>
/// <param name="itemIt">Context of the item that references the wordlist.</param>
/// <param name="ii">Identifier of the wordlist.</param>
/// <param name="termIndex">Index of the term within <paramref name="wordlistTerms"/>.</param>
/// <param name="listType">List type (language or image) of the glossary entry.</param>
/// <param name="termReferenced">Whether the item references this term; controls error severity.</param>
/// <param name="wordlistTerms">All terms of the wordlist, addressed by index.</param>
/// <param name="attachmentFiles">Attachment files available in the wordlist folder, mapped to their size.</param>
/// <param name="attachmentToTerm">Tracks which term first referenced each attachment; updated by this call.</param>
/// <param name="type">Accumulated, ';'-separated list of lower-case extensions.</param>
/// <param name="size">Accumulated attachment size. A file that is not found contributes zero.</param>
void ProcessGlossaryAttachment(string filename, ItemContext itemIt, ItemIdentifier ii, int termIndex, string listType, bool termReferenced, List<string> wordlistTerms, Dictionary<string, long> attachmentFiles, Dictionary<string, TermAttachmentReference> attachmentToTerm, ref string type, ref long size)
{
    long fileSize = 0;
    if (!attachmentFiles.TryGetValue(filename, out fileSize))
    {
        // Look for case-insensitive match (file will not be found on Linux systems)
        // (This is a linear search but it occurs rarely so not a significant issue)
        string caseMismatchFilename = null;
        foreach (var pair in attachmentFiles)
        {
            if (string.Equals(filename, pair.Key, StringComparison.OrdinalIgnoreCase))
            {
                caseMismatchFilename = pair.Key;
                break;
            }
        }

        if (termReferenced)
        {
            if (caseMismatchFilename == null)
            {
                ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Severe,
                    "Wordlist attachment not found.",
                    "filename='{0}' term='{1}' termIndex='{2}'",
                    filename, wordlistTerms[termIndex], termIndex);
            }
            else
            {
                ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Degraded,
                    "Wordlist audio filename differs in capitalization (will fail on certain platforms).",
                    "referenceFilename='{0}' actualFilename='{1}' termIndex='{2}'",
                    filename, caseMismatchFilename, termIndex);
            }
        }
        else if (Program.gValidationOptions.IsEnabled("mwa")) // Term not referenced
        {
            if (caseMismatchFilename == null)
            {
                ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Benign,
                    "Wordlist attachment not found. Benign because corresponding term is not referenced.",
                    "filename='{0}' term='{1}' termIndex='{2}'",
                    filename, wordlistTerms[termIndex], termIndex);
            }
            else
            {
                ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Benign,
                    "Wordlist attachment filename differs in capitalization. Benign because corresponding term is not referenced.",
                    "referenceFilename='{0}' actualFilename='{1}' termIndex='{2}'",
                    filename, caseMismatchFilename, termIndex);
            }
        }
    }

    // See if this attachment has previously been referenced
    TermAttachmentReference previousTerm = null;
    if (attachmentToTerm.TryGetValue(filename, out previousTerm))
    {
        // Error if different terms (case insensitive)
        if (!string.Equals(wordlistTerms[termIndex], wordlistTerms[previousTerm.TermIndex], StringComparison.InvariantCultureIgnoreCase))
        {
            ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Severe,
                "Two different wordlist terms reference the same attachment.",
                "filename='{0}' termA='{1}' termB='{2}' termIndexA='{3}' termIndexB='{4}'",
                filename, wordlistTerms[previousTerm.TermIndex], wordlistTerms[termIndex], previousTerm.TermIndex, termIndex);
        }

        // Error if different listTypes (language or image)
        if (!string.Equals(listType, previousTerm.ListType, StringComparison.Ordinal))
        {
            ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Severe,
                "Same wordlist attachment used for different languages or types.",
                "filename='{0}' term='{1}' typeA='{2}' typeB='{3}' termIndexA='{4}' termIndexB='{5}'",
                filename, wordlistTerms[termIndex], previousTerm.ListType, listType, previousTerm.TermIndex, termIndex);
        }
    }
    else
    {
        attachmentToTerm.Add(filename, new TermAttachmentReference(termIndex, listType, filename));
    }

    size += fileSize;

    string extension = Path.GetExtension(filename);
    if (extension.Length > 1)
    {
        extension = extension.Substring(1); // Remove dot from extension
    }

    // Lower-case with the invariant culture so the reported type does not depend
    // on the current culture's casing rules.
    extension = extension.ToLowerInvariant();
    type = string.IsNullOrEmpty(type)
        ? extension
        : string.Concat(type, ";", extension);
}
/// <summary>
/// Validates the wordlist vocabulary for a particular item: checks that the
/// wordlist exists and is well-formed, validates every glossary entry and its
/// attachments, writes one glossary-report row per entry, and verifies that the
/// terms referenced by the item match the wordlist terms.
/// </summary>
/// <param name="bankKey">Bank key of the wordlist.</param>
/// <param name="wordlistId">ID of the wordlist.</param>
/// <param name="itemIt">Context of the item that references the wordlist.</param>
/// <param name="termIndices">Wordlist term indices referenced by the item.</param>
/// <param name="terms">Item text for each entry of <paramref name="termIndices"/> (parallel list).</param>
/// <returns>
/// The aggregate glossary-type bit flags found across all terms, or 0 when the
/// wordlist could not be found or loaded.
/// </returns>
private GlossaryTypes ValidateWordlistVocabulary(string bankKey, string wordlistId, ItemContext itemIt, List<int> termIndices, List<string> terms)
{
    // Make sure the wordlist exists
    ItemIdentifier ii = new ItemIdentifier(cItemTypeWordlist, bankKey, wordlistId);
    FileFolder ff;
    if (!mPackage.TryGetItem(ii, out ff))
    {
        if (!(mPackage is SingleItemPackage))
        {
            ReportingUtility.ReportError(itemIt, ErrorCategory.Item, ErrorSeverity.Degraded,
                "Item references non-existent wordlist (WIT)", "wordlistId='{0}'", wordlistId);
        }
        return 0;
    }

    // Read the wordlist XML
    var xml = new XmlDocument(sXmlNt);
    if (!TryLoadXml(ff, ii.FullId + ".xml", xml))
    {
        ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Severe, "Invalid wordlist file.", LoadXmlErrorDetail);
        return 0;
    }

    // Make sure this is a wordlist
    if (!string.Equals(xml.XpEvalE("itemrelease/item/@type"), cItemTypeWordlist))
    {
        ReportingUtility.ReportError(itemIt, ErrorCategory.Item, ErrorSeverity.Severe,
            "WordList reference is to a non-wordList item.", $"referencedId='{ii.ItemId}'");
        return 0;
    }

    // Sanity check
    if (!string.Equals(xml.XpEvalE("itemrelease/item/@id"), ii.ItemId.ToString()))
    {
        ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Severe, "Wordlist file id mismatch.",
            $"wordListId='{xml.XpEval("itemrelease/item/@id")}' expected='{ii.ItemId}'");
        return 0;
    }

    // Add this to the wordlist queue (if not there already) and manage progress count
    if (mWordlistQueue.Add(ii) && mItemQueue.Contains(ii))
    {
        ++mTransferCount;
    }

    // Create a dictionary of attachment files (everything that is not XML)
    Dictionary<string, long> attachmentFiles = new Dictionary<string, long>();
    foreach (FileFile fi in ff.Files)
    {
        var extension = fi.Extension.ToLowerInvariant();
        if (!string.Equals(extension, ".xml", StringComparison.Ordinal))
        {
            attachmentFiles.Add(fi.Name, fi.Length);
        }
    }

    // Create a hashset of all wordlist terms that are referenced by the item
    HashSet<int> referencedIndices = new HashSet<int>(termIndices);

    // Load up the list of wordlist terms (first pass)
    List<string> wordlistTerms = new List<string>();
    foreach (XmlNode kwNode in xml.SelectNodes("itemrelease/item/keywordList/keyword"))
    {
        // Get the term and its index
        string term = kwNode.XpEval("@text");
        string rawIndex = kwNode.XpEval("@index");
        int index;
        if (!int.TryParse(rawIndex, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out index)
            || index < 0)
        {
            // A malformed or negative index cannot address the term list; report it
            // and skip the keyword instead of aborting the whole tabulation.
            ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Severe,
                "Wordlist term has a missing or invalid index.", "term='{0}' index='{1}'", term, rawIndex);
            continue;
        }

        // Make sure the index is unique and add to the term list
        while (wordlistTerms.Count <= index)
        {
            wordlistTerms.Add(string.Empty);
        }
        if (!string.IsNullOrEmpty(wordlistTerms[index]))
        {
            ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Severe,
                "Wordlist has multiple terms with the same index.", "index='{0}'", index);
        }
        else
        {
            wordlistTerms[index] = term;
        }
    }

    // Keep track of term information for error checks
    Dictionary<string, TermAttachmentReference> attachmentToReference = new Dictionary<string, TermAttachmentReference>();

    // Loop-invariant first column of the glossary report
    string folderDescription = string.Concat(mPackage.Name, "/", ii.FolderName);

    // Enumerate all the terms in the wordlist (second pass)
    int ordinal = 0;
    GlossaryTypes aggregateGlossariesFound = 0;
    foreach (XmlNode kwNode in xml.SelectNodes("itemrelease/item/keywordList/keyword"))
    {
        ++ordinal;

        // Get the term and its index
        string term = kwNode.XpEval("@text");
        int index;
        if (!int.TryParse(kwNode.XpEval("@index"), System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out index)
            || index < 0)
        {
            continue; // Invalid index; already reported during the first pass
        }

        // See if this term is referenced by the item.
        bool termReferenced = referencedIndices.Contains(index);
        if (!termReferenced && Program.gValidationOptions.IsEnabled("uwt"))
        {
            ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Benign,
                "Wordlist term is not referenced by item.", "term='{0}' termIndex='{1}'", term, index);
        }

        // Find the attachment references and enumerate the translations
        GlossaryTypes glossariesFound = 0;
        foreach (XmlNode htmlNode in kwNode.SelectNodes("html"))
        {
            var listType = htmlNode.XpEval("@listType");
            mTranslationCounts.Increment(listType);

            // gt stays at its default (0) when the list type is not a known glossary
            if (sKnownGlossariesIndex.TryGetValue(listType, out GlossaryTypes gt))
            {
                glossariesFound |= gt;
            }

            // Get the embedded HTML
            string html = htmlNode.InnerText;

            string audioType = string.Empty;
            long audioSize = 0;
            string imageType = string.Empty;
            long imageSize = 0;

            // Look for an audio glossary entry
            Match match = sRxAudioAttachment.Match(html);
            if (match.Success)
            {
                // Use RegEx to find the audio glossary entry in the contents.
                string filename = match.Groups[1].Value;
                ProcessGlossaryAttachment(filename, itemIt, ii, index, listType, termReferenced, wordlistTerms, attachmentFiles, attachmentToReference, ref audioType, ref audioSize);

                // Check for dual types: an .ogg reference implies an .m4a sibling and vice versa
                if (string.Equals(Path.GetExtension(filename), ".ogg", StringComparison.OrdinalIgnoreCase))
                {
                    filename = Path.GetFileNameWithoutExtension(filename) + ".m4a";
                    ProcessGlossaryAttachment(filename, itemIt, ii, index, listType, termReferenced, wordlistTerms, attachmentFiles, attachmentToReference, ref audioType, ref audioSize);
                }
                else if (string.Equals(Path.GetExtension(filename), ".m4a", StringComparison.OrdinalIgnoreCase))
                {
                    filename = Path.GetFileNameWithoutExtension(filename) + ".ogg";
                    ProcessGlossaryAttachment(filename, itemIt, ii, index, listType, termReferenced, wordlistTerms, attachmentFiles, attachmentToReference, ref audioType, ref audioSize);
                }

                // If filename matches the naming convention, ensure that values are correct
                // NOTE(review): for dual-type audio, 'filename' now holds the sibling
                // name rather than the name referenced in the HTML - confirm intended.
                Match match2 = sRxAttachmentNamingConvention.Match(filename);
                if (match2.Success)
                {
                    // Sample attachment filename that follows the convention:
                    // item_116605_v1_116605_01btagalog_glossary_ogg_m4a.m4a

                    // Check both instances of the wordlist ID
                    // NOTE(review): this reports only when NEITHER instance matches;
                    // confirm whether "either differs" was intended.
                    if (!wordlistId.Equals(match2.Groups[1].Value, StringComparison.Ordinal)
                        && !wordlistId.Equals(match2.Groups[2].Value, StringComparison.Ordinal))
                    {
                        ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Degraded,
                            "Wordlist attachment filename indicates wordlist ID mismatch.",
                            "filename='{0}' filenameItemId='{1}' expectedItemId='{2}'",
                            filename, match2.Groups[1].Value, wordlistId);
                    }

                    // Check that the wordlist term index matches
                    /* While most filename indices match. It's quite common for them not to match and still be the correct audio
                     * Disabling this check because it's mostly false alarms.
                     *
                     * int filenameIndex;
                     * if (!int.TryParse(match2.Groups[3].Value, out filenameIndex)) filenameIndex = -1;
                     * if (filenameIndex != index && filenameIndex != ordinal
                     *     && (filenameIndex >= wordlistTerms.Count || !string.Equals(wordlistTerms[filenameIndex], term, StringComparison.OrdinalIgnoreCase)))
                     * {
                     *     ReportingUtility.ReportWitError(ItemIt, it, ErrorSeverity.Degraded, "Wordlist attachment filename indicates term index mismatch.", "filename='{0}' filenameIndex='{1}' expectedIndex='{2}'", filename, filenameIndex, index);
                     * }
                     */

                    // Translate from language in the naming convention to listType value.
                    // Invariant lower-casing keeps the case labels matching under any culture.
                    string filenameListType = match2.Groups[4].Value.ToLowerInvariant();
                    switch (filenameListType)
                    {
                        // Special cases
                        case "spanish":
                            filenameListType = "esnGlossary";
                            break;

                        case "tagalog":
                        case "atagalog":
                        case "btagalog":
                        case "ilocano":
                        case "atagal":
                            filenameListType = "tagalGlossary";
                            break;

                        case "apunjabi":
                        case "bpunjabi":
                        case "punjabiwest":
                        case "punjabieast":
                            filenameListType = "punjabiGlossary";
                            break;

                        // Conventional case
                        default:
                            filenameListType = string.Concat(filenameListType, "Glossary");
                            break;
                    }

                    if (!filenameListType.Equals(listType))
                    {
                        ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Degraded,
                            "Wordlist audio filename indicates attachment language mismatch.",
                            "filename='{0}' filenameListType='{1}' expectedListType='{2}'",
                            filename, filenameListType, listType);
                    }
                }
            }

            // Look for an image glossary entry
            match = sRxImageAttachment.Match(html);
            if (match.Success)
            {
                // Use RegEx to find the illustration glossary entry in the contents.
                string filename = match.Groups[1].Value;
                ProcessGlossaryAttachment(filename, itemIt, ii, index, listType, termReferenced, wordlistTerms, attachmentFiles, attachmentToReference, ref imageType, ref imageSize);
            }
            else if (listType.Equals("illustration", StringComparison.Ordinal))
            {
                ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Degraded,
                    "Illustration glossary entry does not include image.", "term='{0}' index='{1}'", term, index);
            }

            // Report error if translated glossary lacks audio
            if ((gt & sAllTranslatedGlossaries) != 0 && string.IsNullOrEmpty(audioType))
            {
                ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Degraded,
                    "Translated glossary entry lacks audio.", "term='{0}' index='{1}'", term, index);
            }

            // Folder,BankKey,WIT_ID,ItemId,Index,Term,Language,Length,Audio,AudioSize,Image,ImageSize[,Html]
            var reportFields = new List<string>
            {
                CsvEncode(folderDescription),
                ii.BankKey.ToString(),
                ii.ItemId.ToString(),
                itemIt.ItemId.ToString(),
                index.ToString(),
                CsvEncodeExcel(term),
                CsvEncode(listType),
                html.Length.ToString(),
                audioType,
                audioSize.ToString(),
                imageType,
                imageSize.ToString()
            };

            // The embedded HTML is appended only when the "gtr" option is enabled
            if (Program.gValidationOptions.IsEnabled("gtr"))
            {
                reportFields.Add(CsvEncode(html));
            }
            mGlossaryReport.WriteLine(string.Join(",", reportFields));
        }

        // Report any expected translations that weren't found
        if (termReferenced
            && (glossariesFound & sExpectedTranslatedGlossaries) != 0 // at least one translated glossary
            && (glossariesFound & sExpectedTranslatedGlossaries) != sExpectedTranslatedGlossaries) // not all translated glossaries
        {
            // Make a list of translations that weren't found
            string missedTranslations = (sExpectedTranslatedGlossaries & ~glossariesFound).ToString();
            ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Tolerable,
                "Wordlist term does not include all expected translations.",
                "term='{0}' missing='{1}'", term, missedTranslations);
        }

        aggregateGlossariesFound |= glossariesFound;
    }

    Porter.Stemmer stemmer = new Porter.Stemmer();

    // Make sure terms match references
    for (int i = 0; i < termIndices.Count; ++i)
    {
        int index = termIndices[i];

        // A negative index is treated like any other index that has no term
        if (index < 0 || index >= wordlistTerms.Count || string.IsNullOrEmpty(wordlistTerms[index]))
        {
            ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Benign,
                "Item references non-existent wordlist term.", "text='{0}' termIndex='{1}'", terms[i], index);
        }
        else if (!stemmer.TermsMatch(terms[i], wordlistTerms[index]))
        {
            ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Degraded,
                "Item text does not match wordlist term.",
                "text='{0}' term='{1}' termIndex='{2}'", terms[i], wordlistTerms[index], index);
        }
    }

    // Report unreferenced attachments
    if (Program.gValidationOptions.IsEnabled("umf"))
    {
        foreach (var pair in attachmentFiles)
        {
            if (!attachmentToReference.ContainsKey(pair.Key))
            {
                ReportingUtility.ReportWitError(itemIt, ii, ErrorSeverity.Benign,
                    "Unreferenced wordlist attachment file.", "filename='{0}'", pair.Key);
            }
        }
    }

    return aggregateGlossariesFound;
}