/// ------------------------------------------------------------------------------------
/// <summary>
/// Gets a collection of Mark chunk words keyed by their audio offset. An understanding
/// of how SA handles annotations, glosses and references is important to understanding
/// what's going on in this method.
/// </summary>
/// ------------------------------------------------------------------------------------
private SortedDictionary<uint, AudioDocWords> GetMarkInfo()
{
	SortedDictionary<uint, AudioDocWords> markWords =
		new SortedDictionary<uint, AudioDocWords>();

	ResetSegmentEnumerators();

	uint segOffset;
	uint segLength;
	bool isBookmark;
	string glossText;
	string posText;
	string refText;

	// Each mark segment read becomes one AudioDocWords entry, keyed by the
	// segment's offset into the audio data.
	while (ReadMarkSegment(out segOffset, out segLength, out glossText,
		out posText, out refText, out isBookmark))
	{
		AudioDocWords docWord = new AudioDocWords();
		docWord.AudioLength = segLength;

		// The per-annotation text lives in a private field of AudioDocWords,
		// so reflection is used to reach it.
		SortedDictionary<AnnotationType, string> textByType =
			ReflectionHelper.GetField(docWord, "m_words") as SortedDictionary<AnnotationType, string>;

		if (textByType != null)
		{
			textByType[AnnotationType.Gloss] = glossText;
			textByType[AnnotationType.Reference] = refText;
		}

		markWords[segOffset] = docWord;
	}

	// Callers treat "no mark segments" as null rather than an empty collection.
	return (markWords.Count > 0 ? markWords : null);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// When there are no mark segments added to the audio document in SA to indicate word
/// boundaries, the assumption is that all segments belong to a single word. Therefore
/// all segments found for the specified annotation type will be combined into a single
/// word in the specified AudioDocWords object.
/// </summary>
/// ------------------------------------------------------------------------------------
private void BuildSingleAnnotationWord(AnnotationType atype, AudioDocWords adw)
{
	// Nothing to store the result in; bail out early.
	if (adw == null)
		return;

	uint segOffset;
	uint segLength;
	string segText;
	StringBuilder combined = new StringBuilder();

	// Concatenate every segment of this annotation type into one string.
	while (ReadSegment((int)atype, out segOffset, out segLength, out segText))
		combined.Append(segText);

	// If no segments were found there is nothing to save.
	if (combined.Length == 0)
		return;

	// Store the combined text in the private word dictionary via reflection.
	SortedDictionary<AnnotationType, string> textByType =
		ReflectionHelper.GetField(adw, "m_words") as SortedDictionary<AnnotationType, string>;

	if (textByType != null)
		textByType[atype] = combined.ToString();
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Gets a collection of the words in the audio document, keyed by audio offset.
/// Returns null when the document yields no usable word content.
/// </summary>
/// ------------------------------------------------------------------------------------
public SortedDictionary<uint, AudioDocWords> GetWords()
{
	SortedDictionary<uint, AudioDocWords> words = GetMarkInfo();
	ResetSegmentEnumerators();

	// Normal case: mark segments define the word boundaries, so build each
	// annotation type's words against those boundaries.
	if (words != null)
	{
		BuildAnnotationWords(AnnotationType.Phonetic, words);
		BuildAnnotationWords(AnnotationType.Phonemic, words);
		BuildAnnotationWords(AnnotationType.Tone, words);
		BuildAnnotationWords(AnnotationType.Orthographic, words);
		return words;
	}

	// At this point we know there were no mark segments added to the audio
	// document in SA to indicate word boundaries. Therefore combine all the
	// existing segments into single words for each annotation type. This
	// should fix JIRA issue SPM-404.
	words = new SortedDictionary<uint, AudioDocWords>();
	AudioDocWords singleWord = new AudioDocWords();
	words[0] = singleWord;
	BuildSingleAnnotationWord(AnnotationType.Phonetic, singleWord);
	BuildSingleAnnotationWord(AnnotationType.Phonemic, singleWord);
	BuildSingleAnnotationWord(AnnotationType.Tone, singleWord);
	BuildSingleAnnotationWord(AnnotationType.Orthographic, singleWord);

	SortedDictionary<AnnotationType, string> wrds =
		ReflectionHelper.GetField(singleWord, "m_words") as SortedDictionary<AnnotationType, string>;

	if (wrds == null)
		return null;

	// FIX: the original indexed wrds[AnnotationType.Xxx] directly, which throws
	// KeyNotFoundException when BuildSingleAnnotationWord found no segments for a
	// given type (it only adds a key after building non-empty text). TryGetValue
	// treats a missing key the same as empty text, which is the intended meaning.
	AnnotationType[] checkedTypes =
	{
		AnnotationType.Phonetic, AnnotationType.Phonemic,
		AnnotationType.Tone, AnnotationType.Orthographic
	};

	foreach (AnnotationType atype in checkedTypes)
	{
		string text;
		if (wrds.TryGetValue(atype, out text) && !string.IsNullOrEmpty(text))
			return words;
	}

	// Every annotation type came back empty, so report no words at all.
	return null;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Builds the word from segments in the specified annotation type. An understanding
/// of how SA handles annotations, glosses and references is important to understanding
/// what's going on in this method.
/// </summary>
/// <param name="atype">The annotation type whose segments are read and grouped
/// into the words already present (by offset) in <paramref name="words"/>.</param>
/// <param name="words">Word collection keyed by audio offset, normally produced by
/// GetMarkInfo; an entry may be added for a prefix of segments preceding the first
/// word boundary.</param>
/// ------------------------------------------------------------------------------------
private void BuildAnnotationWords(AnnotationType atype, SortedDictionary<uint, AudioDocWords> words)
{
	uint offset;
	uint length;
	uint firstSegOffset = 0;
	uint lengthSum = 0;
	string segment;
	StringBuilder bldr = new StringBuilder();
	// The word whose text is currently being accumulated; null until the
	// first word boundary is reached.
	AudioDocWords prevAdw = null;

	// Read all the segments for the annotation type.
	while (ReadSegment((int)atype, out offset, out length, out segment))
	{
		AudioDocWords currWord;

		// We'll only use lengthSum and firstSegOffset in the case when the first
		// word's offset isn't the same as the offset of the first phonetic segment.
		// NOTE(review): lengthSum keeps accumulating past the first boundary and
		// includes the boundary segment's own length before it is read below —
		// presumably intentional for SA's length semantics, but worth confirming.
		lengthSum += length;
		if (bldr.Length == 0)
			firstSegOffset = offset;

		// When the offset for the current segment matches one already in the
		// collection of words we know we've come to the beginning of the next
		// word (or the first word if the string builder is empty).
		if (words.TryGetValue(offset, out currWord))
		{
			// If we have a word that's been constructed, save it and reset the
			// builder to accept the next word coming down the pike.
			if (bldr.Length > 0)
			{
				// This should only happen when the first word's offset is not the same as
				// the first phonetic segment's offset. When that happens, we need to add
				// a word at the beginning of the collection to accomodate the fact that
				// the audio file contains one or more phonetic segments at the beginning
				// of the transcription that do not belong to a word.
				if (prevAdw == null)
				{
					prevAdw = new AudioDocWords();
					prevAdw.AudioLength = lengthSum;
					words[firstSegOffset] = prevAdw;
				}

				// Store the accumulated text in the word's private dictionary,
				// stripping SA's null-segment placeholder characters.
				SortedDictionary<AnnotationType, string> wrds =
					ReflectionHelper.GetField(prevAdw, "m_words") as SortedDictionary<AnnotationType, string>;

				if (wrds != null)
					wrds[atype] = bldr.ToString().Replace(kNullSegment, string.Empty);

				bldr.Length = 0;
			}

			// Save a reference to the AudioDocWords object so we can
			// store in it the word we're just beginning to construct.
			prevAdw = currWord;
		}

		bldr.Append(segment);
	}

	// Make sure to save the last word constructed.
	if (bldr.Length > 0 && prevAdw != null)
	{
		SortedDictionary<AnnotationType, string> wrds =
			ReflectionHelper.GetField(prevAdw, "m_words") as SortedDictionary<AnnotationType, string>;

		if (wrds != null)
			wrds[atype] = bldr.ToString().Replace(kNullSegment, string.Empty);
	}
}