/// ------------------------------------------------------------------------------------
/// <summary>
/// Reads every mark segment and builds one AudioDocWords entry per segment, keyed by
/// the segment's offset. Each entry receives the segment's gloss and reference text
/// and its length. If two mark segments report the same offset, the later one
/// replaces the earlier one.
/// </summary>
/// <returns>
/// The entries sorted by offset, or null when no mark segments were read.
/// </returns>
/// ------------------------------------------------------------------------------------
private SortedDictionary<uint, AudioDocWords> GetMarkInfo()
{
    var markWords = new SortedDictionary<uint, AudioDocWords>();

    ResetSegmentEnumerators();

    uint markOffset;
    uint markLength;
    string glossText;
    string refText;

    // These two values are required by ReadMarkSegment's signature but are not
    // stored anywhere by this method.
    string pos;
    bool isBkMrk;

    while (ReadMarkSegment(out markOffset, out markLength, out glossText,
        out pos, out refText, out isBkMrk))
    {
        var entry = new AudioDocWords();
        entry.m_words[AnnotationType.Gloss] = glossText;
        entry.m_words[AnnotationType.Reference] = refText;
        entry.AudioLength = markLength;
        markWords[markOffset] = entry;
    }

    // Callers receive null, rather than an empty collection, when nothing was read.
    if (markWords.Count > 0)
    {
        return markWords;
    }

    return null;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Concatenates the text of every segment read for the specified annotation type and
/// stores the result as a single word in the specified AudioDocWords object. This is
/// meant for audio documents that contain no mark segments to indicate word
/// boundaries, in which case all segments are assumed to belong to one word.
/// </summary>
/// <param name="atype">The annotation type whose segments are read.</param>
/// <param name="adw">
/// The object that receives the combined word. When null, nothing is read or stored.
/// </param>
/// ------------------------------------------------------------------------------------
private void BuildSingleAnnotationWord(AnnotationType atype, AudioDocWords adw)
{
    if (adw == null)
    {
        return;
    }

    // The offset and length are required by ReadSegment's signature but only the
    // segment text is used here.
    uint segOffset;
    uint segLength;
    string segText;
    var combined = new StringBuilder();

    while (ReadSegment((int)atype, out segOffset, out segLength, out segText))
    {
        combined.Append(segText);
    }

    // Leave any existing entry for this annotation type untouched when the
    // segments produced no text.
    if (combined.Length == 0)
    {
        return;
    }

    adw.m_words[atype] = combined.ToString();
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Builds words from the segments of the specified annotation type. Segment text is
/// accumulated until a segment is read whose offset is a key in the words collection;
/// that marks the beginning of the next word, so the accumulated text is stored in the
/// previous word's entry and accumulation starts over for the entry just found.
/// When segments with text precede the first matching offset, a new entry is added to
/// the collection (keyed by the offset of the first of those segments) to hold them.
/// </summary>
/// <param name="atype">The annotation type whose segments are read.</param>
/// <param name="words">
/// The words keyed by offset. Entries are updated in place and one entry may be added
/// for leading segments. When null, nothing is read or stored.
/// </param>
/// ------------------------------------------------------------------------------------
private void BuildAnnotationWords(AnnotationType atype,
    SortedDictionary<uint, AudioDocWords> words)
{
    // Mirror the null handling in BuildSingleAnnotationWord rather than failing on
    // the first lookup below.
    if (words == null)
    {
        return;
    }

    uint offset;
    uint length;
    uint firstSegOffset = 0;
    uint lengthSum = 0;
    string segment;
    var bldr = new StringBuilder();
    AudioDocWords prevAdw = null;

    // Read all the segments for the annotation type.
    while (ReadSegment((int)atype, out offset, out length, out segment))
    {
        AudioDocWords currWord;

        // firstSegOffset (like lengthSum) is only used in the case when the first
        // word's offset isn't the same as the offset of the first segment.
        if (bldr.Length == 0)
        {
            firstSegOffset = offset;
        }

        // When the offset for the current segment matches one already in the
        // collection of words we know we've come to the beginning of the next
        // word (or the first word if the string builder is empty).
        if (words.TryGetValue(offset, out currWord))
        {
            // If we have a word that's been constructed, save it and reset the
            // builder to accept the next word.
            if (bldr.Length > 0)
            {
                // This should only happen when the first word's offset is not the
                // same as the first segment's offset. When that happens, we need to
                // add a word at the beginning of the collection to accommodate the
                // fact that one or more segments at the beginning of the
                // transcription do not belong to a word.
                if (prevAdw == null)
                {
                    prevAdw = new AudioDocWords();

                    // lengthSum has not yet been increased by the current segment,
                    // so it covers only the segments preceding this word.
                    prevAdw.AudioLength = lengthSum;
                    words[firstSegOffset] = prevAdw;
                }

                prevAdw.m_words[atype] = bldr.ToString();
                bldr.Length = 0;
            }

            // Save a reference to the AudioDocWords object so we can
            // store in it the word we're just beginning to construct.
            prevAdw = currWord;
        }

        bldr.Append(segment);

        // Accumulate after the boundary check so that the length recorded for a
        // leading word excludes the segment that starts the following word.
        lengthSum += length;
    }

    // Make sure to save the last word constructed. Text that was never associated
    // with an entry (prevAdw is still null) is dropped.
    if (bldr.Length > 0 && prevAdw != null)
    {
        prevAdw.m_words[atype] = bldr.ToString();
    }
}