/// <summary>
/// Builds an <c>InfoLine</c> for one utterance from its XML segment nodes.
/// </summary>
/// <remarks>
/// The transcript is the inner text of every <c>w</c> node selected under each segment node, joined
/// with single spaces and passed through <c>StringProcess.CleanupSpace</c>.
/// Start and end times come from the <c>value</c> attribute of the <c>time</c> child of the first and
/// last segment nodes that have such a child, but only when more than one segment node has one;
/// otherwise both times stay at 0. A time that is missing or cannot be converted is left at 0 rather
/// than failing the whole line.
/// </remarks>
/// <param name="segNodes">Segment nodes of the utterance; enumerated exactly once.</param>
/// <param name="locale">Not used by this method.</param>
/// <param name="corpus">Corpus identifier passed through to the <c>InfoLine</c>.</param>
/// <param name="sessionId">Session identifier passed through to the <c>InfoLine</c>.</param>
/// <param name="internalId">Internal identifier passed through to the <c>InfoLine</c>.</param>
/// <returns>
/// An <c>InfoLine</c> created with the fixed values <c>"U"</c> and <c>"&lt;NA/&gt;"</c> around the
/// identifiers, the two times and the transcript.
/// </returns>
private static InfoLine CreateTcFromXml(IEnumerable<XmlNode> segNodes, string locale, string corpus, string sessionId, string internalId)
{
    // Materialize once: the sequence may be lazy, and it is needed for both the transcript and the times.
    List<XmlNode> nodes = segNodes.ToList();

    IEnumerable<string> words = nodes.SelectMany(x => x.SelectNodes("w").Cast<XmlNode>().Select(y => y.InnerText));
    string trans = StringProcess.CleanupSpace(string.Join(" ", words));

    // Run the "has a time child" filter a single time and reuse the result for first/last.
    List<XmlNode> timedNodes = nodes.Where(x => x["time"] != null).ToList();

    double startTime = 0;
    double endTime = 0;
    if (timedNodes.Count > 1)
    {
        startTime = ReadTimeSecOrZero(timedNodes[0]);
        endTime = ReadTimeSecOrZero(timedNodes[timedNodes.Count - 1]);
    }

    return new InfoLine(corpus, "U", sessionId, internalId, startTime, endTime, trans, "<NA/>");
}

/// <summary>
/// Best-effort read of the <c>value</c> attribute of a node's <c>time</c> child, converted with
/// <c>Common.TimeStrToSec</c>. Returns 0 when the element or attribute is absent or conversion fails.
/// </summary>
private static double ReadTimeSecOrZero(XmlNode node)
{
    XmlElement timeElement = node["time"];
    XmlAttribute valueAttribute = timeElement == null ? null : timeElement.Attributes["value"];
    if (valueAttribute == null)
    {
        return 0;
    }

    try
    {
        return Common.TimeStrToSec(valueAttribute.Value);
    }
    catch (Exception)
    {
        // Deliberate best-effort: the converter's failure modes are not visible here, and an
        // unparsable time has always been reported as 0 instead of aborting the line.
        return 0;
    }
}
/// <summary>
/// Rewrites the text of every item stored under the <c>"SYL"</c> key of <c>ItemDict</c>.
/// </summary>
/// <remarks>
/// An item whose text contains a forward slash or a backslash is replaced with the literal
/// <c>&lt;overlap&gt;</c>. Any other item is passed through <c>StringProcess.NormXSil</c> and then
/// <c>StringProcess.CleanupSpace</c>.
/// </remarks>
private void TaggingSyl()
{
    foreach (TextGridItem item in ItemDict["SYL"])
    {
        var currentText = item.Text;
        bool hasSlash = currentText.Contains('/') || currentText.Contains('\\');

        // The normalization helpers are only invoked for items without a slash.
        string replacement = hasSlash
            ? "<overlap>"
            : StringProcess.CleanupSpace(StringProcess.NormXSil(currentText));

        item.UpdateText(replacement);
    }
}
/// <summary>
/// Produces a space-free copy of the clean data and merges tag positions into already word-broken text.
/// </summary>
/// <remarks>
/// The word-break step (<c>RunWordBreak</c>) is disabled in this method, so <paramref name="wbrPath"/>
/// must already exist when it is called. The file written to <paramref name="noEmptyPath"/> is not read
/// back here. Lines are paired positionally; <c>Zip</c> stops at the shorter of the two inputs.
/// </remarks>
/// <param name="cleanDataPath">Input file whose lines are copied with every space character removed.</param>
/// <param name="tagDataPath">Input file from which tag prefix indices are computed per line.</param>
/// <param name="noEmptyPath">Output file receiving the space-free lines.</param>
/// <param name="wbrPath">Existing word-broken input file.</param>
/// <param name="outputPath">Output file receiving the tagged, space-normalized lines.</param>
private void CreateNewData(string cleanDataPath, string tagDataPath, string noEmptyPath, string wbrPath, string outputPath)
{
    var spaceFreeLines = from line in File.ReadLines(cleanDataPath)
                         select line.Replace(" ", string.Empty);
    File.WriteAllLines(noEmptyPath, spaceFreeLines);

    var tagIndicesPerLine = from line in File.ReadLines(tagDataPath)
                            select StringProcess.GetTagPrefixIndices(line);

    // Word breaking (RunWordBreak over noEmptyPath -> wbrPath) is intentionally not run here.
    var wordBrokenLines = File.ReadLines(wbrPath);

    var taggedLines = wordBrokenLines.Zip(
        tagIndicesPerLine,
        (words, tagIndices) => StringProcess.CleanupSpace(StringProcess.InsertTagToWords(words, " <bi> ", tagIndices)));

    File.WriteAllLines(outputPath, taggedLines);
}
/// <summary>
/// Re-runs word breaking on the clean data and merges tag positions into the word-broken result.
/// </summary>
/// <remarks>
/// A space-free copy of <paramref name="cleanDatapath"/> is written to a uniquely named temporary
/// file under <c>Cfg.TmpFolder</c>, fed to <c>RunWordBreak.WordBreak</c>, and deleted afterwards.
/// Word-broken lines and tag lines are paired positionally; <c>Zip</c> stops at the shorter input.
/// </remarks>
/// <param name="cleanDatapath">Input file whose lines are word-broken after all spaces are removed.</param>
/// <param name="tagDataPath">Input file from which tag prefix indices are computed per line.</param>
/// <param name="outputPath">Output file receiving the tagged, space-normalized lines.</param>
/// <param name="wbrPath">File the word breaker writes to; read back by this method.</param>
/// <param name="tag">Tag text inserted into the words, surrounded by one space on each side.</param>
private void RefreshTextGridWbr(string cleanDatapath, string tagDataPath, string outputPath, string wbrPath, string tag)
{
    string tmpName = Guid.NewGuid().ToString();
    string noEmptyPath = Path.Combine(Cfg.TmpFolder, tmpName + ".noEmpty");

    try
    {
        var noEmptyList = File.ReadLines(cleanDatapath).Select(x => x.Replace(" ", string.Empty));
        File.WriteAllLines(noEmptyPath, noEmptyList);

        RunWordBreak rwb = new RunWordBreak(Cfg);
        rwb.WordBreak(noEmptyPath, wbrPath);
    }
    finally
    {
        // The temp file is only an input to the word breaker and its GUID name is known to nobody
        // else, so remove it here instead of leaving one file behind in the temp folder per call.
        DeleteTempFileQuietly(noEmptyPath);
    }

    var tagList = File.ReadLines(tagDataPath).Select(x => StringProcess.GetTagPrefixIndices(x));
    var wbrList = File.ReadLines(wbrPath);
    var outputList = wbrList
        .Zip(tagList, (x, y) => StringProcess.InsertTagToWords(x, " " + tag + " ", y))
        .Select(x => StringProcess.CleanupSpace(x));
    File.WriteAllLines(outputPath, outputList);
}

/// <summary>
/// Deletes a temporary file if it exists, ignoring I/O and permission failures.
/// </summary>
private static void DeleteTempFileQuietly(string path)
{
    try
    {
        if (File.Exists(path))
        {
            File.Delete(path);
        }
    }
    catch (IOException)
    {
        // Cleanup is best-effort: a locked temp file must not fail work that otherwise succeeded
        // or hide the exception that is already propagating.
    }
    catch (UnauthorizedAccessException)
    {
        // Same reasoning as above.
    }
}
/// <summary>
/// Copies a word-broken file line by line, passing each line through <c>StringProcess.CleanupSpace</c>.
/// </summary>
/// <param name="wbrPath">Input file to read.</param>
/// <param name="outputPath">Output file receiving the cleaned lines.</param>
private void CleanupWbr(string wbrPath, string outputPath)
{
    var cleanedLines = from rawLine in File.ReadLines(wbrPath)
                       select StringProcess.CleanupSpace(rawLine);

    File.WriteAllLines(outputPath, cleanedLines);
}