/**********************************************************************
 *** Processing Functions
 **********************************************************************/

/// <summary>
/// Loads the document at <paramref name="filePath"/>, extracts its hyperlinks
/// into <c>_enLinks</c>, translates them into <c>_deLinks</c> and shows the
/// first link. Progress is reported through <c>stsAction</c>.
/// </summary>
/// <remarks>
/// The wait cursor is shown while processing and is restored on every exit
/// path, including early returns and exceptions thrown by a later stage.
/// </remarks>
/// <param name="filePath">File path of the document to process.</param>
private void ProcessFile(string filePath)
{
    _filePath = filePath;
    UpdateDialogTitle();

    Cursor.Current = Cursors.WaitCursor;
    try
    {
        // Read the OpenOffice document into memory. If the user still has the
        // same file open in OpenOffice this operation fails with an exception;
        // we then give them the chance to close the document and retry.
        _doc = new OpenOfficeDoc();
        while (true)
        {
            try
            {
                stsAction.Text = "Reading file";
                if (!_doc.Read(filePath))
                {
                    return;
                }
                break;
            }
            catch (Exception ex)
            {
                DialogResult res = MessageBox.Show(
                    "Error: Could not read file from disk. Original error: " + ex.Message,
                    "File Open Error",
                    MessageBoxButtons.RetryCancel);
                if (res != DialogResult.Retry)
                {
                    stsAction.Text = "Processing aborted.";
                    return;
                }

                // NOTE(review): the modal dialog pumps messages, which presumably
                // resets the current cursor; re-assert the wait cursor for the retry.
                Cursor.Current = Cursors.WaitCursor;
            }
        }

        // extract the hyperlinks from the document
        stsAction.Text = "Extracting hyperlinks";
        ClearAllLinks();
        if (!_doc.ExtractLinks(_enLinks))
        {
            return;
        }

        // translate these hyperlinks
        stsAction.Text = "Translating hyperlinks";
        _lt.TranslateLinks2(_enLinks, _deLinks);

        // show the links and their translations
        _curIdx = 0;
        ShowCurrentLink();

        stsAction.Text = "Done";
        appendLinksToDocumentToolStripMenuItem.Enabled = true;
    }
    finally
    {
        // Single place that restores the cursor, so no exit path can leave
        // the wait cursor showing.
        Cursor.Current = Cursors.Default;
    }
}
/// <summary>
/// Collect links and their translations from an OpenOffice Writer file.
/// We assume that the file contains first the English text, then the translated
/// German text. We build on the fact that the sequence of hyperlinks is unchanged
/// in the translation so that we can associate each link with its translated version.
/// </summary>
/// <remarks>
/// Nothing is written if the document cannot be read, its links cannot be
/// extracted, or it contains no language separator. A document with an odd
/// number of links is reported on <c>_msgStream</c> and skipped. Links whose
/// English target is not a well-formed absolute URI are skipped silently, as
/// they cannot be Wikipedia links.
/// </remarks>
/// <param name="filePath">File path to the input document.</param>
private void CollectLinksFromDocument(string filePath)
{
    OpenOfficeDoc doc = new OpenOfficeDoc();
    if (!doc.Read(filePath))
    {
        return;
    }

    List<Hyperlink> links = new List<Hyperlink>();
    if (!doc.ExtractLinks(links))
    {
        return;
    }

    if (!doc.ContainsLangSeparator())
    {
        return;
    }

    // fold link list in half and treat the second half as translations
    // of the first half
    if (links.Count % 2 != 0)
    {
        _msgStream.WriteLine("{0}: Number of links in document is not even! Skipping this document.", filePath);
        return;
    }

    int splitIndex = links.Count / 2;

    // walk both halves in sync and record potential translation candidates in
    // text lines of the format
    //
    //    <english-text>;<english-link>;<german-text>;<german-link>
    //
    // We only collect links to Wikipedia.org!
    //
    for (int i = 0; i < splitIndex; ++i)
    {
        Hyperlink enLink = links[i];
        Hyperlink deLink = links[splitIndex + i];

        // Relative, malformed or null targets (e.g. in-document anchors) would
        // make the Uri constructor throw and abort the whole document; skip them.
        Uri uri;
        if (!Uri.TryCreate(enLink.uri, UriKind.Absolute, out uri))
        {
            continue;
        }

        if (uri.Authority == "en.wikipedia.org")
        {
            _outStream.WriteLine("{0};{1};{2};{3}", enLink.text, enLink.uri, deLink.text, deLink.uri);
        }
    }
}