Пример #1
0
 /// <summary>
 /// For every language in <c>Dict.allLangs</c>, runs stemming over a combined word list and writes the
 /// distinct, sorted, normalized results to <c>RwDicts\Sources\Ultralingua.back\design\WordsStems_{lang}.txt</c>.
 /// </summary>
 /// <remarks>
 /// The input words for a language are all lines of the LingeaOld word list followed by the first
 /// space-separated token of each of the first 50000 Wiktionary lines; empty words are dropped and the
 /// rest is lower-cased and de-duplicated.
 /// NOTE(review): the impersonation credentials are hard-coded in source - consider moving them to configuration.
 /// </remarks>
 public static void MakeWordList() {
   using (var impersonator = new Impersonator("pavel", "LANGMaster", "zvahov88_")) {
     DictLib.RunStemming<List<string>>(
       Dict.allLangs,
       // Source words for one language.
       lang => {
         var lingeaLines = File.ReadAllLines(string.Format(Machines.rootPath + @"RwDicts\Sources\LingeaOld\design\wordList_{0}.txt", lang));
         var wiktionaryWords = File.ReadAllLines(string.Format(Machines.rootPath + @"RwDicts\Sources\Wiktionary.back\{0}.txt", lang))
           .Select(line => line.Split(' ')[0])
           .Take(50000);
         return lingeaLines
           .Concat(wiktionaryWords)
           .Where(word => !string.IsNullOrEmpty(word))
           .Select(word => word.ToLower())
           .Distinct()
           .ToArray();
       },
       // Fresh accumulator for one language.
       lang => new List<string>(),
       // Collect every non-empty normalized item of the row produced for a word.
       (lang, word, row, stems) => {
         var normalized = row.Select(item => doNormalize(item)).Where(item => !string.IsNullOrEmpty(item));
         stems.AddRange(normalized);
       },
       // Persist the accumulated items, de-duplicated and sorted.
       (lang, stems) => {
         var outputPath = string.Format(Machines.rootPath + @"RwDicts\Sources\Ultralingua.back\design\WordsStems_{0}.txt", lang);
         File.WriteAllLines(outputPath, stems.Distinct().OrderBy(stem => stem));
       },
       impersonator
     );
   }
 }
Пример #2
0
 /// <summary>
 /// Same pipeline as the main word-list builder, but for the languages listed in <c>otherLangsStr</c>:
 /// runs stemming over the Wiktionary words of each language and writes the distinct, sorted, normalized
 /// results to <c>RwDicts\Sources\Ultralingua.back\design\WordsStems_{lang}.txt</c>.
 /// </summary>
 /// <remarks>
 /// The Wiktionary source file is named after the <c>otherLangsStr</c> key whose value equals the language.
 /// Only the first space-separated token of each of the first 200000 lines is used.
 /// The current thread's culture and UI culture are switched to "ar-sa" before the words are read and are not restored here.
 /// NOTE(review): the impersonation credentials and the d:\ source path are hard-coded - confirm this is intended.
 /// </remarks>
 public static void MakeWordList_Other() {
   using (var impersonator = new Impersonator("pavel", "LANGMaster", "zvahov88_")) {
     DictLib.RunStemming<List<string>>(
       otherLangsStr.Values,
       // Source words for one language.
       lang => {
         var culture = CultureInfo.CreateSpecificCulture("ar-sa");
         Thread.CurrentThread.CurrentUICulture = culture;
         Thread.CurrentThread.CurrentCulture = culture;

         var sourceName = otherLangsStr.First(pair => pair.Value == lang).Key;
         var lines = File.ReadAllLines(string.Format(@"d:\LMCom\rew\Web4\RwDicts\Sources\Wiktionary.back\src\{0}.txt", sourceName));

         var firstTokens = lines.Select(line => line.Split(' ')[0]).Take(200000);
         var lowered = firstTokens
           .Where(word => !string.IsNullOrEmpty(word))
           .Select(word => word.ToLower());
         return lowered.Distinct().ToArray();
       },
       // Fresh accumulator for one language.
       lang => new List<string>(),
       // Collect every non-empty normalized item of the row produced for a word.
       (lang, word, row, stems) => {
         var normalized = row.Select(item => doNormalize(item)).Where(item => !string.IsNullOrEmpty(item));
         stems.AddRange(normalized);
       },
       // Persist the accumulated items, de-duplicated and sorted.
       (lang, stems) => {
         var outputPath = string.Format(Machines.rootPath + @"RwDicts\Sources\Ultralingua.back\design\WordsStems_{0}.txt", lang);
         File.WriteAllLines(outputPath, stems.Distinct().OrderBy(stem => stem));
       },
       impersonator
     );
   }
 }
Пример #3
0
 /// <summary>
 /// Regenerates the LingeaOld dictionary files: LingeaOld\temp1 combined with
 /// LingeaOld\design\entriesInfo.xml is written into the LingeaOld folder.
 /// </summary>
 /// <remarks>
 /// The merged dictionaries are built first (repairLingea), then every *.xml file directly inside the
 /// LingeaOld folder is deleted, and finally each dictionary is saved as {natLang}_{crsLang}.xml.
 /// NOTE(review): the impersonation credentials are hard-coded in source - consider moving them to configuration.
 /// </remarks>
 public static void OldToNew3() {
   using (var impersonator = new Impersonator("pavel", "LANGMaster", "zvahov88_")) {
     var dictionaries = repairLingea(Machines.rootPath + @"RwDicts\Sources\LingeaOld\temp1\", impersonator);

     // Remove the previous output before writing the new files.
     foreach (var staleFile in Directory.EnumerateFiles(Machines.rootPath + @"RwDicts\Sources\LingeaOld", "*.xml")) {
       File.Delete(staleFile);
     }

     foreach (var dictionary in dictionaries) {
       var targetPath = string.Format(Machines.rootPath + @"RwDicts\Sources\LingeaOld\{1}_{0}.xml", dictionary.crsLang, dictionary.natLang);
       XmlUtils.ObjectToFile(targetPath, dictionary);
     }
   }
 }
Пример #4
0
 /// <summary>
 /// Converts this old Lingea entry into a <see cref="DictEntryObj"/> of type <c>DictEntryType.lingeaOld</c>.
 /// </summary>
 /// <param name="imp">Impersonation context; its token is impersonated for the duration of the conversion.</param>
 /// <returns>
 /// A new object carrying this entry's id, sound master and head words, the first child element of
 /// <c>entry</c>, and the course words.
 /// </returns>
 /// <remarks>
 /// Course words are the '|'-separated values of the "courseUsed" attribute, lower-cased with apostrophes
 /// removed, minus the lower-cased head words and minus empty items.
 /// (An earlier, commented-out variant built DictStem lists from head words, stems and course usages instead.)
 /// </remarks>
 public DictEntryObj toNew(Impersonator imp) {
   using (WindowsIdentity.Impersonate(imp.token)) {
     var firstChild = entry.Elements().First();

     var usedInCourses = entry.AttributeValue("courseUsed", "")
       .Split('|')
       .Select(word => word.ToLower().Replace("'", null))
       .Except(headWords.Select(head => head.ToLower()))
       .Where(word => !string.IsNullOrEmpty(word))
       .ToArray();

     return new DictEntryObj {
       entryId = entryId,
       entry = firstChild,
       soundMaster = soundMaster,
       type = DictEntryType.lingeaOld,
       headWords = headWords,
       courseWords = usedInCourses,
     };
   }
 }
Пример #5
0
  /// <summary>
  /// Merges the entry metadata from <c>LingeaOld\design\entriesInfo.xml</c> with the old Lingea entries loaded
  /// from <paramref name="srcDir"/> and groups the converted entries into dictionaries.
  /// </summary>
  /// <param name="srcDir">Directory passed to <c>LingeaDictionary.getOldLingeaEntries</c> to load the old entries.</param>
  /// <param name="imp">Impersonation context forwarded to <c>toNew</c> for every converted entry.</param>
  /// <returns>
  /// One <see cref="DictObj"/> per group of entries sharing <c>okCrs</c> and the other language of the pair.
  /// Groups are processed in parallel, so the order of the returned items is not deterministic.
  /// </returns>
  /// <exception cref="FileNotFoundException">A sound file referenced by a sound master does not exist.</exception>
  /// <remarks>
  /// Side effect: an <c>.info</c> file containing "{language}={headword}" is written next to every referenced <c>.mp3</c>.
  /// NOTE(review): the entriesInfo.xml and LingeaSound locations are hard-coded to d:\LMCom\rew\Web4\ while
  /// <c>srcDir</c> is supplied by the caller - confirm both refer to the same root.
  /// </remarks>
  public static IEnumerable<DictObj> repairLingea(string srcDir, Impersonator imp) {
    var allnew = XmlUtils.FileToObject<LingeaDictionary.DictEntry[]>(@"d:\LMCom\rew\Web4\RwDicts\Sources\LingeaOld\design\entriesInfo.xml");
    var allOld = LingeaDictionary.getOldLingeaEntries(srcDir).SelectMany(kv => kv.Value).SelectMany(kv => kv.Value.entryIdToEntry).ToDictionary(kv => kv.Key, kv => kv.Value);

    // Exactly one head word per sound: Single() throws when one sound master maps to several distinct
    // lower-cased head words within a language.
    // Entries with an empty sound master are skipped, matching the IsNullOrEmpty test used for the sound tag below.
    Dictionary<Langs, Dictionary<string, string>> soundMasterToHeadword = allnew.Where(de => !string.IsNullOrEmpty(de.soundMaster)).GroupBy(de => de.okCrs).ToDictionary(
      g => g.Key,
      g => g.GroupBy(de => de.soundMaster).ToDictionary(
        sg => sg.Key,
        sg => sg.Select(d => d.headWords[0].ToLower()).Distinct().Single()));

    // Attach an .info file holding the head word to every .mp3 file.
    foreach (var lKv in soundMasterToHeadword) {
      foreach (var hKv in lKv.Value) {
        string right = @"d:\LMCom\rew\Web4\RwDicts\LingeaSound\" + hKv.Key.Replace('/', '\\') + ".mp3";
        // Report which file is missing instead of throwing an anonymous Exception.
        if (!File.Exists(right)) throw new FileNotFoundException("Lingea sound file not found: " + right, right);
        // ChangeExtension touches only the trailing ".mp3", never a ".mp3" occurring earlier in the path.
        File.WriteAllText(Path.ChangeExtension(right, ".info"), lKv.Key.ToString() + "=" + hKv.Value);
      }
    }

    // Create the new entries: merge entriesInfo.xml with LingeaOld.back (LingeaOld\temp1),
    // then add and fill the sound tag of every entry.
    List<DictObj> res = new List<DictObj>();
    Parallel.ForEach(allnew.Where(de => de.okCrs != Langs.no).GroupBy(dn => dn.okCrs),
      //new ParallelOptions() { MaxDegreeOfParallelism = 1 },
      crs => {
        // Second-level key: the language of the pair that is not okCrs.
        Parallel.ForEach(crs.GroupBy(dc => dc.crsLang == dc.okCrs ? dc.natLang : dc.crsLang),
          //new ParallelOptions() { MaxDegreeOfParallelism = 1 },
          nat => {
            foreach (var newDe in nat) {
              // Merge: attach the old entry with the same id (the indexer throws if the id is unknown).
              newDe.entry = allOld[newDe.entryId];
              // Sound tag: reuse the existing one, otherwise insert a new one into the head element -
              // after "pron" if present, else after "morf" if present, else as the last child of head.
              var snd = newDe.entry.Descendants("sound").FirstOrDefault();
              if (snd == null) {
                snd = new XElement("sound");
                var head = newDe.entry.DescendantsAttr("class", "head").First();
                var pron = head.Elements().FirstOrDefault(e => e.AttributeValue("class") == "pron");
                if (pron != null) pron.AddAfterSelf(snd);
                else {
                  var morf = head.Elements().FirstOrDefault(e => e.AttributeValue("class") == "morf");
                  if (morf != null) morf.AddAfterSelf(snd);
                  else head.Add(snd);
                }
              }
              // Without a sound master the value is "@" + first head word; otherwise the relative .mp3 path. Always lower-cased.
              snd.Value = (string.IsNullOrEmpty(newDe.soundMaster) ? "@" + newDe.headWords[0].ToLower() : @"RwDicts/LingeaSound/" + newDe.soundMaster + ".mp3").ToLower();
            }
            // The result list is shared by all parallel workers, so additions are serialized.
            lock (res) res.Add(new DictObj { crsLang = crs.Key, natLang = nat.Key, entries = nat.Select(n => n.toNew(imp)).DistinctBy(n => n.entry.ToString()).ToArray() });
          });
      });

    return res;
  }