示例#1
0
        /// <summary>
        /// Builds a display context string for the occurrences recorded in
        /// <paramref name="termEntry"/> by cutting fragments around each occurrence
        /// out of the resource text gathered into <c>Collector.Body</c>, and reports
        /// where inside that string the matched terms are located.
        /// </summary>
        /// <param name="termEntry">Entry whose <c>DocIndex</c> identifies the resource
        /// and whose instances supply the occurrence offsets.</param>
        /// <param name="lexemes">Lexeme strings, indexed by the <c>BaseID</c> of each
        /// instance.</param>
        /// <param name="hgltPairs">Receives one <c>OffsetData</c> (start, length) per
        /// processed occurrence; always assigned, possibly empty.</param>
        /// <returns>
        /// The fragments joined and surrounded by <c>cFragmentsDelimiter</c>, with line
        /// breaks and tabs replaced by spaces. Returns <c>cNoContextSign</c> when the
        /// resource cannot be loaded or yields no text. If an exception interrupts the
        /// processing, whatever was assembled up to that point is returned.
        /// </returns>
        public static string  GetContext(Entry termEntry, string[] lexemes, out ArrayList hgltPairs)
        {
            string context        = cNoContextSign;
            int    contextsNumber = Math.Min(MinimalNumberOfContexts, termEntry.Count);

            int[] shifts = new int[termEntry.Count];
            hgltPairs = new ArrayList();
            Collector.Init(termEntry.Offsets, shifts);

            try
            {
                //  It is possible that a temporary file is removed during this
                //  processing, hence the "try" flavour of the resource load.
                IResource res = Core.ResourceStore.TryLoadResource(termEntry.DocIndex);
                if (res != null)
                {
                    Core.PluginLoader.InvokeResourceTextProviders(res, Collector);

                    if (Collector.Body.Length > 0)
                    {
                        context = cFragmentsDelimiter;

                        //  Borders of the most recently extracted fragment; the initial
                        //  values guarantee that the first instance opens a new fragment.
                        int leftBorder = Int32.MaxValue, rightBorder = Int32.MinValue;
                        int prevContextLength = 0;
                        for (int i = 0; i < contextsNumber; i++)
                        {
                            InstanceOffset instance         = termEntry.Instance(i);
                            int            origOffset       = instance.OffsetNormal;
                            int            offset           = Collector.ConvertOffset(origOffset, instance.SectionId);
                            ArrayList      delimiterOffsets = new ArrayList();

                            //  Workaround for a possibly invalid text body reconstruction
                            //  by a plugin, when search terms appear outside the text margins.
                            if (offset < Collector.Body.Length)
                            {
                                if (offset < leftBorder || offset > rightBorder)
                                {
                                    //  The occurrence lies outside the last fragment: cut a new one.
                                    leftBorder  = Math.Max(0, offset - cContextSideLength);
                                    rightBorder = Math.Min(Collector.Body.Length - 1, offset + cContextSideLength);
                                    TuneBorders(offset, Collector.Body, ref leftBorder, ref rightBorder);

                                    string fragment = Collector.Body.Substring(leftBorder, rightBorder - leftBorder + 1);
                                    InsertSectionDelimiters(ref fragment, leftBorder, rightBorder, context.Length, delimiterOffsets);

                                    //  Remember where this fragment starts inside the context.
                                    prevContextLength = context.Length;
                                    context          += fragment + cFragmentsDelimiter;
                                }
                                else if (contextsNumber < termEntry.Count)
                                {
                                    //  The occurrence falls into the fragment already taken, so
                                    //  no new fragment was produced; examine one more instance.
                                    contextsNumber++;
                                }

                                //  Position of the occurrence relative to the start of the context.
                                int    startOffset = offset - leftBorder + prevContextLength;
                                string lexeme      = lexemes[instance.BaseID];
                                lexeme = ReconstructWordform(instance.Offset, lexeme, OMEnv.DictionaryServer);
                                TuneOffsetByBorders(ref startOffset, delimiterOffsets);

                                hgltPairs.Add(new OffsetData(startOffset, lexeme.Length));
                            }
                        }

                        //  Every replacement keeps the string length unchanged ("\r\n" becomes
                        //  two spaces), so the highlight offsets collected above stay valid.
                        context = context.Replace("\r\n", "  ");
                        context = context.Replace("\n", " ");
                        context = context.Replace("\r", " ");
                        context = context.Replace("\t", " ");
                        Trace.WriteLine("ContextExtractor -- context for [" + termEntry.DocIndex + "/" + res.Type + "] is [" + context + "]");
                        foreach (OffsetData pair in hgltPairs)
                        {
                            if (pair.Start + pair.Length >= context.Length)
                            {
                                Trace.WriteLine("                  highlight prefix of token [" + context.Substring(pair.Start) + "]");
                            }
                            else
                            {
                                Trace.WriteLine("                  highlight token [" + context.Substring(pair.Start, pair.Length) + "]");
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                //  Here we catch the exceptions described in OM-10659, the reason
                //  for which has still not been found. The failure stays hidden from
                //  the caller (best effort), but it is traced so that it can be
                //  diagnosed instead of disappearing silently.
                Trace.WriteLine("ContextExtractor -- context extraction failed: " + ex);
            }

            return context;
        }