C# (CSharp) pwiz.ProteomeDatabase.Fasta ProteinSearchInfo示例

编程语言: C# (CSharp)

命名空间/包名称: pwiz.ProteomeDatabase.Fasta

hotexamples.com的示例: 3

C# (CSharp) pwiz.ProteomeDatabase.Fasta ProteinSearchInfo - 已找到3个示例。这些是从开源项目中提取的最受好评的pwiz.ProteomeDatabase.Fasta.ProteinSearchInfo真实C# (CSharp)示例。您可以对示例进行评价，以帮助我们提高示例质量。

常用方法

显示隐藏

SetWebSearchTerm(1)

示例#1

显示文件

文件： WebEnabledFastaImporter.cs 项目： lgatto/proteowizard

 /// <summary>
 /// Reports whether a web lookup succeeds, probing at most once and caching the answer.
 /// The probe is a single lookup of the known-good Uniprot search target.
 /// </summary>
 /// <returns>
 /// true if the probe lookup produced at least one result, otherwise false;
 /// subsequent calls return the cached value without probing again
 /// </returns>
 public bool HasWebAccess()
 {
     // Already probed on an earlier call - reuse the cached answer
     if (_hasWebAccess.HasValue)
         return _hasWebAccess.Value;

     // First request: build a search entry for the known-good target and try to look it up
     var probeProtein = ParseProteinLine(KNOWNGOOD_UNIPROT_SEARCH_TARGET);
     var probeMetadata = new ProteinMetadata(
         KNOWNGOOD_UNIPROT_SEARCH_TARGET,
         string.Empty,
         null,
         null,
         null,
         null,
         UNIPROTKB_TAG + KNOWNGOOD_UNIPROT_SEARCH_TARGET);
     var probeDbName = new DbProteinName(probeProtein, probeMetadata);
     var probeSearch = new ProteinSearchInfo(probeDbName, KNOWNGOOD_UNIPROT_SEARCH_TARGET_SEQLEN);

     // Any result at all from the lookup counts as having access
     _hasWebAccess = DoWebserviceLookup(new[] { probeSearch }, null, true).Any();
     return _hasWebAccess.Value;
 }

示例#2

显示文件

文件： WebEnabledFastaImporter.cs 项目： lgatto/proteowizard

        /// <summary>
        /// Handles web access for deriving missing protein metadata
        /// </summary>
        /// <param name="proteins">items to search</param>
        /// <param name="searchType">Uniprot or Entrez</param>
        /// <param name="progressMonitor">For detecting operation cancellation</param>
        /// <returns>negative value if we need to try again later, else number of proteins looked up</returns>
        /// 
        private int DoWebserviceLookup(IList<ProteinSearchInfo> proteins, char searchType, IProgressMonitor progressMonitor)
        {
            int lookupCount = _webSearchProvider is FakeWebSearchProvider ? proteins.Count : 0; // Fake websearch provider used in tests just claims victory, returns 0 for WebRetryCount
            var searchterms = _webSearchProvider.ListSearchTerms(proteins);
                
            if (searchterms.Count == 0)
                return 0; // no work, but not error either
            var responses = new List<ProteinSearchInfo>();
            for (var retries = _webSearchProvider.WebRetryCount();retries-->0;)  // be patient with the web
            {
                if (searchterms.Count == 0)
                    break;
                if ((progressMonitor != null) && progressMonitor.IsCanceled)
                    break;
                var caught = false;
                try
                {
                    string urlString; // left at outer scope for exception debugging ease
                    if ((searchType == GENINFO_TAG) || (searchType == ENTREZ_TAG))
                    {
                        // first try to get enough summary information to redo this search in Uniprot

                        // throw in something we know will hit (Note: it's important that this particular value appear in the unit tests, so we can mimic web response)
                        string knowngood = (searchType == GENINFO_TAG) ? KNOWNGOOD_GENINFO_SEARCH_TARGET : KNOWNGOOD_ENTREZ_SEARCH_TARGET; //  Not L10N
                        bool addedKnowngood = false;
                        if (!searchterms.Any(searchterm => SimilarSearchTerms(searchterm,knowngood)))
                        {
                            searchterms.Insert(0, knowngood); // ensure at least one response if connection is good
                            addedKnowngood = true;
                        }

                        urlString = _webSearchProvider.ConstructEntrezURL(searchterms,true); // get in summary form

                        /*
                         * a search on XP_915497 and 15834432 yields something like this (but don't mix GI and non GI in practice):
                            <DocSum>
                            <Id>82891194</Id>
                            <Item Name="Caption" Type="String">XP_915497</Item>
                            <Item Name="Title" Type="String">
                            PREDICTED: similar to Syntaxin binding protein 3 (UNC-18 homolog 3) (UNC-18C) (MUNC-18-3) [Mus musculus]
                            </Item>
                            <Item Name="Extra" Type="String">gi|82891194|ref|XP_915497.1|[82891194]</Item>
                            <Item Name="Gi" Type="Integer">82891194</Item>
                            <Item Name="CreateDate" Type="String">2005/12/01</Item>
                            <Item Name="UpdateDate" Type="String">2005/12/01</Item>
                            <Item Name="Flags" Type="Integer">512</Item>
                            <Item Name="TaxId" Type="Integer">10090</Item>
                            <Item Name="Length" Type="Integer">566</Item>
                            <Item Name="Status" Type="String">replaced</Item>
                            <Item Name="ReplacedBy" Type="String">NP_035634</Item>   <-- useful for Uniprot search
                            <Item Name="Comment" Type="String">
                            <![CDATA[ This record was replaced or removed. ]]>
                            </Item>
                            </DocSum>
                            <DocSum>
                            <Id>15834432</Id>
                            <Item Name="Caption" Type="String">NP_313205</Item>    <-- useful for Uniprot search
                            <Item Name="Title" Type="String">
                            30S ribosomal protein S18 [Escherichia coli O157:H7 str. Sakai]
                            </Item>
                            <Item Name="Extra" Type="String">gi|15834432|ref|NP_313205.1|[15834432]</Item>
                            <Item Name="Gi" Type="Integer">15834432</Item>
                            <Item Name="CreateDate" Type="String">2001/03/07</Item>
                            <Item Name="UpdateDate" Type="String">2013/12/20</Item>
                            <Item Name="Flags" Type="Integer">512</Item>
                            <Item Name="TaxId" Type="Integer">386585</Item>
                            <Item Name="Length" Type="Integer">75</Item>
                            <Item Name="Status" Type="String">live</Item>
                            <Item Name="ReplacedBy" Type="String"/>
                            <Item Name="Comment" Type="String">
                            <![CDATA[ ]]>
                            </Item>
                            </DocSum>
                        */
                        using (var xmlTextReader = _webSearchProvider.GetXmlTextReader(urlString))
                        {
                            var elementName = String.Empty;
                            var response = new ProteinSearchInfo();
                            bool dummy = addedKnowngood;
                            string id = null;
                            string caption = null;
                            string replacedBy = null;
                            string attrName = null;
                            string length = null;
                            while (xmlTextReader.Read())
                            {
                                switch (xmlTextReader.NodeType)
                                {
                                    case XmlNodeType.Element: // The node is an element.
                                        elementName = xmlTextReader.Name;
                                        attrName = xmlTextReader.GetAttribute("Name"); // Not L10N
                                        break;
                                    case XmlNodeType.Text: // text for current element
                                        if ("Id" == elementName) // this will be the input GI number, or GI equivalent of input // Not L10N
                                        {
                                            id = NullForEmpty(xmlTextReader.Value);
                                        }
                                        else if ("ERROR" == elementName) // Not L10N
                                        {
                                            // we made connection, but some trouble on their end
                                            throw new WebException(xmlTextReader.Value);
                                        }
                                        else if ("Item" == elementName) // Not L10N
                                        {
                                            var value = NullForEmpty(xmlTextReader.Value);
                                            if (value != null)
                                            {
                                                switch (attrName)
                                                {
                                                    case "ReplacedBy": // Not L10N
                                                        replacedBy = value; // a better read on name
                                                        break;
                                                    case "Caption": // Not L10N
                                                        caption = value; // a better read on name
                                                        break;
                                                    case "Length": // Not L10N
                                                        length = value; // Useful for disambiguation
                                                        break;
                                                }
                                            }
                                        }
                                        break;
                                    case XmlNodeType.EndElement:
                                        if ("DocSum" == xmlTextReader.Name) // Not L10N
                                        {
                                            if (dummy)
                                            {
                                                dummy = false; // first returned is just the known-good seed, the rest are useful
                                            }
                                            else
                                            {
                                                // can we transfer this search to UniprotKB? Gets us the proper accession ID,
                                                // and avoids downloading sequence data we already have or just don't want
                                                string newSearchTerm = null;
                                                string intermediateName = null;
                                                if (replacedBy != null)
                                                {
                                                    newSearchTerm = replacedBy; //  Ref|XP_nnn -> GI -> NP_yyyy
                                                    intermediateName = caption;
                                                }
                                                else if (caption != null)
                                                {
                                                    newSearchTerm = caption; // GI -> NP_yyyy
                                                    intermediateName = id;
                                                }
                                                if (newSearchTerm != null)
                                                {
                                                    response.Accession = newSearchTerm;  // a decent accession if uniprot doesn't find it
                                                    response.Description = intermediateName; // stow this here to help make the connection between searches
                                                    response.SetWebSearchTerm(new WebSearchTerm(UNIPROTKB_TAG, newSearchTerm));
                                                    int intLength;
                                                    if (!int.TryParse(length, out intLength))
                                                        intLength = 0;
                                                    response.SeqLength = intLength; // Useful for disambiguation
                                                    responses.Add(response);
                                                    foreach (var value in new[] {id, caption})
                                                    {
                                                        // note as altname for association with the original search
                                                        if (response.Protein == null)
                                                            response.Protein = new DbProtein();
                                                        response.Protein.Names.Add(new DbProteinName(null, new ProteinMetadata(value, null)));
                                                        // and remove from consideration for the full-data Entrez search
                                                        var val = value;
                                                        var oldSearches = searchterms.Where(s => SimilarSearchTerms(s, val)).ToArray();
                                                        if (oldSearches.Any())
                                                        {
                                                            // conceivably same search term is in there twice, just replace the first
                                                            searchterms.Remove(oldSearches[0]); // don't do the more verbose Entrez search
                                                        }
                                                    }
                                                }
                                            }
                                            response = new ProteinSearchInfo(); // and start another
                                            id = caption = replacedBy = null;
                                        }
                                        break;
                                }
                            }
                            xmlTextReader.Close();
                        }

                        if (searchterms.Count > (addedKnowngood ? 1 : 0))
                        {
                            // now do full entrez search - unfortunately this pulls down sequence information so it's slow and we try to avoid it
                            urlString = _webSearchProvider.ConstructEntrezURL(searchterms, false); // not a summary

                            using (var xmlTextReader = _webSearchProvider.GetXmlTextReader(urlString))
                            {
                                var elementName = String.Empty;
                                var latestGbQualifierName = string.Empty;
                                var response = new ProteinSearchInfo(); // and start another
                                bool dummy = addedKnowngood;
                                while (xmlTextReader.Read())
                                {
                                    switch (xmlTextReader.NodeType)
                                    {
                                        case XmlNodeType.Element: // The node is an element.
                                            elementName = xmlTextReader.Name;
                                            break;
                                        case XmlNodeType.Text: // text for current element
                                            if ("GBSeq_organism" == elementName) // Not L10N
                                            {
                                                response.Species = NullForEmpty(xmlTextReader.Value);
                                            }
                                            else if ("GBSeq_locus" == elementName) // Not L10N
                                            {
                                                response.PreferredName = NullForEmpty(xmlTextReader.Value);
                                                    // a better read on name
                                            }
                                            else if ("GBSeq_primary-accession" == elementName) // Not L10N
                                            {
                                                response.Accession = NullForEmpty(xmlTextReader.Value);
                                            }
                                            else if ("GBSeq_definition" == elementName) // Not L10N
                                            {
                                                if (String.IsNullOrEmpty(response.Description))
                                                    response.Description = NullForEmpty(xmlTextReader.Value);
                                            }
                                            else if ("GBQualifier_name" == elementName) // Not L10N
                                            {
                                                latestGbQualifierName = NullForEmpty(xmlTextReader.Value);
                                            }
                                            else if (("GBQualifier_value" == elementName) && // Not L10N
                                                        ("gene" == latestGbQualifierName)) // Not L10N
                                            {
                                                response.Gene = NullForEmpty(xmlTextReader.Value);
                                            }
                                            else if ("GBSeqid" == elementName) // Not L10N
                                            {
                                                // alternate name  
                                                // use this as a way to associate this result with a search -
                                                // accession may be completely unlike the search term in GI case
                                                if (response.Protein == null)
                                                    response.Protein = new DbProtein();
                                                response.Protein.Names.Add(new DbProteinName(null,
                                                    new ProteinMetadata(NullForEmpty(xmlTextReader.Value), null)));
                                            }
                                            else if ("GBSeq_length" == elementName) // Not L10N
                                            {
                                                int length;
                                                if (!int.TryParse(xmlTextReader.Value, out length))
                                                    length = 0;
                                                response.SeqLength = length;
                                            }
                                            break;
                                        case XmlNodeType.EndElement:
                                            if ("GBSeq" == xmlTextReader.Name) // Not L10N
                                            {
                                                if (dummy)
                                                {
                                                    dummy = false; // first returned is just the known-good seed, the rest are useful
                                                }
                                                else
                                                {
                                                    responses.Add(response);
                                                }
                                                response = new ProteinSearchInfo(); // and start another
                                            }
                                            break;
                                    }
                                }
                                xmlTextReader.Close();
                            }
                        } // end full entrez search
                    } // End if GENINFO or ENTREZ
                    else if (searchType == UNIPROTKB_TAG)
                    {
                        int timeout = _webSearchProvider.GetTimeoutMsec(searchterms.Count); // 10 secs + 1 more for every 5 search terms
                        urlString = _webSearchProvider.ConstructUniprotURL(searchterms);
                        using (var webResponseStream = _webSearchProvider.GetWebResponseStream(urlString, timeout))
                        {
                            if (webResponseStream != null)
                            {
                                using (var reader = new StreamReader(webResponseStream))
                                {
                                    if (!reader.EndOfStream)
                                    {
                                        var header = reader.ReadLine(); // eat the header
                                        string[] fieldNames = header.Split('\t'); // Not L10N
                                        // Normally comes in as Entry\tEntry name\tStatus\tProtein names\tGene names\tOrganism\tLength, but could be any order
                                        int colAccession = Array.IndexOf(fieldNames, "Entry");  // Not L10N
                                        int colPreferredName = Array.IndexOf(fieldNames, "Entry name");  // Not L10N
                                        int colDescription = Array.IndexOf(fieldNames, "Protein names");  // Not L10N
                                        int colGene = Array.IndexOf(fieldNames, "Gene names"); // Not L10N
                                        int colSpecies = Array.IndexOf(fieldNames, "Organism"); // Not L10N
                                        int colLength = Array.IndexOf(fieldNames, "Length"); // Not L10N
                                        int colStatus = Array.IndexOf(fieldNames, "Status"); // Not L10N
                                        while (!reader.EndOfStream)
                                        {
                                            var line = reader.ReadLine();
                                            if (line != null)
                                            {
                                                string[] fields = line.Split('\t'); // Not L10N
                                                int length = 0;
                                                if (colLength >= 0)
                                                    int.TryParse(fields[colLength], out length);
                                                var response = new ProteinSearchInfo
                                                {
                                                    ProteinDbInfo = new DbProteinName
                                                    {
                                                        Accession = NullForEmpty(fields[colAccession]),
                                                        PreferredName = NullForEmpty(fields[colPreferredName]),
                                                        Description = NullForEmpty(fields[colDescription]),
                                                        Gene = NullForEmpty(fields[colGene]),
                                                        Species = NullForEmpty(fields[colSpecies]),
                                                    },
                                                    SeqLength = length,
                                                    ReviewStatus = NullForEmpty(colStatus>=0 ? fields[colStatus] : null) // Reviewed or unreviewed
                                                };
                                                responses.Add(response);
                                            }
                                        }
                                    }
                                    reader.Close();
                                }
                                webResponseStream.Close();
                            }
                        }
                    } // End if Uniprot
                }
                catch (WebException ex)
                {
                    if (ex.Status == WebExceptionStatus.ProtocolError)
                    {
                        switch (((HttpWebResponse)ex.Response).StatusCode)
                        {
                            case HttpStatusCode.BadRequest:
                            case HttpStatusCode.RequestUriTooLong:
                                // malformed search, stop trying
                                if (proteins.Count == 1)
                                {
                                    proteins[0].SetWebSearchCompleted(); // No more need for lookup
                                    return 1; // We resolved one
                                }
                                return -2; // Probably asked for too many at once, caller will go into batch reduction mode
                        }
                    }
                    caught = true;
                }
                catch
                {
                    caught = true;
                }
                if (caught)
                {
                    if (retries == 0)
                        return -1;  // just try again later
                    Thread.Sleep(1000);
                    continue;
                }

                if (responses.Count>0)
                {
                    const string STATUS_REVIEWED = "reviewed"; // Uniprot reviewed status // Not L10N
                    // now see if responses are ambiguous or not
                    if (proteins.Count() == 1)
                    {
                        // Any responses must belong to this protein - or this isn't a protein at all (user named it "peptide6" for example).
                        // Can get multiple results for single uniprot code, but we'll ignore those
                        // since we're not in the market for alternative proteins (in fact we're likely 
                        // resolving metadata for one here).
                        ProteinSearchInfo result = null;
                        // See if we can uniquely match by sequence length
                        int length = proteins[0].SeqLength;
                        if (length == 0)  
                        {
                            // From a peptide list, probably - sequence unknown
                            if (responses.Count(r => Equals(r.ReviewStatus, STATUS_REVIEWED)) == 1)
                            {
                                result = responses.First(r => Equals(r.ReviewStatus, STATUS_REVIEWED));
                            }
                            else if (responses.Count(r => Equals(r.Accession, proteins[0].Accession)) == 1)
                            {
                                result = responses.First(r =>Equals(r.Accession, proteins[0].Accession));
                            }
                            else
                            {
                                if (responses.Count != 1)
                                {
                                    // Ambiguous - don't make uneducated guesses.  But if all responses share species or gene etc note that
                                    var common = ProteinSearchInfo.Intersection(responses);
                                    if (common != null)
                                    {
                                        var old = proteins[0].GetProteinMetadata();
                                        proteins[0].ChangeProteinMetadata(MergeSearchResult(common.GetProteinMetadata(), old));
                                    }
                                    proteins[0].SetWebSearchCompleted(); // We aren't going to get an answer
                                    proteins[0].NoteSearchFailure();
                                    break;
                                }
                                result = responses.First();  // We got an unambiguous response
                            }
                        }
                        else if (responses.Count(r => r.SeqLength == length) == 1)
                        {
                            result = responses.First(r =>r.SeqLength == length);
                        }
                        else if (responses.Count(r => r.SeqLength == length && Equals(r.ReviewStatus, STATUS_REVIEWED)) == 1) // Narrow it down to reviewed only
                        {
                            result = responses.First(r => r.SeqLength == length && Equals(r.ReviewStatus, STATUS_REVIEWED));
                        }

                        if (result == null)
                        {
                            if ((length > 0) && (responses.Count(r => r.SeqLength == length) == 0)) // No plausible matches (nothing of the proper length)
                            {
                                proteins[0].SetWebSearchCompleted(); // We aren't going to get an answer
                                proteins[0].NoteSearchFailure();
                                break;
                            }
                            else if (responses.Count(r => Equals(r.ReviewStatus, STATUS_REVIEWED)) == 1)
                            {
                                result = responses.First(r => Equals(r.ReviewStatus, STATUS_REVIEWED));
                            }
                            else
                            {
                                // Ambiguous - don't make uneducated guesses.  But if all responses share species or gene etc note that
                                var common = ProteinSearchInfo.Intersection(responses);
                                if (common != null)
                                {
                                    var old = proteins[0].GetProteinMetadata();
                                    proteins[0].ChangeProteinMetadata(MergeSearchResult(common.GetProteinMetadata(), old));
                                }
                                proteins[0].SetWebSearchCompleted(); // We aren't going to get an answer
                                proteins[0].NoteSearchFailure();
                                break;
                            }
                        }
                        // prefer the data we got from web search to anything we parsed.
                        var oldMetadata = proteins[0].GetProteinMetadata();
                        proteins[0].ChangeProteinMetadata(MergeSearchResult(result.GetProteinMetadata(), oldMetadata)); // use the first, if more than one, as the primary
                        proteins[0].Status = ProteinSearchInfo.SearchStatus.success;
                        lookupCount++; // Succcess!
                        if (Equals(searchType, proteins[0].GetProteinMetadata().GetSearchType())) // did we reassign search from Entrez to UniprotKB? If so don't mark as resolved yet.
                            proteins[0].SetWebSearchCompleted(); // no more need for lookup
                    }
                    else if ((searchType == ENTREZ_TAG) || (searchType == GENINFO_TAG))
                    {
                        // multiple proteins, but responses come in reliable order
                        if (proteins.Count() == responses.Count())
                        {
                            int n = 0;
                            foreach (var response in responses)
                            {
                                // prefer the data we got from web search
                                var oldMetadata = proteins[n].GetProteinMetadata();
                                if (Equals(searchType, proteins[n].GetProteinMetadata().GetSearchType())) // did we reassign search from Entrez to UniprotKB?
                                    oldMetadata = oldMetadata.SetWebSearchCompleted();  // no more need for lookup
                                // use oldMetadata to fill in any holes in response, then take oldMetadata name and description
                                proteins[n].Status = ProteinSearchInfo.SearchStatus.success;
                                proteins[n++].ChangeProteinMetadata(MergeSearchResult(response.GetProteinMetadata(), oldMetadata));
                                lookupCount++; // Succcess!
                            }
                        }
                        else // but sometimes with gaps
                        {
                            int n = 0;
                            foreach (var response in responses)
                            {   // each response should correspond to a protein, but some proteins won't have a response
                                while (n < proteins.Count)
                                {
                                    var s = proteins[n].GetProteinMetadata().WebSearchInfo;
                                    bool hit = (s.MatchesPendingSearchTerm(response.Accession) ||
                                                s.MatchesPendingSearchTerm(response.PreferredName));
                                    if (!hit && (response.ProteinDbInfo != null))
                                    {
                                        // we have a list of alternative names from the search, try those
                                        foreach (var altName in response.Protein.Names)
                                        {
                                            hit = s.MatchesPendingSearchTerm(altName.Name);
                                            if (hit)
                                                break;
                                        }
                                    }
                                    if (hit)
                                    {
                                        // prefer the data we got from web search
                                        var oldMetadata = proteins[n].GetProteinMetadata();
                                        if (Equals(searchType, proteins[0].GetProteinMetadata().GetSearchType())) // did we reassign search from Entrez to UniprotKB?
                                            oldMetadata = oldMetadata.SetWebSearchCompleted();  // no more need for lookup
                                        // use oldMetadata to fill in any holes in response, then take oldMetadata name and description
                                        proteins[n].ChangeProteinMetadata(MergeSearchResult(response.GetProteinMetadata(), oldMetadata));
                                        proteins[n].Status = ProteinSearchInfo.SearchStatus.success;
                                        lookupCount++; // Succcess!
                                        break;
                                    }
                                    n++;
                                }
                            }
                        }
                    }
                    else // (searchType == UNIPROTKB_TAG)
                    {
                        // Multiple proteins, responses come back in no particular order, and 
                        // possibly with alternatives thrown in
                        foreach (var p in proteins)
                        {
                            var seqLength = p.SeqLength;
                            var uniqueProteinLength = proteins.Count(pr => (pr.SeqLength == seqLength)) == 1;
                            for (var reviewedOnly=0; reviewedOnly < 2; reviewedOnly++)
                            {
                                // Only look at responses with proper sequence length - narrowing to reviewed only if we have ambiguity
                                var likelyResponses = reviewedOnly == 0 ?
                                    (from r in responses where (r.SeqLength == seqLength) select r).ToArray() :
                                    (from r in responses where (r.SeqLength == seqLength && Equals(r.ReviewStatus, STATUS_REVIEWED)) select r).ToArray();

                                var results = (uniqueProteinLength && likelyResponses.Count()==1) ?
                                    likelyResponses : // Unambiguous - single response that matches this length, and this protein is the only one with this length
                                    (from r in likelyResponses where (p.GetProteinMetadata().WebSearchInfo.MatchesPendingSearchTerm(r.Accession)) select r).ToArray();
                                if (results.Count() != 1)
                                {
                                    // See if the search term is found in exactly one result's description field
                                    var resultsDescription = (from r in likelyResponses
                                                              where ((!String.IsNullOrEmpty(r.Description) && r.Description.ToUpperInvariant().
                                                              Split(' ').Contains(p.GetProteinMetadata().GetPendingSearchTerm().ToUpperInvariant())))
                                        select r).ToArray();
                                    if (resultsDescription.Count() == 1)
                                        results = resultsDescription;
                                }
                                if (results.Count() != 1)
                                {
                                    // See if the search term is found in exactly one result's gene names field
                                    var resultsGene = (from r in likelyResponses
                                                       where ((!String.IsNullOrEmpty(r.Gene) && r.Gene.ToUpperInvariant().
                                                       Split(' ').Contains(p.GetProteinMetadata().GetPendingSearchTerm().ToUpperInvariant())))
                                        select r).ToArray();
                                    if (resultsGene.Count() == 1)
                                        results = resultsGene;
                                }
                                if (results.Count() != 1 && uniqueProteinLength)
                                {
                                    // Didn't find an obvious match, but this is the only protein of this length in the search
                                    results = likelyResponses;
                                }
                                // Make sure all matching responses have same accession, at a minimum
                                var common = ProteinSearchInfo.Intersection(results);
                                if (results.Any() && common.Accession != null)
                                {
                                    // prefer the data we got from web search
                                    var oldMetadata = p.GetProteinMetadata();
                                    oldMetadata = oldMetadata.SetWebSearchCompleted();  // no more need for lookup
                                    // use oldMetadata to fill in any holes in response, then take oldMetadata name and description
                                    p.ChangeProteinMetadata(MergeSearchResult(common.GetProteinMetadata(), oldMetadata));
                                    p.Status = ProteinSearchInfo.SearchStatus.success;
                                    lookupCount++; // Succcess!
                                    break;
                                }
                            }
                            if (p.GetProteinMetadata().NeedsSearch() && uniqueProteinLength)
                            {
                                p.SetWebSearchCompleted(); // No answer found, but we're done
                                p.NoteSearchFailure();
                                lookupCount++; // done with this one
                            }
                        }
                    }
                } // End if we got any respones
                else if (searchType == UNIPROTKB_TAG)
                {
                    // None of the searches hit - Uniprot is our last search so just set these as complete
                    foreach (var p in proteins.Where(p => p.GetProteinMetadata().NeedsSearch()))
                    {
                        p.SetWebSearchCompleted();  // No answer found, but we're done
                        p.NoteSearchFailure();
                        lookupCount++; // done with this one
                    }
                }
                else if (proteins.Count() == 1)
                {
                    proteins[0].SetWebSearchCompleted(); // no response for a single protein - we aren't going to get an answer
                    proteins[0].NoteSearchFailure();
                    lookupCount++; // done with this one
                }

                break; // No need for retry
            }
            return lookupCount;
        }

示例#3

显示文件

文件： WebEnabledFastaImporter.cs 项目： lgatto/proteowizard

 /// <summary>
 /// Builds a ProteinSearchInfo holding only what every member of <paramref name="list"/> agrees on.
 /// The result is seeded from the first member (its Protein, plus its metadata after ClearWebSearchInfo(),
 /// with a sequence length of 0); then, for each remaining member, any string property of the result's
 /// ProteinDbInfo whose value differs from that member's is set to null.
 /// </summary>
 /// <param name="list">items to compare; may be null or empty</param>
 /// <returns>
 /// null if <paramref name="list"/> is null or empty; otherwise a new ProteinSearchInfo whose disagreeing
 /// string properties have been nulled out (callers can test e.g. Accession for null to detect disagreement)
 /// </returns>
 public static ProteinSearchInfo Intersection(IEnumerable<ProteinSearchInfo> list)
 {
     if (list == null)
         return null;
     var proteinSearchInfos = list as ProteinSearchInfo[] ?? list.ToArray();
     if (!proteinSearchInfos.Any())
         return null;
     var firstDbInfo = proteinSearchInfos[0].ProteinDbInfo;
     var result = new ProteinSearchInfo(new DbProteinName(firstDbInfo.Protein, firstDbInfo.GetProteinMetadata().ClearWebSearchInfo()), 0);
     var rdb = result.ProteinDbInfo;
     // The result object's type is fixed, so reflect over its string properties once
     // instead of repeating the lookup for every member of the list.
     var stringProperties = rdb.GetType().GetProperties().Where(prop => prop.PropertyType == typeof (string)).ToArray();
     foreach (var p in proteinSearchInfos.Skip(1))
     {
         // Make sure all string properties in list agree, nulling out those that don't
         var pdb = p.ProteinDbInfo;
         var pdbProperties = pdb.GetType().GetProperties(); // one reflection pass per member, not per property
         foreach (var resultProperty in stringProperties)
         {
             var propertyName = resultProperty.Name;
             // Match by name against this member's own type, as it is looked up independently of the result's type
             var pdbProperty = pdbProperties.First(pprop => Equals(pprop.Name, propertyName));
             if (!Equals(resultProperty.GetValue(rdb, null), pdbProperty.GetValue(pdb, null)))
             {
                 resultProperty.SetValue(rdb, null);
             }
         }
     }
     return result;
 }