1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759
|
#Copyright 2001 R.Gentleman, all rights reserved
#functions to look up particular genes at different sites
# Modifications to htmlpage and getQuery4XX functions added
# 7-12-04 by J. MacDonald
UniGeneQuery <- function(query, UGaddress="UniGene/",
type="CID") {
if (missing(query))
stop("No query, cannot proceed!")
##they are of the form HH.xxxx, where HH specifies the species
q1 <- strsplit(query, "\\.")
if( length(q1[[1]]) == 2 ) {
id <- sapply(q1, function(x) x[2])
species <- sapply(q1, function(x) x[1])
}
ncbiURL <- .getNcbiURL()
## Build up the query URL
query <- paste(ncbiURL, UGaddress,
"clust.cgi?ORG=",species,"&", type, "=",id, sep="")
return(query)
}
entrezGeneByID <- function(query) {
if (missing(query))
stop("No query, cannot proceed!")
ncbiURL <- .getNcbiURL()
## Build up the query URL
query <- paste(ncbiURL, "/sites/entrez?db=gene&cmd=search&term=",query, sep="")
return(query)
}
entrezGeneQuery <- function(query) {
if (missing(query))
stop("No query, cannot proceed!")
ncbiURL <- .getNcbiURL()
## Build up the query URL
str = ""
##reduce the set of parameters so that they are all one concatenated thing
for(i in seq_len(length(query))){
if(i==1){str=query[i]}else{
str = paste(str,"%20",query[i],sep="")
}
}
query <- paste(ncbiURL, "/sites/entrez?db=gene&cmd=search&term=",str, sep="")
return(query)
}
pmidQuery <- function(query) {
if (missing(query))
stop("No query, cannot proceed!")
query <- paste(query,collapse="%2c")
ncbiURL <- .getNcbiURL()
query <- paste(ncbiURL,"/entrez/query.fcgi?cmd=Retrieve&db=PubMed&",
"list_uids=",query,"&dopt=Abstract&tool=bioconductor",sep="")
return(query)
}
genbank <- function(..., disp=c("data","browser"),
type=c("accession", "uid"),
pmaddress=.efetch("gene",disp,type)) {
params <- list(...)
params <- unlist(params)
disp <- match.arg(disp)
type <- match.arg(type)
if (length(params) == 0) {
stop("No Gene ID, cannot proceed")
}
ncbiURL <- .getNcbiURL()
## Build up the query URL
args <- paste(params,collapse="%2c")
## See if we need to transform accession based arguments
err <- args
args <- .transformAccession(args, disp, type,db="genbank")
if (is.null(args)) {
print(paste("No XML records available for accession number",err))
return(NULL)
}
id <- .getIdTag(disp,type)
query <- paste(ncbiURL, pmaddress, id, args, sep="")
## Determine if we are displaying this data in a browser or
## returning an XMLDocument object
if (disp == "data") {
return(.handleXML(query))
}
else {
browseURL(query)
}
}
## bad query string:
## query = "http://www.ncbi.nih.gov/entrez/utils/pmfetch.fcgi?report=xml&mode=text&tool=bioconductor&db=Nucleotide&id=571320,4103966"
pubmed <- function(..., disp=c("data","browser"),
type=c("uid","accession"),
pmaddress=.efetch("PubMed",disp,type)) {
params <- list(...)
params <- unlist(params)
disp <- match.arg(disp)
type <- match.arg(type)
if (length(params) == 0) {
stop("No PMID, cannot proceed")
}
ncbiURL <- .getNcbiURL()
## Build up the query URL
args <- paste(params,collapse="%2c")
## See if we need to transform accession based arguments
err <- args
args <- .transformAccession(args, disp, type,"pubmed")
if (is.null(args)) {
message("No XML records available for accession number ", err)
return(NULL)
}
id <- .getIdTag(disp,type)
query <- paste(ncbiURL, pmaddress, id, args, sep="")
## Determine if we are displaying this data in a browser or
## returning an XMLDocument object
if (disp == "data") {
return(.handleXML(query))
}
else {
browseURL(query)
}
}
## A replacement for RCurl::getURL()
##
## Oct 13, 2020: RCurl::getURL() started to fail recently on Windows for
## some HTTPS requests e.g. we started to see the following error on all
## the Bioconductor Windows build machines (tokay1, riesling1, tokay2):
## > library(RCurl)
## > query <- "https://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gene&to$
## > doc <- RCurl::getURL(query)
## Error in function (type, msg, asError = TRUE) :
## error:1407742E:SSL routines:SSL23_GET_SERVER_HELLO:tlsv1 alert protocol version
## The reason is not clear but we suspect a server-side issue with the SSL
## certificate that only manifests itself with RCurl::getURL() on Windows.
## httr::GET() doesn't seem to be affected so starting with annotate 1.67.2
## accessionToUID() and blastSequences() use this instead of RCurl::getURL().
getURL2 <- function(url)
{
response <- httr::GET(url)
rawToChar(response$content)
}
accessionToUID <- function(...,db=c("genbank","pubmed")) {
#require(XML)
## Passed an accession #, returns a pubmed UID
accNum <- list(...)
accNum <- unlist(accNum)
accNum <- paste(accNum,collapse="+OR+")
db <- match.arg(db)
## Certain functions will be passing in a single string of comma
## deliminated Accession #s. Change the commas to "+OR+"
accNum <- gsub("\\,","+OR+",accNum)
if (db == "genbank") {
db <- "gene"
}
else {
db <- "PubMed"
}
query <- paste(.getNcbiURL(), "entrez/eutils/esearch.fcgi?db=", db,
"&tool=bioconductor&term=",accNum,sep="")
## parse using XML package
Sys.sleep(0.15) # avoid HTTP Error 429 "Too Many Requests"
doc <- xmlParse(getURL2(query)) # RCurl::getURL() has issues, see above
res <- xpathApply(doc=doc, path="/eSearchResult/IdList/Id",
fun=xmlValue)
retVal <- unlist(res)
if (length(retVal)==0){retVal <- NULL} else {
retVal <- paste(retVal, collapse=",")
}
return(retVal)
}
.handleXML <- function(query,handlers=NULL) {
## In the case of an error retrieving proper XML output,
## will return NA to the calling function
options(show.error.messages = FALSE)
on.exit(options(show.error.messages = TRUE))
## get the XML file contents from URL, and remove extra
## text strings before <xml...
Sys.sleep(0.15) # avoid HTTP Error 429 "Too Many Requests"
query <- paste(scan(query, what="", sep="\n"), "\n", collapse="\n")
query <- sub("^[^<]*<(.*)", "<\\1",query)
retVal <- NULL
xml <- try(xmlTreeParse(query,asText=TRUE,handlers=NULL,asTree=TRUE))
if (inherits(xml,"try-error") == TRUE) {
return(NA)
}
return(xml)
}
.getNcbiURL <- function() {
## Returns the URL for NCBI, which should be located in Annotate's
## option set
BioCOpt <- getOption("BioC")
if (!is.null(BioCOpt)) {
ncbiURL <- BioCOpt$annotate$urls$ncbi
}
if (!exists("ncbiURL")) {
ncbiURL <- "https://eutils.ncbi.nlm.nih.gov"
## old one: "http://www.ncbi.nih.gov/"
}
return(ncbiURL)
}
.getIdTag <- function(disp=c("data","browser"),
type=c("uid","accession")) {
disp <- match.arg(disp)
type <- match.arg(type)
if (disp == "data") {
return("&id=")
}
else {
if (type == "uid") {
return("&list_uids=")
}
else {
return("&term=")
}
}
}
## TODO: retire this method by replacing it with .efetch (NCBI is no longer supporting URLs of this ilk)
## .pmfetch <- function(db="PubMed", disp=c("data","browser"),
## type=c("uid","accession")) {
## ## Returns the base query string for the pmfetch engine @ pubmed
## disp <- match.arg(disp)
## type <- match.arg(type)
## if (disp == "data") {
## base <-
## "entrez/utils/pmfetch.fcgi?report=xml&mode=text&tool=bioconductor&db="
## }
## else {
## base1 <- "entrez/query.fcgi?tool=bioconductor&cmd="
## if (type == "uid") {
## base2 <- "Retrieve&db="
## }
## else {
## base2 <- "Search&db="
## }
## base <- paste(base1,base2,sep="")
## }
## return(paste(base,db,sep=""))
## }
## Needed to replace the aging (and obsoleted by NCBI) pmfetch...
.efetch <- function(db="PubMed", disp=c("data","browser"),
type=c("uid","accession")) {
## Returns the base query string for the efetch engine
disp <- match.arg(disp)
type <- match.arg(type)
if (disp == "data") {
base <-
"entrez/eutils/efetch.fcgi?tool=bioconductor&rettype=xml&retmode=text&db="
}
else {
base1 <- "entrez/query.fcgi?tool=bioconductor&cmd="
if (type == "uid") {
base2 <- "Retrieve&db="
}
else {
base2 <- "Search&db="
}
base <- paste(base1,base2,sep="")
}
return(paste(base,db,sep=""))
}
.transformAccession <- function(args, disp, type, db) {
## Used to change accession ID arguments to query functions
## into UIDs if necessary. Returns NULL if there aren't any left.
if ((disp == "data")&&(type=="accession")) {
args <- accessionToUID(args,db=db)
}
return(args)
}
genelocator <- function(x) {
.Defunct("none", package="annotate", msg = "is no longer supported")
}
pmAbst2HTML <- function(absts, filename, title, frames = FALSE,
table.center=TRUE) {
## Currently just a very naive implementation of a pmid2html type
## of thing. Intended to be temporary just while I'm testing some
## of this stuff.
if (!is.list(absts)) {
if (is(absts,"pubMedAbst"))
absts <- list(absts)
else
stop("'absts' parameter does not seem to be valid.")
}
## Assign a default filename. If we're using frames, then
## 'filename' is really just the base filename, so make it empty
if (missing(filename))
if (frames)
fileName <- ""
else
filename <- "absts.html"
if (missing(title))
title <- "BioConductor Abstract List"
nrows = length(absts)
pmids <- unlist(lapply(absts,pmid))
dates <- unlist(lapply(absts,pubDate))
queries <- unlist(lapply(absts,
function(x){pm <- pmid(x);out<-pmidQuery(pm);out}))
titles <- unlist(lapply(absts, articleTitle))
## If we're using frames, need to point the anchors to
## the main frame, otherwise not.
anchors <- makeAnchor(queries, titles, toMain=frames)
topText <- paste("<html>\n<head>\n<title>", title, "</title>",
"\n</head>\n<body bgcolor=#708090>\n",
"<H1 ALIGN=CENTER>", title, "</H1>\n",
"</body></title>", sep="")
head <- c("Article Title", "Publication Date")
headOut <- paste("<TH>", head, "</TH>", collapse="\n")
if (frames) {
top <- new("HTMLPage", fileName=paste(filename,"Top.html",sep=""),
pageText= topText)
tableHeader <- paste("<TR>",headOut,"</TR>", sep="\n")
sideText <- paste("<TABLE BORDER=1>", tableHeader, sep="\n")
tds <- paste("<TD>",anchors,"</TD><TD>",dates,"</TD>",sep="",
collapse="\n</TR>\n<TR>\n")
tds <- paste("<TR>",tds,"</TR>")
sideText <- paste(sideText, tds)
if (table.center)
sideText <- paste("<CENTER>",sideText,"</CENTER>", sep="\n")
sideText <- paste("<html>", "<head>",
"<title>BioConductor Abstract List</title>",
"</head>","<body bgcolor=#708090>",
sideText, "</body>", "</html>", sep="\n")
side <- new("HTMLPage",
fileName=paste(filename,"Side.html",sep=""),
pageText=sideText)
metaText <- paste("<meta HTTP-EQUIV=\"REFRESH\" CONTENT=\"1;",
queries[1],"\">",sep="")
mainText <- paste("<html>", "<head>",
"<title>BioConductor Abstract List</title>",
"</head>","<body bgcolor=#708090>",
metaText,
"</body>","</html>", sep="\n")
main <- new("HTMLPage",
fileName=paste(filename,"Main.html",sep=""),
pageText=mainText)
page <- new("FramedHTMLPage", topPage=top, sidePage=side, mainPage=main,
fileName=paste(filename,"index.html",sep=""),
pageTitle=title)
toFile(page)
}
else {
outfile <- file(filename,"w")
cat(topText, file = outfile)
if( table.center )
cat("<CENTER> \n", file=outfile)
cat("<TABLE BORDER=1>", file = outfile, sep = "\n")
cat("<TR>",headOut,"</TR>", file=outfile, sep="\n")
tds <- paste("<TD>",anchors,"</TD><TD>",dates,"</TD>",sep="")
for (td in tds)
cat("<TR>", td, "</TR>", file=outfile,sep="\n")
cat("</TABLE>",file=outfile)
if( table.center )
cat("</CENTER> \n", file=outfile)
cat("</body>", "</html>", sep = "\n", file = outfile)
close(outfile)
}
invisible(NULL)
}
htmlpage <- function (genelist, filename, title, othernames, table.head,
table.center=TRUE, repository = list("en"), ...){
if(!is.list(repository))
stop("The repository argument must be a list!", call. = FALSE)
chklen <- function(x){
if(is.data.frame(x) || is.matrix(x)) dim(x)[1]
else length(x)
}
getRows <- function(x){
paste("<P>", x, "</P>", collapse="", sep="")
}
if(is.data.frame(genelist))
len.vec <- chklen(genelist)
else
if(is.list(genelist))
len.vec <- sapply(genelist, chklen)
else
stop("The 'genelist' should be either a data.frame or a list",
call.=FALSE)
if(!missing(othernames)) {
if(is.data.frame(othernames))
len.vec <- c(len.vec, chklen(othernames))
else if( is.list(othernames))
len.vec <- c(len.vec, sapply(othernames, chklen))
else
stop("The 'othernames' should be either a data.frame or a list",
call.=FALSE)
}
if(any(len.vec != len.vec[1]))
stop(paste("Some items in either", genelist, "or", othernames,
"have mis-matched lengths.\nPlease check this",
"discrepancy and re-run.\n"), .call=FALSE)
out <- mapply(getCells, genelist, repository, ..., SIMPLIFY=TRUE)
if (!missing(othernames)) {
if(is.data.frame(othernames))
out <- data.frame(out, othernames)
else
if (is.list(othernames)) {
## if othernames is a list, we have to ensure we handle
## the contents of the list correctly
## e.g., cbind()ing a factor will coerce things incorrectly
## here we just put everything in another list that we can
## then coerce to a data.frame
others <- vector("list", length(othernames))
for(i in seq(along=othernames)){
if(is.data.frame(othernames[[i]]))
others[[i]] <- othernames[[i]]
else
if(is.list(othernames[[i]])){
## if othernames[[i]] is a list, the assumption
## here is that we want a multi-line table entry
## in the HTML page
others[[i]] <- sapply(othernames[[i]],
getRows)
}else{
others[[i]] <- othernames[[i]]
}
}
out <- data.frame(out, as.data.frame(others))
}
}
colnames(out) <- table.head
out <- xtable(out, caption=if(!missing(title)) title, ...)
print(out, type="html", file=filename, caption.placement="top",
include.rownames=FALSE, sanitize.text.function=function(x) x,
...)
}
getCells <- function(ids, repository = "ug", ...){
# This function allows us to insert multiple links in each cell by
# building up the HTML more incrementally. Passing a list of character
# vectors will result in multiple links per cell. Otherwise we get one link per cell.
if(is.list(ids)){
out <- vector()
temp <- lapply(ids, getQueryLink, repository=repository, ...)
for(i in seq(along = ids)){
if(temp[i] != " ")
out[i] <- paste("<P><A HREF=\"", temp[[i]], "\">",
ids[[i]], "</A></P>", sep = "", collapse="")
else
out[i] <- temp[i]
}
}else{
temp <- getQueryLink(ids, repository, ...)
blanks <- temp == " "
out <- paste(" <A HREF=\"", temp, "\">",
ids, "</A>", sep = "")
out[blanks] <- " "
}
return(out)
}
## getQueryLink <-function (ids, repository = "ug", ...){
## switch(tolower(repository), ug = return(getQuery4UG(ids)),
## gb = return(getQuery4GB(ids)), sp = return(getQuery4SP(ids)),
## omim = return(getQuery4OMIM(ids)), fb = return(getQuery4FB(ids)),
## en = return(getQuery4EN(ids)), tr = return(getQuery4TR(ids)),
## go = return(getQuery4GO(ids)), ens = return(getQuery4ENSEMBL(ids, ...)),
## random = return(getQuery4Random(ids)), stop("Unknown repository name"))
## }
## Code from Martin Morgan that allows end user to add arbitrary
## repository
## the interface: set, get, clear
setRepository <- function(repository, FUN, ..., verbose=TRUE)
{
## checs on repository, FUN, then...
if (verbose && exists(repository, .repositories))
warning("replacing repository '", repository, "'")
.repositories[[repository]] <- FUN
}
getRepositories <- function()
{
ls(.repositories)
}
clearRepository <- function(repository, verbose=TRUE)
{
if (!(length(repository) == 1 && is.character(repository)))
stop("argument 'repository' must be character(1)")
## check repository, then
if (exists(repository, .repositories))
rm(list=repository, envir=.repositories)
else if (verbose)
warning("undefined repository '", repository, "'")
}
## this should be backward compatible
getQueryLink <- function (ids, repository = "ug", ...)
{
if (!exists(repository, .repositories))
stop("unknown repository '", repository, "'")
.repositories[[repository]](ids, ...)
}
getTDRows <- function (ids, repository = "ug", ...){
# Modification of Jianhua's original code to allow for multiple links per cell.
out <- paste("<TD>", getCells(ids, repository), "</TD>", sep="")
return(out)
}
getQuery4GO <- function(ids, ...) {
##GO IDs
blanks <- ids == " "
AMIGO_URL <- "http://amigo.geneontology.org/cgi-bin/amigo/term_details?term="
out <- paste(AMIGO_URL, ids, sep = "")
out[blanks] = " "
return(out)
}
getQuery4Affy <- function (ids, ...){
# Affy IDs are all over the map, so there is no good way to catch any garbage input.
# Here we have to rely on the end user to filter out garbage by passing an empty cell.
blanks <- ids == " "
out <- paste("https://www.affymetrix.com/LinkServlet?&probeset=",
ids, sep="")
out[blanks] <- " "
return(out)
}
getQuery4UG <- function (ids, ...){
# Slight modification of Jianhua's original code, replacing error message with
# empty cells in the table.
if(is.factor(ids))
ugs <- strsplit(as.character(ids), "\\.")
else
ugs <- strsplit(ids, "\\.")
badUG <- function(x) if (length(x) != 2 || nchar(x[1]) < 2)
return(TRUE)
else return(FALSE)
bIDs <- sapply(ugs, badUG)
temp <- vector()
for( i in seq(along=ids)){
if(!bIDs[i])
temp[i] <- paste("https://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=",
ugs[[i]][1], "&CID=", ugs[[i]][2], sep = "")
else
temp[i] <- " "
}
return(temp)
}
getQuery4LL <- function (ids, ...) {
.Defunct(msg="The 'll' repository argument is deprecated. Please use 'en'\n.")
}
getQuery4EN <- function (ids, ...){
## Here we rely on Entrez Gene IDs being all numeric to filter out garbage
## that will result in busted links.
if(is.factor(ids)){
options(warn = -1)
ids <- as.numeric(as.character(ids))
options(warn = 0)
blanks <- is.na(ids)
}
if(is.character(ids)){
options(warn = -1)
ids <- as.numeric(ids)
options(warn = 0)
blanks <- is.na(ids)
}
if(is.numeric(ids))
blanks <- is.na(ids)
out <- paste("https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=",
ids, sep = "")
out[blanks] <- " "
return(out)
}
getQuery4TR <- function(ids, ...){
## No automatic garbage checking. The ath1121501 has accnum values of 'multiple'
## that we can convert to blanks however.
blanks <- ids == " " || ids == "multiple"
out <- paste("http://www.arabidopsis.org/servlets/Search?type=general&search_action=detail&method=1&name=", ids,
"&sub_type=gene", sep="")
out[blanks] <- " "
return(out)
}
getQuery4GB <- function (ids, ...){
# GenBank ids can be either GB or RefSeq, so there is no good way to filter garbage.
# Again we rely on end user to pass blanks.
blanks <- ids == " "
out <- paste("https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&term=",
ids, sep="")
out[blanks] <- " "
return(out)
}
getQuery4SP <- function(ids, ...){
## SwissProt ids are not consistent enough to do any sort of garbage checking
## so here we rely on a blank being passed by the end user.
blanks <- ids == " "
## http://www.uniprot.org/uniprot?query=1&AC=P21108
out <- paste("http://www.uniprot.org/uniprot/", ids, sep="")
out[blanks] <- " "
return(out)
}
getQuery4OMIM <- function(ids, ...){
# Conversion here relies on the assumption that OMIM ids are all numeric
# so any non-numeric entry must be some sort of garbage that will result in
# a broken link.
if(is.factor(ids)){
options(warn = -1)
ids <- as.numeric(as.character(ids))
options(warn = 0)
blanks <- is.na(ids)
}
if(is.character(ids)){
options(warn = -1)
ids <- as.numeric(ids)
options(warn = 0)
blanks <- is.na(ids)
}
if(is.numeric(ids))
blanks <- is.na(ids)
out <- paste("http://www.omim.org/entry/", ids, sep="")
if(!is.null(blanks))
out[blanks] <- " "
return(out)
}
getQuery4FB <- function (ids, ...){
## Function to build links to flybase for drosophila arrays
## Here I rely on the flybase number starting with FBgn
## The end user can also pass an empty cell identifier
if(is.factor(ids))
fbs <- strsplit(as.character(ids), "FBgn")
else
fbs <- strsplit(ids, "FBgn")
badFB <- function(x) if(length(x) != 2 || nchar(x[1]) != 0)
return(TRUE) else return(FALSE)
bIDS <- sapply(fbs, badFB)
out <- paste("http://flybase.bio.indiana.edu/.bin/fbidq.html?",
ids, sep = "")
out[bIDS] <- " "
return(out)
}
getQuery4ENSEMBL <- function(ids, ...){
## function to build links to Ensembl
## Ensembl IDs can start with ENSG, ENSE, ENSP or ENST at the very least
ids[is.na(ids)] <- " "
if(is.factor(ids))
enids <- strsplit(as.character(ids), "ENS")
else
enids <- strsplit(ids, "ENS")
badENS <- function(x) if(length(x) !=2 || nchar(x[1]) != 0)
return(TRUE) else return(FALSE)
bIDS <- sapply(enids, badENS)
##FIXME: should we do some error checking on the species?
## it should be e.g., Homo_sapiens
if(!is.null(list(...)$species))
species <- list(...)$species
else
stop("To make links for Ensembl, you need to pass a 'species' argument.",
call. = FALSE)
out <- paste("http://www.ensembl.org/", species, "/Gene/Summary?g=",
ids, sep = "")
out[bIDS] <- " "
out
}
|