#' Install a CRAN package only when it is not already installed.
#'
#' Availability is tested with `requireNamespace()`, which loads the package
#' namespace without attaching it. The search path is therefore left exactly
#' as the caller had it; the previous `require()` + `detach()` check detached
#' the package even when it had been attached before the call.
#'
#' @param packageName Name of the package, as a single character string.
#' @return `NULL`, invisibly; the function is called for its side effect.
installifnot <- function(packageName) {
  if (!requireNamespace(packageName, quietly = TRUE)) {
    utils::install.packages(packageName)
  }
  invisible(NULL)
}
#' Install a Bioconductor package only when it is not already installed.
#'
#' Availability is tested with `requireNamespace()`, so the search path is not
#' modified (the previous `require()` + `detach()` check detached the package
#' even when it had been attached before the call).
#'
#' On R >= 3.5 the installation goes through `BiocManager::install()`,
#' installing BiocManager from CRAN first when needed. On older R versions the
#' legacy `biocLite()` installer is still used, now fetched over https.
#'
#' @param packageName Name of the package, as a single character string.
#' @return `NULL`, invisibly; the function is called for its side effect.
installBiocifnot <- function(packageName) {
  if (requireNamespace(packageName, quietly = TRUE)) {
    return(invisible(NULL))
  }
  if (getRversion() >= "3.5.0") {
    if (!requireNamespace("BiocManager", quietly = TRUE)) {
      utils::install.packages("BiocManager")
    }
    BiocManager::install(packageName)
  } else {
    # Legacy installer for R < 3.5; source() defines biocLite() globally.
    source("https://bioconductor.org/biocLite.R")
    biocLite(packageName)
  }
  invisible(NULL)
}
# Make sure the packages used below are installed.
# CRAN packages first:
for (cranPkg in c("knitr", "xlsx")) {
  installifnot(cranPkg)
}
# installifnot("writexl")
# Bioconductor package:
installBiocifnot("GEOquery")
# Remaining CRAN package:
installifnot("tidyverse")
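The `for` loop is an instruction to change the program flow.

The `ExpressionSet` class is intended to store in a single object the distinct data associated with a microarray experiment, such as the expression values, the sample covariates and the description of the experiment: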
# Attach Biobase, whose accessors (slotNames targets, exprs, pData, ...) are
# used in the statements that follow.
library(Biobase)
# Load the example dataset into the workspace as `sample.ExpressionSet`
# (presumably shipped with Biobase -- confirm).
data("sample.ExpressionSet")
The distinct components of the dataset can be accessed by name; the available names are listed by the `slotNames` function.
# List the names of the components (slots) stored in the object.
slotNames(sample.ExpressionSet)
## [1] "experimentData" "assayData" "phenoData" "featureData"
## [5] "annotation" "protocolData" ".__classVersion__"
# Print the experiment-level description (experimenter, laboratory, title, ...).
experimentData(sample.ExpressionSet)
## Experiment data
## Experimenter name: Pierre Fermat
## Laboratory: Francis Galton Lab
## Contact information: pfermat@lab.not.exist
## Title: Smoking-Cancer Experiment
## URL: www.lab.not.exist
## PMIDs:
##
## Abstract: A 8 word abstract is available. Use 'abstract' method.
## notes:
## notes:
## An example object of expression set (exprSet) class
The two most commonly used functions for accessing data are:
- `exprs`, which provides the expression matrix.
- `pData`, which provides the covariates.
# Print the sample covariates: one row per sample (A-Z) with the columns
# sex, type and score.
pData(sample.ExpressionSet)
## sex type score
## A Female Control 0.75
## B Male Case 0.40
## C Male Control 0.73
## D Male Case 0.42
## E Female Case 0.93
## F Male Control 0.22
## G Male Case 0.96
## H Male Case 0.79
## I Female Case 0.37
## J Male Control 0.63
## K Male Case 0.26
## L Female Control 0.36
## M Male Case 0.41
## N Male Case 0.80
## O Female Case 0.10
## P Female Control 0.41
## Q Female Case 0.16
## R Male Control 0.72
## S Male Case 0.17
## T Female Case 0.74
## U Male Control 0.35
## V Female Control 0.77
## W Male Control 0.27
## X Male Control 0.98
## Y Female Case 0.94
## Z Female Case 0.32
# Extract the expression matrix: rows are probes (e.g. AFFX-MurIL2_at) and
# columns are the samples A-Z.
X <- exprs(sample.ExpressionSet)
# Number of rows and columns of the matrix.
dim(X)
## [1] 500 26
# Preview the first rows of the matrix.
head(X)
## A B C D E F G H I
## AFFX-MurIL2_at 192.7420 85.75330 176.7570 135.5750 64.49390 76.3569 160.5050 65.9631 56.9039
## AFFX-MurIL10_at 97.1370 126.19600 77.9216 93.3713 24.39860 85.5088 98.9086 81.6932 97.8015
## AFFX-MurIL4_at 45.8192 8.83135 33.0632 28.7072 5.94492 28.2925 30.9694 14.7923 14.2399
## AFFX-MurFAS_at 22.5445 3.60093 14.6883 12.3397 36.86630 11.2568 23.0034 16.2134 12.0375
## AFFX-BioB-5_at 96.7875 30.43800 46.1271 70.9319 56.17440 42.6756 86.5156 30.7927 19.7183
## AFFX-BioB-M_at 89.0730 25.84610 57.2033 69.9766 49.58220 26.1262 75.0083 42.3352 41.1207
## J K L M N O P Q R
## AFFX-MurIL2_at 135.60800 63.44320 78.2126 83.0943 89.3372 91.0615 95.9377 179.8450 152.4670
## AFFX-MurIL10_at 90.48380 70.57330 94.5418 75.3455 68.5827 87.4050 84.4581 87.6806 108.0320
## AFFX-MurIL4_at 34.48740 20.35210 14.1554 20.6251 15.9231 20.1579 27.8139 32.7911 33.5292
## AFFX-MurFAS_at 4.54978 8.51782 27.2852 10.1616 20.2488 15.7849 14.3276 15.9488 14.6753
## AFFX-BioB-5_at 46.35200 39.13260 41.7698 80.2197 36.4903 36.4021 35.3054 58.6239 114.0620
## AFFX-BioB-M_at 91.53070 39.91360 49.8397 63.4794 24.7007 47.4641 47.3578 58.1331 104.1220
## S T U V W X Y Z
## AFFX-MurIL2_at 180.83400 85.4146 157.98900 146.8000 93.8829 103.85500 64.4340 175.61500
## AFFX-MurIL10_at 134.26300 91.4031 -8.68811 85.0212 79.2998 71.65520 64.2369 78.70680
## AFFX-MurIL4_at 19.81720 20.4190 26.87200 31.1488 22.3420 19.01350 12.1686 17.37800
## AFFX-MurFAS_at -7.91911 12.8875 11.91860 12.8324 11.1390 7.55564 19.9849 8.96849
## AFFX-BioB-5_at 93.44020 22.5168 48.64620 90.2215 42.0053 57.57380 44.8216 61.70440
## AFFX-BioB-M_at 115.83100 58.1224 73.42210 64.6066 40.3068 41.82090 46.1087 49.41220
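The GEOquery package

`GEOquery` is a package that allows downloading a whole study from GEO with a simple instruction; the downloaded data are handled as `ExpressionSet` objects. Help on the download function is available with
? getGEO
after loading the package.
gse <- getGEO('GSE10')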
# getGEO() returns a list, so work with its first element.
eset<- gse[[1]]
# Check which class the extracted element has.
class(eset)
# Sample covariates of the downloaded study.
pData(eset)
# Expression matrix of the downloaded study (kept in lower-case `x`,
# separate from the `X` extracted from sample.ExpressionSet).
x<- exprs(eset)
# Size of the matrix, then a preview of its first rows.
dim(x)
head(x)
# Download the sample covariates (pData) of each listed GEO series and store
# them in "phenoData.xlsx", one sheet per study.
#
# library() replaces require(): a missing package now stops the script here
# instead of surfacing later as a "could not find function" error. Both
# packages are attached once, before the loop, rather than on every iteration.
library(GEOquery)
library(xlsx)

# Numeric part of the GSE accessions to process; alternatives kept for reference.
listOfStudies <- c("73517", "16476") # c("62564", "3446") # c("45547")
for (studyID in listOfStudies) {
  accession <- paste0("GSE", studyID)
  gse <- getGEO(accession, GSEMatrix = TRUE, AnnotGPL = TRUE)
  # getGEO() returns a list; only its first element is exported.
  eset <- gse[[1]]
  phenoDat <- pData(eset)
  # append = TRUE adds each study as a further sheet of the same workbook.
  # NOTE(review): re-running with an existing phenoData.xlsx that already
  # holds a sheet of this name is expected to fail -- confirm, and remove the
  # file beforehand if a clean re-run is needed.
  write.xlsx(phenoDat, file = "phenoData.xlsx", sheetName = accession,
             append = TRUE)
}