options(java.parameters = "-Xmx4000M")
library(SELEX)
library(SelexGLM)
library(grid)
# Working directory handed to SELEX, together with the thread limit.
workDir <- "./cache/"
selex.config(workingDir = workDir, maxThreadNumber = 4)
### LOCAL PATHS NEED TO BE RE-DEFINED TO RUN OFF OF MY COMPUTER
##################################################################
selexDir <- "/Users/gabriella/Columbia/SELEX/"
#rawdataDir = "/Users/gabriella/Columbia/rawdata/Pufall/"
processedDataDir <- "/Users/gabriella/Columbia/SplitFastqData/Pufall/ConcatFiles/"
# CLUSTER VERSIONS ARE COMMENTED OUT
#selexDir = "/vega/hblab/users/gdm2120/SELEX/SELEX/"
#rawdataDir = "/vega/hblab/projects/selex/rawdata/Pufall"
#processedDataDir = "/vega/hblab/users/gdm2120/SplitFastqData/Pufall/"
##################################################################
# Output directory for this run (relative to selexDir); created if it is missing.
saveDir <- "gabriella/SelexGLMtest/BasicSymmetry"
dir.create(file.path(selexDir, saveDir), recursive = TRUE, showWarnings = FALSE)
# Read the tab-separated shape table and keep column 1 plus columns 14-19,
# renamed to the sequence and the six shape-parameter labels below.
shapeTable <- read.table(
  paste0(selexDir, "gabriella/ShapeParamData/ShapeTableOrthogonal.txt"),
  sep = "\t",
  stringsAsFactors = FALSE
)
ST <- shapeTable[, c(1, 14:19)]
colnames(ST) <- c("Sequence", "MGW", "ProT", "HelTA", "HelTB", "RollA", "RollB")
# Register the round-0 library and the AR-DBD round 8 and round 7 samples.
# All three use the same trailing arguments (23, "", "TGGAA").
selex.defineSample(
  "r0.Pufall",
  paste0(processedDataDir, "/Demultiplexed.R0.fastq.gz"),
  "r0",
  0, 23, "", "TGGAA"
)
selex.defineSample(
  "AR.R8",
  paste0(processedDataDir, "/AR.R8.fastq.gz"),
  "AR-DBD",
  8, 23, "", "TGGAA"
)
selex.defineSample(
  "AR.R7",
  paste0(processedDataDir, "/AR.R7.fastq.gz"),
  "AR-DBD",
  7, 23, "", "TGGAA"
)
# Round-0 handle, split into the train/test parts used for the Markov model.
r0 <- selex.sample(seqName = "r0.Pufall", sampleName = "r0", round = 0)
r0.split <- selex.split(r0)
r0.train <- r0.split$train
r0.test <- r0.split$test
# Only the round-8 sample is used as the data sample in this script.
dataSample <- selex.sample(seqName = "AR.R8", sampleName = "AR-DBD", round = 8)
# MARKOV MODEL BUILT
# Built on the round-0 training split and cross-validated against the test split.
kmax <- selex.kmax(sample = r0.test)
mm <- selex.mm(
  sample = r0.train, order = NA, crossValidationSample = r0.test,
  Kmax = kmax, mmMethod = "TRANSITION"
)
mmscores <- selex.mmSummary(sample = r0.train)
# Order(s) of the Markov model with the highest R in the summary table.
ido <- which(mmscores$R == max(mmscores$R))
mm.order <- mmscores$Order[ido]
libLen <- as.numeric(as.character(selex.getAttributes(dataSample)$VariableRegionLength))
kLen <- 15

# Probe counts: reuse the cached .RData file when it exists; otherwise compute
# and cache it, so the script also runs on a machine without the cache.
probeCountsFile <- paste0(selexDir, saveDir, "/data.probeCounts.RData")
if (file.exists(probeCountsFile)) {
  load(file = probeCountsFile)
} else {
  data.probeCounts <- getProbeCounts(dataSample, markovModel = mm)
  save(data.probeCounts, file = probeCountsFile)
}

# K-mer count/affinity table (k = kLen, minCount = 100), cached the same way.
kmerTableFile <- paste0(selexDir, saveDir, "/data.kmerTable.RData")
if (file.exists(kmerTableFile)) {
  load(file = kmerTableFile)
} else {
  data.kmerTable <- getKmerCountAffinities(dataSample, k = kLen, minCount = 100, markovModel = mm)
  save(data.kmerTable, file = kmerTableFile)
}
# Inputs about library are data specific
# Build the reverse-complement-symmetric nucleotide model for round 8, then
# show its feature design.
ModelTest <- model(
  name = "AR-DBD R8 Nucleotides (Rev. Comp. Sym.)",
  varRegLen = libLen,
  leftFixedSeq = "GTTCAGAGTTCTACAGTCCGACGATC",
  rightFixedSeq = "TGGAATTCTCGGGTGCCAAGG",
  consensusSeq = "RGWACANNNTGTWCY",
  affinityType = "AffinitySym",
  leftFixedSeqOverlap = 5,
  minAffinity = 0.01,
  missingValueSuppression = 0.5,
  minSeedValue = 0.001,
  upFootprintExtend = 4,
  confidenceLevel = 0.99,
  rounds = list(c(8)),
  rcSymmetric = TRUE,
  verbose = FALSE
)
getFeatureDesign(ModelTest)
## Feature design for object of class 'model'
##
## seedLen: 15
## upFootprintExtend: 4
## downFootprintExtend: 4
## rcSymmetric: TRUE
##
## Slot "N":
## N.upFootprintExtend: 4
## N.downFootprintExtend: 4
## N.set: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## Number of previous iterations: 0
##
## Slot "Intercept":
## Number of Views per Strand of DNA: 11
## Number of Rounds: 1 (8)
## Number of previous iterations: 0
##
## Slot "Shape":
## "ShapeParamsUsed": NONE
# Seed the model with a PSAM derived from the k-mer table.
addSeedPsam(ModelTest) <- seedTable2psam(ModelTest, data.kmerTable)
# Nucleotide betas of the model once the seed PSAM has been added.
print(getValues(getN(ModelTest)))
## 1 2 3 4 5 6 7 8 9
## N.A 0 0 0 0 0.00000000 -1.2968295 -0.03073087 0.000000 -1.296829
## N.C 0 0 0 0 -0.60728754 -1.2968295 -0.25628921 -1.296829 0.000000
## N.G 0 0 0 0 -0.09864725 0.0000000 -0.34036727 -1.296829 -1.296829
## N.T 0 0 0 0 -0.42644057 -0.5591611 0.00000000 -1.296829 -1.296829
## 10 11 12 13 14 15
## N.A 0.0000000 -0.40799975 -0.1359377 -0.09020211 -0.7968295 -1.296829
## N.C -1.2968295 0.00000000 0.0000000 -0.36546623 -0.3957275 -1.296829
## N.G -0.3957275 -0.36546623 0.0000000 0.00000000 -1.2968295 0.000000
## N.T -0.7968295 -0.09020211 -0.1359377 -0.40799975 0.0000000 -1.296829
## 16 17 18 19 20 21 22 23
## N.A -1.296829 0.00000000 -0.5591611 -0.42644057 0 0 0 0
## N.C -1.296829 -0.34036727 0.0000000 -0.09864725 0 0 0 0
## N.G -1.296829 -0.25628921 -1.2968295 -0.60728754 0 0 0 0
## N.T 0.000000 -0.03073087 -1.2968295 0.00000000 0 0 0 0
# Plot the nucleotide (N) features of the seeded model.
plot(
  ModelTest@features@N,
  Ntitle = "AR-DBD R8 Nucleotides\nSeeding Model",
  ddG = TRUE
)
# Next we score the probes using topModelMatch
# Draw a random subsample of at most 1,000,000 probes. The size is capped at
# the number of available rows so that sample() does not fail on probe tables
# with fewer than 1e6 rows.
# NOTE(review): no seed is set, so the subsample differs between runs.
sampleD <- sample(nrow(data.probeCounts), min(nrow(data.probeCounts), 1000000))
data <- data.probeCounts[sampleD, ]
#data = data.probeCounts
# Score the sampled probes against the current model.
data <- topModelMatch(data, ModelTest)
# Uses aligned probes to build design matrix
data <- addDesignMatrix(data, ModelTest)
designMatrixSummary <- getDesignMatrix(ModelTest, data)
## No shape parameters included in fit.
# Report the three parts of the design-matrix summary (round, view/strand, N).
print("Round summary: ")
## [1] "Round summary: "
print(designMatrixSummary$Round)
## 8 Total
## Round 999408 999408
print("View/strand orientation summary: ")
## [1] "View/strand orientation summary: "
print(designMatrixSummary$Intercept)
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F 5184 64675 97019 70065 107935 129680 140881 154352 110698
## Strand.R 0 0 0 0 0 0 0 0 0
## View.10 View.11 StrandTotal
## Strand.F 56886 62033 999408
## Strand.R 0 0 0
print("Mono-nucleotide summary: ")
## [1] "Mono-nucleotide summary: "
print(designMatrixSummary$N)
## N.A N.C N.G N.T
## 1 582968 638436 349381 428031
## 2 720305 470885 371829 435797
## 3 910532 499174 170802 418308
## 4 927919 254915 406939 409043
## 5 1289964 18567 647804 42481
## 6 651 227 1982376 15562
## 7 843107 104615 93464 957630
## 8 1996426 404 1105 881
## 9 643 1997223 361 589
## 10 1879787 684 115401 2944
## 11 80154 1115076 111955 691631
## 12 343695 655713 0 0
# Build the regression expression from the design matrix (independent features only).
regressionFormula <- updatedRegressionFormula(data, ModelTest)
print("Regression Formula: ")
## [1] "Regression Formula: "
print(regressionFormula)
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
# Poisson regression with a log link on the design-matrix data; the summary
# is auto-printed at top level.
fit <- glm(regressionFormula, data = data, family = poisson(link = "log"))
summary(fit)
##
## Call:
## glm(formula = regressionFormula, family = poisson(link = "log"),
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -20.572 -1.478 -0.744 0.255 64.231
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 36.5955610 0.0023611 15499.64 <2e-16 ***
## N.A1 0.0192588 0.0001384 139.14 <2e-16 ***
## N.G1 0.0148519 0.0001701 87.34 <2e-16 ***
## N.T1 -0.0244407 0.0001665 -146.78 <2e-16 ***
## N.C2 -0.1043241 0.0001536 -679.18 <2e-16 ***
## N.G2 -0.0207259 0.0001443 -143.58 <2e-16 ***
## N.T2 -0.0834206 0.0001644 -507.27 <2e-16 ***
## N.C3 -0.1109991 0.0001619 -685.47 <2e-16 ***
## N.G3 -0.1638791 0.0002356 -695.66 <2e-16 ***
## N.T3 -0.1259903 0.0001638 -768.94 <2e-16 ***
## N.C4 -0.1700052 0.0002408 -705.94 <2e-16 ***
## N.G4 -0.0844599 0.0001558 -542.19 <2e-16 ***
## N.T4 -0.1087257 0.0001539 -706.64 <2e-16 ***
## N.C5 -0.3386551 0.0010514 -322.10 <2e-16 ***
## N.G5 -0.0844214 0.0001300 -649.37 <2e-16 ***
## N.T5 -0.2938939 0.0006319 -465.08 <2e-16 ***
## N.A6 -0.6893494 0.0245148 -28.12 <2e-16 ***
## N.C6 -0.4466473 0.0151459 -29.49 <2e-16 ***
## N.T6 -0.3304381 0.0011572 -285.54 <2e-16 ***
## N.A7 0.0050843 0.0001105 46.03 <2e-16 ***
## N.C7 -0.1641560 0.0002946 -557.16 <2e-16 ***
## N.G7 -0.2037759 0.0003964 -514.12 <2e-16 ***
## N.C8 -0.4993633 0.0149408 -33.42 <2e-16 ***
## N.G8 -0.6588331 0.0164137 -40.14 <2e-16 ***
## N.T8 -0.7877859 0.0245149 -32.13 <2e-16 ***
## N.A9 -0.7925570 0.0360846 -21.96 <2e-16 ***
## N.G9 -0.8312360 0.0559018 -14.87 <2e-16 ***
## N.T9 -0.7112563 0.0255158 -27.88 <2e-16 ***
## N.C10 -0.7564857 0.0312502 -24.21 <2e-16 ***
## N.G10 -0.2177179 0.0003178 -685.00 <2e-16 ***
## N.T10 -0.5552904 0.0048680 -114.07 <2e-16 ***
## N.A11 -0.2762694 0.0004200 -657.79 <2e-16 ***
## N.G11 -0.2129582 0.0003259 -653.51 <2e-16 ***
## N.T11 -0.0671864 0.0001203 -558.57 <2e-16 ***
## N.A12 -0.1075403 0.0001773 -606.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 9228002 on 999407 degrees of freedom
## Residual deviance: 4816102 on 999373 degrees of freedom
## AIC: 6256139
##
## Number of Fisher Scoring iterations: 14
# Store the fitted coefficients in the model.
ModelTest <- addNewBetas(ModelTest, data, fit)
## No shape parameters included in fit.
# Nucleotide features after the first round of fitting
summary(ModelTest)
## An object of class 'model'
##
## Slot "name": AR-DBD R8 Nucleotides (Rev. Comp. Sym.)
## Slot "varRegLen": 23
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATC
## Slot "rightFixedSeq": TGGAATTCTCGGGTGCCAAGG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.99
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 0.5
## Slot "minSeedValue": 0.001
## Slot "seedLen": 15
## Slot "consensusSeq": [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 23
##
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 1 round(s) of data (round = 8) with reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.01925879 0.00000000 0.0000000 0.00000000 0.00000000 -0.6893494
## N.C 0.00000000 -0.10432409 -0.1109991 -0.17000521 -0.33865506 -0.4466473
## N.G 0.01485186 -0.02072592 -0.1638791 -0.08445988 -0.08442138 0.0000000
## N.T -0.02444075 -0.08342060 -0.1259903 -0.10872565 -0.29389389 -0.3304381
## 7 8 9 10 11 12
## N.A 0.005084271 0.0000000 -0.7925570 0.0000000 -0.27626944 -0.1075403
## N.C -0.164156015 -0.4993633 0.0000000 -0.7564857 0.00000000 0.0000000
## N.G -0.203775948 -0.6588331 -0.8312360 -0.2177179 -0.21295823 0.0000000
## N.T 0.000000000 -0.7877859 -0.7112563 -0.5552904 -0.06718637 -0.1075403
## 13 14 15 16 17 18
## N.A -0.06718637 -0.5552904 -0.7112563 -0.7877859 0.000000000 -0.3304381
## N.C -0.21295823 -0.2177179 -0.8312360 -0.6588331 -0.203775948 0.0000000
## N.G 0.00000000 -0.7564857 0.0000000 -0.4993633 -0.164156015 -0.4466473
## N.T -0.27626944 0.0000000 -0.7925570 0.0000000 0.005084271 -0.6893494
## 19 20 21 22 23
## N.A -0.29389389 -0.10872565 -0.1259903 -0.08342060 -0.02444075
## N.C -0.08442138 -0.08445988 -0.1638791 -0.02072592 0.01485186
## N.G -0.33865506 -0.17000521 -0.1109991 -0.10432409 0.00000000
## N.T 0.00000000 0.00000000 0.0000000 0.00000000 0.01925879
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.0001384160 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001536019 0.0001619317 0.0002408214 0.0010514095
## N.G 0.0001700521 0.0001443498 0.0002355738 0.0001557746 0.0001300052
## N.T 0.0001665123 0.0001644487 0.0001638487 0.0001538630 0.0006319266
## 6 7 8 9 10
## N.A 0.024514803 0.0001104562 0.00000000 0.03608458 0.0000000000
## N.C 0.015145882 0.0002946278 0.01494083 0.00000000 0.0312502390
## N.G 0.000000000 0.0003963599 0.01641371 0.05590184 0.0003178364
## N.T 0.001157234 0.0000000000 0.02451494 0.02551582 0.0048679542
## 11 12 13 14 15
## N.A 0.0004199947 0.0001773169 0.0001202824 0.0048679542 0.02551582
## N.C 0.0000000000 0.0000000000 0.0003258693 0.0003178364 0.05590184
## N.G 0.0003258693 0.0000000000 0.0000000000 0.0312502390 0.00000000
## N.T 0.0001202824 0.0001773169 0.0004199947 0.0000000000 0.03608458
## 16 17 18 19 20
## N.A 0.02451494 0.0000000000 0.001157234 0.0006319266 0.0001538630
## N.C 0.01641371 0.0003963599 0.000000000 0.0001300052 0.0001557746
## N.G 0.01494083 0.0002946278 0.015145882 0.0010514095 0.0002408214
## N.T 0.00000000 0.0001104562 0.024514803 0.0000000000 0.0000000000
## 21 22 23
## N.A 0.0001638487 0.0001644487 0.0001665123
## N.C 0.0002355738 0.0001443498 0.0001700521
## N.G 0.0001619317 0.0001536019 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001384160
##
##
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 8).
## Intercept beta values:
## Round.8:
## [1] 36.59556
##
## Intercept beta errors:
## Round.8:
## [1] 0.002361059
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
# Plot the model after the first fit and save it twice: as the "latest" plot
# (modelPlot.pdf) and as the numbered snapshot for iteration 1.
vPheight <- verticalPlot_height(ModelTest)
pM <- plot(ModelTest, plotTitle = "AR-DBD R8 Nucleotide Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)
# Spell out filename/plot instead of relying on `file` partially matching
# ggsave()'s `filename` argument.
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
  plot = pM, height = vPheight, width = 6
)
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.", 1, ".pdf"),
  plot = pM, height = vPheight, width = 6
)
# Re-score the same probe subsample against the updated model and rebuild the
# design matrix.
data <- data.probeCounts[sampleD, ]
#data = data.probeCounts
data <- topModelMatch(data, ModelTest)
data <- addDesignMatrix(data, ModelTest)
if (nrow(data) > 0) {
  designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
  # "Stability" means the N, Round and Intercept summaries are all unchanged
  # from the previous design matrix. Scalar `&&` is used because this is an
  # `if` condition.
  if (all(designMatrixSummary.v2$N == designMatrixSummary$N) &&
      all(designMatrixSummary.v2$Round == designMatrixSummary$Round) &&
      all(designMatrixSummary.v2$Intercept == designMatrixSummary$Intercept)) {
    print("Stability Reached")
  }
}
## No shape parameters included in fit.
# Refit the model repeatedly (iterations 2 to 20). Each pass fits a Poisson GLM
# on the current design matrix, stores the new betas, saves plots, re-scores
# the probes, and rebuilds the design matrix. The loop stops when no probes
# remain or when the design-matrix summaries (N, Round, Intercept) no longer
# change between passes.
for (i in 2:20) {
  if (nrow(data) == 0) {
    break
  }
  # Already stable before this iteration started (scalar `&&` in `if`).
  if (all(designMatrixSummary.v2$N == designMatrixSummary$N) &&
      all(designMatrixSummary.v2$Round == designMatrixSummary$Round) &&
      all(designMatrixSummary.v2$Intercept == designMatrixSummary$Intercept)) {
    break
  }
  # Row count of the design matrix entering this iteration (not read again in
  # the visible script; kept because it is a script-level variable).
  data.nrow <- nrow(data)
  print(paste("i =", i))
  designMatrixSummary <- designMatrixSummary.v2
  print("Round summary: ")
  print(designMatrixSummary$Round)
  print("Mono-nucleotide summary: ")
  print(designMatrixSummary$N)
  print("View/strand orientation summary: ")
  print(designMatrixSummary$Intercept)
  # Constructs regression expression with independent features using design matrix
  regressionFormula <- updatedRegressionFormula(data, ModelTest)
  print("Regression Formula: ")
  print(regressionFormula)
  fit <- glm(regressionFormula, data = data, family = poisson(link = "log"))
  # Values are not auto-printed inside a `for` loop, so print explicitly;
  # a bare summary(fit) here would be silently discarded.
  print(summary(fit))
  ModelTest <- addNewBetas(ModelTest, data, fit)
  # Nucleotide features after this round of fitting
  summary(ModelTest)
  pM <- plot(ModelTest, plotTitle = "AR-DBD R8 Nucleotide Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)
  # Numbered snapshot for this iteration, then overwrite the "latest" plot.
  # filename/plot are spelled out instead of relying on `file` partially
  # matching ggsave()'s `filename` argument.
  ggplot2::ggsave(
    filename = paste0(selexDir, saveDir, "/modelPlot.", i, ".pdf"),
    plot = pM, height = vPheight, width = 6
  )
  ggplot2::ggsave(
    filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
    plot = pM, height = vPheight, width = 6
  )
  # Re-score the current probes with the updated model and rebuild the design matrix.
  data <- topModelMatch(data, ModelTest)
  data <- addDesignMatrix(data, ModelTest)
  print(paste0("Number of Observations in Design Matrix: ", nrow(data)))
  if (nrow(data) > 0) {
    designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
    if (all(designMatrixSummary.v2$N == designMatrixSummary$N) &&
        all(designMatrixSummary.v2$Round == designMatrixSummary$Round) &&
        all(designMatrixSummary.v2$Intercept == designMatrixSummary$Intercept)) {
      print(paste0("Stability Reached after ", i, " iterations."))
      break
    }
  } else {
    print(paste0("Algorithm failed to converge: No probes meet the confidence level requirement (Confidence Level:", ModelTest@confidenceLevel, ")"))
    # Nothing left to fit; the next iteration would stop on the empty-data
    # guard anyway, so exit here.
    break
  }
}
## [1] "i = 2"
## [1] "Round summary: "
## 8 Total
## Round 999476 999476
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 582980 638505 349412 428055
## 2 720349 470918 371857 435828
## 3 910585 499202 170812 418353
## 4 927972 254931 406970 409079
## 5 1290011 18597 647845 42499
## 6 678 273 1982394 15607
## 7 843159 104641 93483 957669
## 8 1996470 462 1122 898
## 9 659 1997291 386 616
## 10 1879825 696 115452 2979
## 11 80179 1115106 111986 691681
## 12 343728 655748 0 0
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F 5190 64687 97023 70068 107943 129688 140886 154361 110715
## Strand.R 0 0 0 0 0 0 0 0 0
## View.10 View.11 StrandTotal
## Strand.F 56887 62028 999476
## Strand.R 0 0 0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": AR-DBD R8 Nucleotides (Rev. Comp. Sym.)
## Slot "varRegLen": 23
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATC
## Slot "rightFixedSeq": TGGAATTCTCGGGTGCCAAGG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.99
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 0.5
## Slot "minSeedValue": 0.001
## Slot "seedLen": 15
## Slot "consensusSeq": [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 23
##
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 1 round(s) of data (round = 8) with reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.01925408 0.00000000 0.0000000 0.0000000 0.00000000 -0.5504117
## N.C 0.00000000 -0.10431502 -0.1109867 -0.1700088 -0.33848663 -0.3971744
## N.G 0.01485390 -0.02072453 -0.1638730 -0.0844513 -0.08441401 0.0000000
## N.T -0.02443604 -0.08341628 -0.1259747 -0.1087101 -0.29385391 -0.3303312
## 7 8 9 10 11 12
## N.A 0.005089661 0.0000000 -0.7418528 0.0000000 -0.27627363 -0.1075385
## N.C -0.164141284 -0.4924021 0.0000000 -0.7645303 0.00000000 0.0000000
## N.G -0.203722709 -0.6186135 -0.8312186 -0.2176745 -0.21289589 0.0000000
## N.T 0.000000000 -0.7698694 -0.9352052 -0.5497032 -0.06717789 -0.1075385
## 13 14 15 16 17 18
## N.A -0.06717789 -0.5497032 -0.9352052 -0.7698694 0.000000000 -0.3303312
## N.C -0.21289589 -0.2176745 -0.8312186 -0.6186135 -0.203722709 0.0000000
## N.G 0.00000000 -0.7645303 0.0000000 -0.4924021 -0.164141284 -0.3971744
## N.T -0.27627363 0.0000000 -0.7418528 0.0000000 0.005089661 -0.5504117
## 19 20 21 22 23
## N.A -0.29385391 -0.1087101 -0.1259747 -0.08341628 -0.02443604
## N.C -0.08441401 -0.0844513 -0.1638730 -0.02072453 0.01485390
## N.G -0.33848663 -0.1700088 -0.1109867 -0.10431502 0.00000000
## N.T 0.00000000 0.0000000 0.0000000 0.00000000 0.01925408
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.0001384160 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001536015 0.0001619310 0.0002408317 0.0010508009
## N.G 0.0001700509 0.0001443480 0.0002355790 0.0001557749 0.0001300053
## N.T 0.0001665104 0.0001644494 0.0001638463 0.0001538592 0.0006318898
## 6 7 8 9 10
## N.A 0.014064097 0.0001104559 0.00000000 0.02946302 0.0000000000
## N.C 0.012433240 0.0002946282 0.01453144 0.00000000 0.0322750926
## N.G 0.000000000 0.0003963031 0.01397591 0.05590184 0.0003178047
## N.T 0.001156847 0.0000000000 0.02282222 0.06250012 0.0047610570
## 11 12 13 14 15
## N.A 0.0004200284 0.0001773176 0.0001202827 0.0047610570 0.06250012
## N.C 0.0000000000 0.0000000000 0.0003258143 0.0003178047 0.05590184
## N.G 0.0003258143 0.0000000000 0.0000000000 0.0322750926 0.00000000
## N.T 0.0001202827 0.0001773176 0.0004200284 0.0000000000 0.02946302
## 16 17 18 19 20
## N.A 0.02282222 0.0000000000 0.001156847 0.0006318898 0.0001538592
## N.C 0.01397591 0.0003963031 0.000000000 0.0001300053 0.0001557749
## N.G 0.01453144 0.0002946282 0.012433240 0.0010508009 0.0002408317
## N.T 0.00000000 0.0001104559 0.014064097 0.0000000000 0.0000000000
## 21 22 23
## N.A 0.0001638463 0.0001644494 0.0001665104
## N.C 0.0002355790 0.0001443480 0.0001700509
## N.G 0.0001619310 0.0001536015 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001384160
##
##
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 8).
## Intercept beta values:
## Round.8:
## [1] 36.59516
##
## Intercept beta errors:
## Round.8:
## [1] 0.002361106
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
## [1] "Number of Observations in Design Matrix: 999449"
## No shape parameters included in fit.
## [1] "i = 3"
## [1] "Round summary: "
## 8 Total
## Round 999449 999449
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 582969 638483 349403 428043
## 2 720329 470904 371849 435816
## 3 910568 499189 170803 418338
## 4 927959 254916 406958 409065
## 5 1289992 18587 647831 42488
## 6 676 265 1982364 15593
## 7 843147 104633 93476 957642
## 8 1996438 448 1117 895
## 9 653 1997261 386 598
## 10 1879804 689 115440 2965
## 11 80169 1115092 111975 691662
## 12 343708 655741 0 0
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F 5189 64685 97021 70066 107940 129683 140886 154356 110711
## Strand.R 0 0 0 0 0 0 0 0 0
## View.10 View.11 StrandTotal
## Strand.F 56886 62026 999449
## Strand.R 0 0 0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": AR-DBD R8 Nucleotides (Rev. Comp. Sym.)
## Slot "varRegLen": 23
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATC
## Slot "rightFixedSeq": TGGAATTCTCGGGTGCCAAGG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.99
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 0.5
## Slot "minSeedValue": 0.001
## Slot "seedLen": 15
## Slot "consensusSeq": [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 23
##
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 1 round(s) of data (round = 8) with reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.01925443 0.00000000 0.0000000 0.0000000 0.0000000 -0.5504135
## N.C 0.00000000 -0.10431586 -0.1109880 -0.1700099 -0.3385145 -0.3971760
## N.G 0.01485432 -0.02072441 -0.1638745 -0.0844519 -0.0844145 0.0000000
## N.T -0.02443639 -0.08341673 -0.1259763 -0.1087109 -0.2938555 -0.3303648
## 7 8 9 10 11 12
## N.A 0.005089645 0.0000000 -0.7418546 0.0000000 -0.27627623 -0.1075391
## N.C -0.164142794 -0.4924032 0.0000000 -0.7924252 0.00000000 0.0000000
## N.G -0.203723861 -0.6250269 -0.8312205 -0.2176788 -0.21289699 0.0000000
## N.T 0.000000000 -0.7698718 -0.9711674 -0.5497056 -0.06717801 -0.1075391
## 13 14 15 16 17 18
## N.A -0.06717801 -0.5497056 -0.9711674 -0.7698718 0.000000000 -0.3303648
## N.C -0.21289699 -0.2176788 -0.8312205 -0.6250269 -0.203723861 0.0000000
## N.G 0.00000000 -0.7924252 0.0000000 -0.4924032 -0.164142794 -0.3971760
## N.T -0.27627623 0.0000000 -0.7418546 0.0000000 0.005089645 -0.5504135
## 19 20 21 22 23
## N.A -0.2938555 -0.1087109 -0.1259763 -0.08341673 -0.02443639
## N.C -0.0844145 -0.0844519 -0.1638745 -0.02072441 0.01485432
## N.G -0.3385145 -0.1700099 -0.1109880 -0.10431586 0.00000000
## N.T 0.0000000 0.0000000 0.0000000 0.00000000 0.01925443
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.0001384160 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001536016 0.0001619311 0.0002408320 0.0010509082
## N.G 0.0001700510 0.0001443481 0.0002355793 0.0001557749 0.0001300053
## N.T 0.0001665107 0.0001644495 0.0001638466 0.0001538593 0.0006318897
## 6 7 8 9 10
## N.A 0.014064097 0.0001104559 0.00000000 0.02946302 0.0000000000
## N.C 0.012425748 0.0002946289 0.01453144 0.00000000 0.0360845988
## N.G 0.000000000 0.0003963031 0.01433895 0.05590184 0.0003178081
## N.T 0.001156992 0.0000000000 0.02282222 0.07216889 0.0047610570
## 11 12 13 14 15
## N.A 0.0004200304 0.0001773177 0.0001202827 0.0047610570 0.07216889
## N.C 0.0000000000 0.0000000000 0.0003258142 0.0003178081 0.05590184
## N.G 0.0003258142 0.0000000000 0.0000000000 0.0360845988 0.00000000
## N.T 0.0001202827 0.0001773177 0.0004200304 0.0000000000 0.02946302
## 16 17 18 19 20
## N.A 0.02282222 0.0000000000 0.001156992 0.0006318897 0.0001538593
## N.C 0.01433895 0.0003963031 0.000000000 0.0001300053 0.0001557749
## N.G 0.01453144 0.0002946289 0.012425748 0.0010509082 0.0002408320
## N.T 0.00000000 0.0001104559 0.014064097 0.0000000000 0.0000000000
## 21 22 23
## N.A 0.0001638466 0.0001644495 0.0001665107
## N.C 0.0002355793 0.0001443481 0.0001700510
## N.G 0.0001619311 0.0001536016 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001384160
##
##
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 8).
## Intercept beta values:
## Round.8:
## [1] 36.59519
##
## Intercept beta errors:
## Round.8:
## [1] 0.002361103
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
## [1] "Number of Observations in Design Matrix: 999448"
## No shape parameters included in fit.
## [1] "i = 4"
## [1] "Round summary: "
## 8 Total
## Round 999448 999448
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 582969 638482 349403 428042
## 2 720329 470904 371849 435814
## 3 910568 499188 170803 418337
## 4 927958 254916 406958 409064
## 5 1289992 18587 647829 42488
## 6 676 265 1982363 15592
## 7 843147 104633 93476 957640
## 8 1996437 448 1116 895
## 9 653 1997260 386 597
## 10 1879804 688 115440 2964
## 11 80169 1115092 111975 691660
## 12 343707 655741 0 0
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F 5189 64685 97021 70066 107939 129683 140886 154356 110711
## Strand.R 0 0 0 0 0 0 0 0 0
## View.10 View.11 StrandTotal
## Strand.F 56886 62026 999448
## Strand.R 0 0 0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": AR-DBD R8 Nucleotides (Rev. Comp. Sym.)
## Slot "varRegLen": 23
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATC
## Slot "rightFixedSeq": TGGAATTCTCGGGTGCCAAGG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.99
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 0.5
## Slot "minSeedValue": 0.001
## Slot "seedLen": 15
## Slot "consensusSeq": [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 23
##
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 1 round(s) of data (round = 8) with reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.01925443 0.00000000 0.0000000 0.0000000 0.0000000 -0.5504135
## N.C 0.00000000 -0.10431586 -0.1109880 -0.1700099 -0.3385145 -0.3971760
## N.G 0.01485432 -0.02072441 -0.1638745 -0.0844519 -0.0844145 0.0000000
## N.T -0.02443639 -0.08341673 -0.1259763 -0.1087109 -0.2938555 -0.3303648
## 7 8 9 10 11 12
## N.A 0.005089645 0.0000000 -0.7418546 0.0000000 -0.27627623 -0.1075391
## N.C -0.164142794 -0.4924032 0.0000000 -0.7924252 0.00000000 0.0000000
## N.G -0.203723861 -0.6250269 -0.8312205 -0.2176788 -0.21289699 0.0000000
## N.T 0.000000000 -0.7698718 -0.9711674 -0.5497056 -0.06717801 -0.1075391
## 13 14 15 16 17 18
## N.A -0.06717801 -0.5497056 -0.9711674 -0.7698718 0.000000000 -0.3303648
## N.C -0.21289699 -0.2176788 -0.8312205 -0.6250269 -0.203723861 0.0000000
## N.G 0.00000000 -0.7924252 0.0000000 -0.4924032 -0.164142794 -0.3971760
## N.T -0.27627623 0.0000000 -0.7418546 0.0000000 0.005089645 -0.5504135
## 19 20 21 22 23
## N.A -0.2938555 -0.1087109 -0.1259763 -0.08341673 -0.02443639
## N.C -0.0844145 -0.0844519 -0.1638745 -0.02072441 0.01485432
## N.G -0.3385145 -0.1700099 -0.1109880 -0.10431586 0.00000000
## N.T 0.0000000 0.0000000 0.0000000 0.00000000 0.01925443
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.0001384160 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001536016 0.0001619311 0.0002408320 0.0010509082
## N.G 0.0001700510 0.0001443481 0.0002355793 0.0001557749 0.0001300053
## N.T 0.0001665107 0.0001644495 0.0001638466 0.0001538593 0.0006318897
## 6 7 8 9 10
## N.A 0.014064097 0.0001104559 0.00000000 0.02946302 0.0000000000
## N.C 0.012425442 0.0002946289 0.01453144 0.00000000 0.0360845988
## N.G 0.000000000 0.0003963031 0.01433895 0.05590184 0.0003178081
## N.T 0.001156992 0.0000000000 0.02282222 0.07216889 0.0047610570
## 11 12 13 14 15
## N.A 0.0004200304 0.0001773177 0.0001202827 0.0047610570 0.07216889
## N.C 0.0000000000 0.0000000000 0.0003258142 0.0003178081 0.05590184
## N.G 0.0003258142 0.0000000000 0.0000000000 0.0360845988 0.00000000
## N.T 0.0001202827 0.0001773177 0.0004200304 0.0000000000 0.02946302
## 16 17 18 19 20
## N.A 0.02282222 0.0000000000 0.001156992 0.0006318897 0.0001538593
## N.C 0.01433895 0.0003963031 0.000000000 0.0001300053 0.0001557749
## N.G 0.01453144 0.0002946289 0.012425442 0.0010509082 0.0002408320
## N.T 0.00000000 0.0001104559 0.014064097 0.0000000000 0.0000000000
## 21 22 23
## N.A 0.0001638466 0.0001644495 0.0001665107
## N.C 0.0002355793 0.0001443481 0.0001700510
## N.G 0.0001619311 0.0001536016 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001384160
##
##
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 8).
## Intercept beta values:
## Round.8:
## [1] 36.59519
##
## Intercept beta errors:
## Round.8:
## [1] 0.002361103
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
## [1] "Number of Observations in Design Matrix: 999448"
## No shape parameters included in fit.
## [1] "Stability Reached after 4 iterations."
# Finalize the fitted betas, save the final plot, and persist the model both
# as an .RData file (object name ModelTest) and as an .rds file.
ModelTest <- finalizeFeatureBetas(ModelTest)
pM <- plot(ModelTest, plotTitle = "AR-DBD R8 Nucleotide Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)
# filename/plot are spelled out instead of relying on `file` partially
# matching ggsave()'s `filename` argument.
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
  plot = pM, height = vPheight, width = 6
)
save(ModelTest, file = paste0(selexDir, saveDir, "/model.RData"))
saveRDS(ModelTest, file = paste0(selexDir, saveDir, "/model.rds"))