# Java options are set before the packages below are attached.
# NOTE(review): presumably java.parameters is only read when the JVM starts,
# so this line should stay ahead of the library() calls - confirm.
options(java.parameters = "-Xmx4000M")

library(SELEX)
library(SelexGLM)
library(grid)

# Point SELEX at a local cache directory and allow it up to four threads.
workDir <- "./cache/"
selex.config(
  workingDir = workDir,
  maxThreadNumber = 4
)
# ---- Machine-specific paths ------------------------------------------------
# The locations below point at the original author's computer and have to be
# redefined before the script is run anywhere else.
selexDir <- "/Users/gabriella/Columbia/SELEX/"
#rawdataDir = "/Users/gabriella/Columbia/rawdata/Pufall/"
processedDataDir <- "/Users/gabriella/Columbia/SplitFastqData/Pufall/ConcatFiles/"

# Cluster versions of the same paths (kept commented out):
#selexDir = "/vega/hblab/users/gdm2120/SELEX/SELEX/"
#rawdataDir = "/vega/hblab/projects/selex/rawdata/Pufall"
#processedDataDir = "/vega/hblab/users/gdm2120/SplitFastqData/Pufall/"
# -----------------------------------------------------------------------------

# Output directory, relative to selexDir; created (with parents) if missing,
# and silently left alone if it already exists.
saveDir <- "gabriella/SelexGLMtest/MultiRoundSymmetry"
dir.create(
  file.path(selexDir, saveDir),
  showWarnings = FALSE,
  recursive = TRUE
)
# Read the tab-separated shape table that lives under selexDir.
shapeTable <- read.table(
  paste0(selexDir, "gabriella/ShapeParamData/ShapeTableOrthogonal.txt"),
  sep = "\t",
  stringsAsFactors = FALSE
)

# Keep column 1 plus columns 14-19 and give them descriptive names.
ST <- shapeTable[, c(1, 14:19)]
colnames(ST) <- c(
  "Sequence",
  "MGW", "ProT",
  "HelTA", "HelTB",
  "RollA", "RollB"
)
# Register the three FASTQ files with SELEX: the round-0 library and the
# round-8 / round-7 AR-DBD selections.
# NOTE(review): the trailing positional arguments are presumably
# (round, variable-region length, left barcode, right barcode) - confirm
# against the selex.defineSample() signature.
selex.defineSample(
  "r0.Pufall",
  paste0(processedDataDir, "/Demultiplexed.R0.fastq.gz"),
  "r0",
  0, 23, "", "TGGAA"
)
selex.defineSample(
  "AR.R8",
  paste0(processedDataDir, "/AR.R8.fastq.gz"),
  "AR-DBD",
  8, 23, "", "TGGAA"
)
selex.defineSample(
  "AR.R7",
  paste0(processedDataDir, "/AR.R7.fastq.gz"),
  "AR-DBD",
  7, 23, "", "TGGAA"
)
# Handle for the round-0 library, split into the training and test parts that
# the Markov-model step uses.
r0 <- selex.sample(seqName = "r0.Pufall", sampleName = "r0", round = 0)
r0.split <- selex.split(r0)
r0.train <- r0.split$train
r0.test <- r0.split$test

# Handles for the round-8 and round-7 AR-DBD samples.
dataSample <- selex.sample(seqName = "AR.R8", sampleName = "AR-DBD", round = 8)
dataSample.R7 <- selex.sample(seqName = "AR.R7", sampleName = "AR-DBD", round = 7)
# MARKOV MODEL BUILT
# kmax is taken from the round-0 test split; the Markov model is trained on the
# training split and cross-validated against the test split.
kmax <- selex.kmax(sample = r0.test)
mm <- selex.mm(
  sample = r0.train,
  order = NA,
  crossValidationSample = r0.test,
  Kmax = kmax,
  mmMethod = "TRANSITION"
)

# Look up the order(s) whose R value is the maximum in the model summary.
mmscores <- selex.mmSummary(sample = r0.train)
ido <- which(mmscores$R == max(mmscores$R))
mm.order <- mmscores$Order[ido]

# Variable-region length as reported in the sample attributes, and the k-mer
# length used for the k-mer count table.
libLen <- as.numeric(
  as.character(selex.getAttributes(dataSample)$VariableRegionLength)
)
kLen <- 15
# The probe counts and the k-mer table were computed once and cached as .RData
# files under selexDir/saveDir. The generating calls are kept (commented out)
# next to each load() so the caches can be rebuilt when needed.

# Round-8 probe counts.
#data.probeCounts = getProbeCounts(dataSample, markovModel = mm)
#save(data.probeCounts, file = paste(selexDir, saveDir, "/data.probeCounts.RData", sep = ""))
load(file = paste0(selexDir, saveDir, "/data.probeCounts.RData"))

# Round-8 k-mer count/affinity table.
#data.kmerTable = getKmerCountAffinities(dataSample, k = kLen, minCount = 100, markovModel = mm)
#save(data.kmerTable, file = paste(selexDir, saveDir, "/data.kmerTable.RData", sep = ""))
load(file = paste0(selexDir, saveDir, "/data.kmerTable.RData"))

# Round-7 probe counts.
#data.probeCounts.R7 = getProbeCounts(dataSample.R7, markovModel = mm)
#save(data.probeCounts.R7, file = paste(selexDir, saveDir, "/data.probeCounts.R7.RData", sep = ""))
load(file = paste0(selexDir, saveDir, "/data.probeCounts.R7.RData"))
# Build the model object for the combined round-7 + round-8 fit with reverse
# complement symmetry. The library-related inputs (fixed flanks, variable
# region length, consensus) are specific to this data set.
ModelTest <- model(
  name = "AR-DBD R7+R8 Nucleotides (Rev. Comp. Sym.)",
  varRegLen = libLen,
  leftFixedSeq = "GTTCAGAGTTCTACAGTCCGACGATC",
  rightFixedSeq = "TGGAATTCTCGGGTGCCAAGG",
  consensusSeq = "RGWACANNNTGTWCY",
  affinityType = "AffinitySym",
  leftFixedSeqOverlap = 5,
  minAffinity = 0.01,
  missingValueSuppression = 0.5,
  minSeedValue = 0.01,
  upFootprintExtend = 4,
  confidenceLevel = 0.99,
  rounds = list(c(7, 8)),
  rcSymmetric = TRUE,
  verbose = FALSE
)

# Show the feature design of the freshly built model.
getFeatureDesign(ModelTest)
## Feature design for object of class 'model'
##
## seedLen: 15
## upFootprintExtend: 4
## downFootprintExtend: 4
## rcSymmetric: TRUE
##
## Slot "N":
## N.upFootprintExtend: 4
## N.downFootprintExtend: 4
## N.set: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## Number of previous iterations: 0
##
## Slot "Intercept":
## Number of Views per Strand of DNA: 11
## Number of Rounds: 2 (7, 8)
## Number of previous iterations: 0
##
## Slot "Shape":
## "ShapeParamsUsed": NONE
# Seed the model with a PSAM derived from the k-mer table.
addSeedPsam(ModelTest) <- seedTable2psam(ModelTest, data.kmerTable)

# Nucleotide betas of the model once the seed PSAM has been added.
print(getValues(getN(ModelTest)))
## 1 2 3 4 5 6 7 8 9
## N.A 0 0 0 0 0.00000000 -1.2968295 -0.03073087 0.000000 -1.296829
## N.C 0 0 0 0 -0.60728754 -1.2968295 -0.25628921 -1.296829 0.000000
## N.G 0 0 0 0 -0.09864725 0.0000000 -0.34036727 -1.296829 -1.296829
## N.T 0 0 0 0 -0.42644057 -0.5591611 0.00000000 -1.296829 -1.296829
## 10 11 12 13 14 15
## N.A 0.0000000 -0.40799975 -0.1359377 -0.09020211 -0.7968295 -1.296829
## N.C -1.2968295 0.00000000 0.0000000 -0.36546623 -0.3957275 -1.296829
## N.G -0.3957275 -0.36546623 0.0000000 0.00000000 -1.2968295 0.000000
## N.T -0.7968295 -0.09020211 -0.1359377 -0.40799975 0.0000000 -1.296829
## 16 17 18 19 20 21 22 23
## N.A -1.296829 0.00000000 -0.5591611 -0.42644057 0 0 0 0
## N.C -1.296829 -0.34036727 0.0000000 -0.09864725 0 0 0 0
## N.G -1.296829 -0.25628921 -1.2968295 -0.60728754 0 0 0 0
## N.T 0.000000 -0.03073087 -1.2968295 0.00000000 0 0 0 0
# Plot the nucleotide features of the seeding model (ddG option switched on).
plot(
  ModelTest@features@N,
  Ntitle = "AR-DBD R7+R8 Nucleotides\nSeeding Model",
  ddG = TRUE
)
# Next we score the probes using topModelMatch:
# Draw a random subsample of row indices from each round's probe table.
# The size is capped at the number of available rows: sample() without
# replacement raises an error when asked for more items than exist, so a
# table with fewer than 500000 probes would otherwise stop the script.
# When both tables have at least 500000 rows this is identical to asking for
# 500000 directly.
# NOTE(review): no seed is set, so the subsample differs between runs; call
# set.seed() before this point if reproducible fits are required.
sample1 <- sample(nrow(data.probeCounts), min(500000, nrow(data.probeCounts)))
sample2 <- sample(nrow(data.probeCounts.R7), min(500000, nrow(data.probeCounts.R7)))

# Stack the round-8 and round-7 subsamples into one table.
data <- rbind(data.probeCounts[sample1, ], data.probeCounts.R7[sample2, ])
#data = rbind(data.probeCounts, data.probeCounts.R7)

# Score the probes against the current (seed) model.
data <- topModelMatch(data, ModelTest)
# Uses aligned probes to build design matrix
data <- addDesignMatrix(data, ModelTest)

# Summary tables of the design matrix ($Round, $Intercept, $N are used below).
designMatrixSummary <- getDesignMatrix(ModelTest, data)
## No shape parameters included in fit.
# Report the design-matrix summary tables; the '##' lines are the output
# recorded from the original run.

# Observations per round.
print("Round summary: ")
## [1] "Round summary: "
print(designMatrixSummary$Round)
## 7 8 Total
## Round 499619 499688 999307

# Observations per view and strand.
print("View/strand orientation summary: ")
## [1] "View/strand orientation summary: "
print(designMatrixSummary$Intercept)
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F 5870 66649 97393 70583 105501 125963 137615 151438 111168
## Strand.R 0 0 0 0 0 0 0 0 0
## View.10 View.11 StrandTotal
## Strand.F 61109 66018 999307
## Strand.R 0 0 0

# Counts per nucleotide and position.
print("Mono-nucleotide summary: ")
## [1] "Mono-nucleotide summary: "
print(designMatrixSummary$N)
## N.A N.C N.G N.T
## 1 576398 634475 349079 438662
## 2 703895 481713 369005 444001
## 3 876441 517952 179493 424728
## 4 901539 270124 413438 413513
## 5 1271507 21294 657238 48575
## 6 802 216 1979084 18512
## 7 839804 110351 104587 943872
## 8 1996170 403 1088 953
## 9 547 1997295 294 478
## 10 1867027 607 127164 3816
## 11 91669 1087326 122362 697257
## 12 357518 641789 0 0
# Build the regression expression from the design matrix, using independent
# features only, and show it.
regressionFormula <- updatedRegressionFormula(data, ModelTest)
print("Regression Formula: ")
## [1] "Regression Formula: "
print(regressionFormula)
## [1] "ObservedCount ~ offset(logProb)+Round.7+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
# Fit the regression as a Poisson GLM with a log link on the design-matrix
# table, then show the coefficient summary.
fit <- glm(
  regressionFormula,
  data = data,
  family = poisson(link = "log")
)
summary(fit)
##
## Call:
## glm(formula = regressionFormula, family = poisson(link = "log"),
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -19.651 -1.319 -0.681 0.273 54.248
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 36.4989061 0.0028393 12854.80 <2e-16 ***
## Round.7 -0.7013046 0.0014420 -486.34 <2e-16 ***
## N.A1 0.0173411 0.0001655 104.80 <2e-16 ***
## N.G1 0.0161845 0.0002021 80.09 <2e-16 ***
## N.T1 -0.0249243 0.0001972 -126.38 <2e-16 ***
## N.C2 -0.1016295 0.0001817 -559.21 <2e-16 ***
## N.G2 -0.0193153 0.0001730 -111.67 <2e-16 ***
## N.T2 -0.0820297 0.0001946 -421.50 <2e-16 ***
## N.C3 -0.1051504 0.0001885 -557.80 <2e-16 ***
## N.G3 -0.1587209 0.0002731 -581.10 <2e-16 ***
## N.T3 -0.1222506 0.0001929 -633.83 <2e-16 ***
## N.C4 -0.1683776 0.0002791 -603.34 <2e-16 ***
## N.G4 -0.0802529 0.0001827 -439.25 <2e-16 ***
## N.T4 -0.1044913 0.0001813 -576.32 <2e-16 ***
## N.C5 -0.3377881 0.0011853 -284.98 <2e-16 ***
## N.G5 -0.0818587 0.0001529 -535.38 <2e-16 ***
## N.T5 -0.2913822 0.0007087 -411.16 <2e-16 ***
## N.A6 -0.7223916 0.0256040 -28.21 <2e-16 ***
## N.C6 -0.4753350 0.0200452 -23.71 <2e-16 ***
## N.T6 -0.3287272 0.0012741 -258.02 <2e-16 ***
## N.A7 0.0047030 0.0001317 35.70 <2e-16 ***
## N.C7 -0.1628126 0.0003444 -472.79 <2e-16 ***
## N.G7 -0.1977240 0.0004449 -444.45 <2e-16 ***
## N.C8 -0.4873711 0.0181716 -26.82 <2e-16 ***
## N.G8 -0.6823173 0.0185799 -36.72 <2e-16 ***
## N.T8 -0.7506992 0.0212071 -35.40 <2e-16 ***
## N.A9 -0.7311896 0.0349442 -20.92 <2e-16 ***
## N.G9 -0.7658653 0.0542937 -14.11 <2e-16 ***
## N.T9 -0.8394333 0.0492607 -17.04 <2e-16 ***
## N.C10 -0.8596817 0.0539825 -15.93 <2e-16 ***
## N.G10 -0.2127278 0.0003639 -584.64 <2e-16 ***
## N.T10 -0.5439339 0.0049749 -109.33 <2e-16 ***
## N.A11 -0.2710324 0.0004755 -570.04 <2e-16 ***
## N.G11 -0.2075728 0.0003739 -555.17 <2e-16 ***
## N.T11 -0.0671428 0.0001431 -469.20 <2e-16 ***
## N.A12 -0.1038748 0.0002078 -499.86 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 6879157 on 999306 degrees of freedom
## Residual deviance: 3613057 on 999271 degrees of freedom
## AIC: 4929596
##
## Number of Fisher Scoring iterations: 12
# Write the coefficients of the GLM fit back into the model object.
ModelTest <- addNewBetas(ModelTest, data, fit)
## No shape parameters included in fit.

# Nucleotide features after the first round of fitting.
summary(ModelTest)
## An object of class 'model'
##
## Slot "name": AR-DBD R7+R8 Nucleotides (Rev. Comp. Sym.)
## Slot "varRegLen": 23
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATC
## Slot "rightFixedSeq": TGGAATTCTCGGGTGCCAAGG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.99
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 0.5
## Slot "minSeedValue": 0.01
## Slot "seedLen": 15
## Slot "consensusSeq": [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 23
##
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 2 round(s) of data (round = 7, 8) with reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.7+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.01734114 0.00000000 0.0000000 0.00000000 0.00000000 -0.7223916
## N.C 0.00000000 -0.10162951 -0.1051504 -0.16837758 -0.33778809 -0.4753350
## N.G 0.01618449 -0.01931531 -0.1587209 -0.08025293 -0.08185872 0.0000000
## N.T -0.02492432 -0.08202971 -0.1222506 -0.10449131 -0.29138223 -0.3287272
## 7 8 9 10 11 12
## N.A 0.004702985 0.0000000 -0.7311896 0.0000000 -0.27103242 -0.1038748
## N.C -0.162812600 -0.4873711 0.0000000 -0.8596817 0.00000000 0.0000000
## N.G -0.197723952 -0.6823173 -0.7658653 -0.2127278 -0.20757278 0.0000000
## N.T 0.000000000 -0.7506992 -0.8394333 -0.5439339 -0.06714284 -0.1038748
## 13 14 15 16 17 18
## N.A -0.06714284 -0.5439339 -0.8394333 -0.7506992 0.000000000 -0.3287272
## N.C -0.20757278 -0.2127278 -0.7658653 -0.6823173 -0.197723952 0.0000000
## N.G 0.00000000 -0.8596817 0.0000000 -0.4873711 -0.162812600 -0.4753350
## N.T -0.27103242 0.0000000 -0.7311896 0.0000000 0.004702985 -0.7223916
## 19 20 21 22 23
## N.A -0.29138223 -0.10449131 -0.1222506 -0.08202971 -0.02492432
## N.C -0.08185872 -0.08025293 -0.1587209 -0.01931531 0.01618449
## N.G -0.33778809 -0.16837758 -0.1051504 -0.10162951 0.00000000
## N.T 0.00000000 0.00000000 0.0000000 0.00000000 0.01734114
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.0001654693 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001817386 0.0001885104 0.0002790747 0.0011853196
## N.G 0.0002020752 0.0001729761 0.0002731386 0.0001827061 0.0001528988
## N.T 0.0001972229 0.0001946156 0.0001928767 0.0001813086 0.0007086869
## 6 7 8 9 10
## N.A 0.025603970 0.0001317305 0.00000000 0.03494423 0.0000000000
## N.C 0.020045169 0.0003443641 0.01817157 0.00000000 0.0539825206
## N.G 0.000000000 0.0004448757 0.01857992 0.05429372 0.0003638615
## N.T 0.001274052 0.0000000000 0.02120711 0.04926069 0.0049749253
## 11 12 13 14 15
## N.A 0.0004754654 0.0002078089 0.0001431010 0.0049749253 0.04926069
## N.C 0.0000000000 0.0000000000 0.0003738882 0.0003638615 0.05429372
## N.G 0.0003738882 0.0000000000 0.0000000000 0.0539825206 0.00000000
## N.T 0.0001431010 0.0002078089 0.0004754654 0.0000000000 0.03494423
## 16 17 18 19 20
## N.A 0.02120711 0.0000000000 0.001274052 0.0007086869 0.0001813086
## N.C 0.01857992 0.0004448757 0.000000000 0.0001528988 0.0001827061
## N.G 0.01817157 0.0003443641 0.020045169 0.0011853196 0.0002790747
## N.T 0.00000000 0.0001317305 0.025603970 0.0000000000 0.0000000000
## 21 22 23
## N.A 0.0001928767 0.0001946156 0.0001972229
## N.C 0.0002731386 0.0001729761 0.0002020752
## N.G 0.0001885104 0.0001817386 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001654693
##
##
## An object of class 'Intercept'
## Fits intercept(s) for 2 round(s) (round = 7, 8).
## Intercept beta values:
## Round.7:
## [1] 35.7976
##
## Round.8:
## [1] 36.49891
##
## Intercept beta errors:
## Round.7:
## [1] 0.003184519
##
## Round.8:
## [1] 0.002839322
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
# Plot the model after the first fit and write it to disk twice: once as the
# "latest" plot (modelPlot.pdf) and once as the numbered snapshot for
# iteration 1 (modelPlot.1.pdf).
vPheight <- verticalPlot_height(ModelTest)
pM <- plot(
  ModelTest,
  plotTitle = "AR-DBD R7+R8 Nucleotide Fit",
  Nplot.ddG = TRUE,
  verticalPlots = TRUE
)

# filename/plot are spelled out in full. The earlier form, ggsave(pM, file = ...),
# only worked because `file` partially matched `filename`, which in turn pushed
# the unnamed pM into the `plot` argument.
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
  plot = pM,
  height = vPheight,
  width = 6
)
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.", 1, ".pdf"),
  plot = pM,
  height = vPheight,
  width = 6
)
# Rebuild the probe table from the same subsample, re-score it with the model
# fitted above and rebuild the design matrix.
data <- rbind(data.probeCounts[sample1, ], data.probeCounts.R7[sample2, ])
#data = rbind(data.probeCounts, data.probeCounts.R7)
data <- topModelMatch(data, ModelTest)
data <- addDesignMatrix(data, ModelTest)

# Stopping criterion used throughout this section: TRUE when the N, Round and
# Intercept tables of two design-matrix summaries are element-wise equal.
# `&&` is used because each all() yields a single logical value.
designMatrixUnchanged <- function(current, previous) {
  all(current$N == previous$N) &&
    all(current$Round == previous$Round) &&
    all(current$Intercept == previous$Intercept)
}

if (nrow(data) > 0) {
  designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
  if (designMatrixUnchanged(designMatrixSummary.v2, designMatrixSummary)) {
    print ("Stability Reached")
  }
}
## No shape parameters included in fit.

# Alternate between fitting the GLM and re-scoring the probes (iterations 2 to
# 20 at most) until the design-matrix summary stops changing or no probes are
# left.
# NOTE(review): inside the loop the already-filtered `data` is re-scored rather
# than being rebuilt from the raw probe tables as done above - confirm that
# this is intended.
for (i in 2:20) {
  # Nothing to fit, or stability was already detected by the preceding check.
  # `||` short-circuits, so the summaries are only compared when data has rows.
  if (nrow(data) == 0 ||
      designMatrixUnchanged(designMatrixSummary.v2, designMatrixSummary)) {
    break
  }
  # Not read anywhere in the visible part of the script; kept in case later
  # code relies on it.
  data.nrow <- nrow(data)
  print(paste("i =", i))
  designMatrixSummary <- designMatrixSummary.v2
  print("Round summary: ")
  print(designMatrixSummary$Round)
  print("Mono-nucleotide summary: ")
  print(designMatrixSummary$N)
  print("View/strand orientation summary: ")
  print(designMatrixSummary$Intercept)
  # Constructs regression expression with independent features using design matrix
  regressionFormula <- updatedRegressionFormula(data, ModelTest)
  print("Regression Formula: ")
  print(regressionFormula)
  fit <- glm(
    regressionFormula,
    data = data,
    family = poisson(link = "log")
  )
  # Values are not auto-printed inside a for loop, so a bare summary(fit) here
  # was computed and thrown away; print it explicitly.
  print(summary(fit))
  ModelTest <- addNewBetas(ModelTest, data, fit)
  # Nucleotide features after this round of fitting. Left as a bare call: the
  # output recorded from the original run shows that it prints on its own.
  summary(ModelTest)
  pM <- plot(
    ModelTest,
    plotTitle = "AR-DBD R7+R8 Nucleotide Fit",
    Nplot.ddG = TRUE,
    verticalPlots = TRUE
  )
  # Numbered snapshot for this iteration, plus the "latest" plot. filename and
  # plot are named in full instead of relying on `file` partially matching
  # `filename`.
  ggplot2::ggsave(
    filename = paste0(selexDir, saveDir, "/modelPlot.", i, ".pdf"),
    plot = pM,
    height = vPheight,
    width = 6
  )
  ggplot2::ggsave(
    filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
    plot = pM,
    height = vPheight,
    width = 6
  )
  # Re-score with the updated model and rebuild the design matrix.
  data <- topModelMatch(data, ModelTest)
  data <- addDesignMatrix(data, ModelTest)
  print(paste0("Number of Observations in Design Matrix: ", nrow(data)))
  if (nrow(data) > 0) {
    designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
    if (designMatrixUnchanged(designMatrixSummary.v2, designMatrixSummary)) {
      print(paste0("Stability Reached after ", i, " iterations."))
      break
    }
  } else {
    print(paste0("Algorithm failed to converge: No probes meet the confidence level requirement (Confidence Level:", ModelTest@confidenceLevel, ")"))
    # Leave the loop right away; the next iteration would only hit the
    # nrow(data) == 0 guard at the top and break there.
    break
  }
}
## [1] "i = 2"
## [1] "Round summary: "
## 7 8 Total
## Round 499635 499741 999376
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 576408 634543 349127 438674
## 2 703956 481736 369035 444025
## 3 876487 517967 179511 424787
## 4 901582 270164 413471 413535
## 5 1271583 21323 657262 48584
## 6 852 278 1979080 18542
## 7 839886 110376 104604 943886
## 8 1996194 466 1111 981
## 9 572 1997339 346 495
## 10 1867072 617 127227 3836
## 11 91713 1087347 122392 697300
## 12 357539 641837 0 0
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F 5877 66679 97396 70588 105509 125969 137618 151451 111177
## Strand.R 0 0 0 0 0 0 0 0 0
## View.10 View.11 StrandTotal
## Strand.F 61102 66010 999376
## Strand.R 0 0 0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+Round.7+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": AR-DBD R7+R8 Nucleotides (Rev. Comp. Sym.)
## Slot "varRegLen": 23
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATC
## Slot "rightFixedSeq": TGGAATTCTCGGGTGCCAAGG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.99
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 0.5
## Slot "minSeedValue": 0.01
## Slot "seedLen": 15
## Slot "consensusSeq": [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 23
##
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 2 round(s) of data (round = 7, 8) with reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.7+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.01733940 0.00000000 0.0000000 0.0000000 0.00000000 -0.7000605
## N.C 0.00000000 -0.10161808 -0.1051388 -0.1683537 -0.33758200 -0.4419699
## N.G 0.01618137 -0.01931137 -0.1587018 -0.0802435 -0.08184938 0.0000000
## N.T -0.02492349 -0.08202156 -0.1222410 -0.1044819 -0.29129306 -0.3284835
## 7 8 9 10 11 12
## N.A 0.004706272 0.0000000 -0.6223447 0.0000000 -0.27100578 -0.103863
## N.C -0.162796953 -0.4532656 0.0000000 -0.8818432 0.00000000 0.000000
## N.G -0.197689743 -0.6633602 -0.6414333 -0.2126870 -0.20754587 0.000000
## N.T 0.000000000 -0.7249655 -0.8072993 -0.5415725 -0.06713748 -0.103863
## 13 14 15 16 17 18
## N.A -0.06713748 -0.5415725 -0.8072993 -0.7249655 0.000000000 -0.3284835
## N.C -0.20754587 -0.2126870 -0.6414333 -0.6633602 -0.197689743 0.0000000
## N.G 0.00000000 -0.8818432 0.0000000 -0.4532656 -0.162796953 -0.4419699
## N.T -0.27100578 0.0000000 -0.6223447 0.0000000 0.004706272 -0.7000605
## 19 20 21 22 23
## N.A -0.29129306 -0.1044819 -0.1222410 -0.08202156 -0.02492349
## N.C -0.08184938 -0.0802435 -0.1587018 -0.01931137 0.01618137
## N.G -0.33758200 -0.1683537 -0.1051388 -0.10161808 0.00000000
## N.T 0.00000000 0.0000000 0.0000000 0.00000000 0.01733940
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.0001654685 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001817383 0.0001885101 0.0002790682 0.0011845269
## N.G 0.0002020741 0.0001729749 0.0002731330 0.0001827066 0.0001528989
## N.T 0.0001972218 0.0001946152 0.0001928768 0.0001813078 0.0007085246
## 6 7 8 9 10
## N.A 0.023574641 0.0001317305 0.00000000 0.02290727 0.0000000000
## N.C 0.017767458 0.0003443633 0.01594197 0.00000000 0.0587644764
## N.G 0.000000000 0.0004448487 0.01728434 0.03352963 0.0003638329
## N.T 0.001273016 0.0000000000 0.01926555 0.04355738 0.0049314410
## 11 12 13 14 15
## N.A 0.0004754519 0.0002078065 0.0001431013 0.0049314410 0.04355738
## N.C 0.0000000000 0.0000000000 0.0003738744 0.0003638329 0.03352963
## N.G 0.0003738744 0.0000000000 0.0000000000 0.0587644764 0.00000000
## N.T 0.0001431013 0.0002078065 0.0004754519 0.0000000000 0.02290727
## 16 17 18 19 20
## N.A 0.01926555 0.0000000000 0.001273016 0.0007085246 0.0001813078
## N.C 0.01728434 0.0004448487 0.000000000 0.0001528989 0.0001827066
## N.G 0.01594197 0.0003443633 0.017767458 0.0011845269 0.0002790682
## N.T 0.00000000 0.0001317305 0.023574641 0.0000000000 0.0000000000
## 21 22 23
## N.A 0.0001928768 0.0001946152 0.0001972218
## N.C 0.0002731330 0.0001729749 0.0002020741
## N.G 0.0001885101 0.0001817383 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001654685
##
##
## An object of class 'Intercept'
## Fits intercept(s) for 2 round(s) (round = 7, 8).
## Intercept beta values:
## Round.7:
## [1] 35.79718
##
## Round.8:
## [1] 36.49848
##
## Intercept beta errors:
## Round.7:
## [1] 0.003184563
##
## Round.8:
## [1] 0.002839375
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
## [1] "Number of Observations in Design Matrix: 999347"
## No shape parameters included in fit.
## [1] "i = 3"
## [1] "Round summary: "
## 7 8 Total
## Round 499625 499722 999347
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 576389 634524 349119 438662
## 2 703936 481718 369024 444016
## 3 876468 517956 179497 424773
## 4 901561 270153 413458 413522
## 5 1271568 21312 657249 48565
## 6 845 274 1979043 18532
## 7 839871 110371 104590 943862
## 8 1996158 461 1103 972
## 9 565 1997298 346 485
## 10 1867044 608 127217 3825
## 11 91697 1087337 122379 697281
## 12 357517 641830 0 0
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F 5876 66676 97395 70583 105506 125966 137616 151445 111175
## Strand.R 0 0 0 0 0 0 0 0 0
## View.10 View.11 StrandTotal
## Strand.F 61099 66010 999347
## Strand.R 0 0 0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+Round.7+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": AR-DBD R7+R8 Nucleotides (Rev. Comp. Sym.)
## Slot "varRegLen": 23
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATC
## Slot "rightFixedSeq": TGGAATTCTCGGGTGCCAAGG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.99
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 0.5
## Slot "minSeedValue": 0.01
## Slot "seedLen": 15
## Slot "consensusSeq": [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 23
##
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 2 round(s) of data (round = 7, 8) with reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.7+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.01734028 0.00000000 0.0000000 0.00000000 0.00000000 -0.7045849
## N.C 0.00000000 -0.10161956 -0.1051400 -0.16835561 -0.33758563 -0.4419733
## N.G 0.01618205 -0.01931169 -0.1587047 -0.08024499 -0.08185111 0.0000000
## N.T -0.02492325 -0.08202353 -0.1222430 -0.10448434 -0.29131199 -0.3285259
## 7 8 9 10 11 12
## N.A 0.004705558 0.0000000 -0.6356013 0.0000000 -0.27101045 -0.1038652
## N.C -0.162799443 -0.4553177 0.0000000 -0.8818467 0.00000000 0.0000000
## N.G -0.197698304 -0.6707316 -0.6414369 -0.2126927 -0.20754828 0.0000000
## N.T 0.000000000 -0.7279734 -0.8073039 -0.5421621 -0.06713838 -0.1038652
## 13 14 15 16 17 18
## N.A -0.06713838 -0.5421621 -0.8073039 -0.7279734 0.000000000 -0.3285259
## N.C -0.20754828 -0.2126927 -0.6414369 -0.6707316 -0.197698304 0.0000000
## N.G 0.00000000 -0.8818467 0.0000000 -0.4553177 -0.162799443 -0.4419733
## N.T -0.27101045 0.0000000 -0.6356013 0.0000000 0.004705558 -0.7045849
## 19 20 21 22 23
## N.A -0.29131199 -0.10448434 -0.1222430 -0.08202353 -0.02492325
## N.C -0.08185111 -0.08024499 -0.1587047 -0.01931169 0.01618205
## N.G -0.33758563 -0.16835561 -0.1051400 -0.10161956 0.00000000
## N.T 0.00000000 0.00000000 0.0000000 0.00000000 0.01734028
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.0001654687 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001817383 0.0001885100 0.0002790681 0.0011845269
## N.G 0.0002020744 0.0001729750 0.0002731339 0.0001827066 0.0001528990
## N.T 0.0001972219 0.0001946156 0.0001928771 0.0001813084 0.0007085647
## 6 7 8 9 10
## N.A 0.023972589 0.0001317304 0.00000000 0.02411759 0.0000000000
## N.C 0.017760751 0.0003443633 0.01606798 0.00000000 0.0587644908
## N.G 0.000000000 0.0004448578 0.01777793 0.03352963 0.0003638365
## N.T 0.001273198 0.0000000000 0.01948298 0.04355739 0.0049423194
## 11 12 13 14 15
## N.A 0.0004754548 0.000207807 0.0001431013 0.0049423194 0.04355739
## N.C 0.0000000000 0.000000000 0.0003738744 0.0003638365 0.03352963
## N.G 0.0003738744 0.000000000 0.0000000000 0.0587644908 0.00000000
## N.T 0.0001431013 0.000207807 0.0004754548 0.0000000000 0.02411759
## 16 17 18 19 20
## N.A 0.01948298 0.0000000000 0.001273198 0.0007085647 0.0001813084
## N.C 0.01777793 0.0004448578 0.000000000 0.0001528990 0.0001827066
## N.G 0.01606798 0.0003443633 0.017760751 0.0011845269 0.0002790681
## N.T 0.00000000 0.0001317304 0.023972589 0.0000000000 0.0000000000
## 21 22 23
## N.A 0.0001928771 0.0001946156 0.0001972219
## N.C 0.0002731339 0.0001729750 0.0002020744
## N.G 0.0001885100 0.0001817383 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001654687
##
##
## An object of class 'Intercept'
## Fits intercept(s) for 2 round(s) (round = 7, 8).
## Intercept beta values:
## Round.7:
## [1] 35.79725
##
## Round.8:
## [1] 36.49855
##
## Intercept beta errors:
## Round.7:
## [1] 0.003184557
##
## Round.8:
## [1] 0.002839369
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
## [1] "Number of Observations in Design Matrix: 999346"
## No shape parameters included in fit.
## [1] "i = 4"
## [1] "Round summary: "
## 7 8 Total
## Round 499624 499722 999346
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 576388 634523 349119 438662
## 2 703935 481717 369024 444016
## 3 876468 517956 179496 424772
## 4 901560 270152 413458 413522
## 5 1271567 21312 657249 48564
## 6 845 274 1979042 18531
## 7 839871 110371 104589 943861
## 8 1996157 460 1103 972
## 9 564 1997297 346 485
## 10 1867043 608 127217 3824
## 11 91697 1087337 122378 697280
## 12 357516 641830 0 0
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F 5876 66676 97395 70583 105506 125966 137616 151444 111175
## Strand.R 0 0 0 0 0 0 0 0 0
## View.10 View.11 StrandTotal
## Strand.F 61099 66010 999346
## Strand.R 0 0 0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+Round.7+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": AR-DBD R7+R8 Nucleotides (Rev. Comp. Sym.)
## Slot "varRegLen": 23
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATC
## Slot "rightFixedSeq": TGGAATTCTCGGGTGCCAAGG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.99
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 0.5
## Slot "minSeedValue": 0.01
## Slot "seedLen": 15
## Slot "consensusSeq": [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 23
##
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 2 round(s) of data (round = 7, 8) with reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.7+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.01734028 0.00000000 0.0000000 0.00000000 0.00000000 -0.7045849
## N.C 0.00000000 -0.10161956 -0.1051400 -0.16835561 -0.33758563 -0.4419733
## N.G 0.01618205 -0.01931169 -0.1587047 -0.08024499 -0.08185111 0.0000000
## N.T -0.02492325 -0.08202353 -0.1222430 -0.10448434 -0.29131199 -0.3285259
## 7 8 9 10 11 12
## N.A 0.004705558 0.0000000 -0.6356013 0.0000000 -0.27101045 -0.1038652
## N.C -0.162799443 -0.4553177 0.0000000 -0.8818467 0.00000000 0.0000000
## N.G -0.197698304 -0.6707316 -0.6414369 -0.2126927 -0.20754828 0.0000000
## N.T 0.000000000 -0.7279734 -0.8073039 -0.5421621 -0.06713838 -0.1038652
## 13 14 15 16 17 18
## N.A -0.06713838 -0.5421621 -0.8073039 -0.7279734 0.000000000 -0.3285259
## N.C -0.20754828 -0.2126927 -0.6414369 -0.6707316 -0.197698304 0.0000000
## N.G 0.00000000 -0.8818467 0.0000000 -0.4553177 -0.162799443 -0.4419733
## N.T -0.27101045 0.0000000 -0.6356013 0.0000000 0.004705558 -0.7045849
## 19 20 21 22 23
## N.A -0.29131199 -0.10448434 -0.1222430 -0.08202353 -0.02492325
## N.C -0.08185111 -0.08024499 -0.1587047 -0.01931169 0.01618205
## N.G -0.33758563 -0.16835561 -0.1051400 -0.10161956 0.00000000
## N.T 0.00000000 0.00000000 0.0000000 0.00000000 0.01734028
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.0001654687 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001817383 0.0001885100 0.0002790681 0.0011845269
## N.G 0.0002020744 0.0001729750 0.0002731339 0.0001827066 0.0001528990
## N.T 0.0001972219 0.0001946156 0.0001928771 0.0001813084 0.0007085647
## 6 7 8 9 10
## N.A 0.023972589 0.0001317304 0.00000000 0.02411759 0.0000000000
## N.C 0.017760510 0.0003443633 0.01606798 0.00000000 0.0587644908
## N.G 0.000000000 0.0004448578 0.01777793 0.03352963 0.0003638365
## N.T 0.001273198 0.0000000000 0.01948298 0.04355739 0.0049423194
## 11 12 13 14 15
## N.A 0.0004754548 0.000207807 0.0001431013 0.0049423194 0.04355739
## N.C 0.0000000000 0.000000000 0.0003738744 0.0003638365 0.03352963
## N.G 0.0003738744 0.000000000 0.0000000000 0.0587644908 0.00000000
## N.T 0.0001431013 0.000207807 0.0004754548 0.0000000000 0.02411759
## 16 17 18 19 20
## N.A 0.01948298 0.0000000000 0.001273198 0.0007085647 0.0001813084
## N.C 0.01777793 0.0004448578 0.000000000 0.0001528990 0.0001827066
## N.G 0.01606798 0.0003443633 0.017760510 0.0011845269 0.0002790681
## N.T 0.00000000 0.0001317304 0.023972589 0.0000000000 0.0000000000
## 21 22 23
## N.A 0.0001928771 0.0001946156 0.0001972219
## N.C 0.0002731339 0.0001729750 0.0002020744
## N.G 0.0001885100 0.0001817383 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001654687
##
##
## An object of class 'Intercept'
## Fits intercept(s) for 2 round(s) (round = 7, 8).
## Intercept beta values:
## Round.7:
## [1] 35.79725
##
## Round.8:
## [1] 36.49855
##
## Intercept beta errors:
## Round.7:
## [1] 0.003184557
##
## Round.8:
## [1] 0.002839369
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
## [1] "Number of Observations in Design Matrix: 999346"
## No shape parameters included in fit.
## [1] "Stability Reached after 4 iterations."
# Finalize the fitted betas, redraw the model plot and persist the results.
ModelTest <- finalizeFeatureBetas(ModelTest)
pM <- plot(
  ModelTest,
  plotTitle = "AR-DBD R7+R8 Nucleotide Fit",
  Nplot.ddG = TRUE,
  verticalPlots = TRUE
)

# filename/plot are named in full; the earlier ggsave(pM, file = ...) form
# depended on `file` partially matching `filename`.
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
  plot = pM,
  height = vPheight,
  width = 6
)

# Store the final model both as an .RData workspace object and as a single
# .rds object.
save(ModelTest, file = paste0(selexDir, saveDir, "/model.RData"))
saveRDS(ModelTest, file = paste0(selexDir, saveDir, "/model.rds"))