## ----eval=FALSE, warning=FALSE, message=FALSE--------------------------------- # library(dowser) # # # load example AIRR tsv data # data(ExampleAirr) # # # trait value of interest # trait="biopsy" # # # Process example data using default settings # clones = formatClones(ExampleAirr, # traits=trait, num_fields="duplicate_count", minseq=3) # # # Column shows which biopsy the B cell was obtained from # print(table(ExampleAirr[[trait]])) # #Lung Nose # # 145 244 # # # Calculate number of tissues sampled in tree # tissue_types = unlist(lapply(clones$data, function(x) # length(unique(x@data[[trait]])))) # # # Filter to multi-type trees # clones = clones[tissue_types > 1,] # # # Build trees using maximum likelihood (can use alternative builds if desired) # trees = getTrees(clones, build="pml") # ## ----eval=FALSE, warning=FALSE, message=FALSE--------------------------------- # # the location of the igphyml executable # # this is location in Docker image, will likely be different if you've set it up yourself # # note this is the location of the compiled executable file, not just the source folder # igphyml_location = "/usr/local/share/igphyml/src/igphyml" # # # build trees as before, but use IgPhyML to reconstruct the states of internal # # nodes using maximum parsimony # trees = getTrees(clones, build="pml", trait=trait, igphyml=igphyml_location) # # # show internal node (edge) predictions based on maximum parsimony # plotTrees(trees, tips=trait, nodes=TRUE, palette="Set1")[[1]] ## ----eval=TRUE, echo=FALSE, warning=FALSE, message=FALSE---------------------- library(dowser) # Load data instead of running phylip data(BiopsyTrees) plotTrees(BiopsyTrees, tips="biopsy", nodes=TRUE, palette="Set1")[[1]] ## ----eval=FALSE, warning=FALSE, message=FALSE--------------------------------- # # the location of the igphyml executable # # this is location in Docker image, will likely be different if you've set it up yourself # # note this is the location of the compiled executable file, not just the source folder # igphyml_location = "/usr/local/share/igphyml/src/igphyml" # # # calculate switches along trees compared to 100 random permutations # # this may take a while, and can be parallelized using nproc # switches = findSwitches(trees, permutations=100, trait=trait, # igphyml=igphyml_location, fixtrees=TRUE) # # # Perform PS test on switches # ps = testPS(switches$switches) # print(ps$means) # # A tibble: 1 x 6 # # RECON PERMUTE PLT DELTA STAT REPS # # # #1 8 8.6 0.4 -0.6 PS 100 # # # sp = testSP(switches$switches, alternative="greater") # print(sp$means) # # A tibble: 2 x 8 # # Groups: FROM [2] # # FROM TO RECON PERMUTE PGT DELTA STAT REPS # # # #1 Lung Nose 0.131 0.323 1 -0.192 SP 100 # #2 Nose Lung 0.869 0.677 0 0.192 SP 100 # ## ----eval=FALSE, warning=FALSE, message=FALSE--------------------------------- # # calculate switches along bootstrap distribution of trees # # build using the 'pml' maximum likelihood option # # in a real analysis it's important to use at least 100 permutations # switches = findSwitches(trees, permutations=10, trait=trait, # igphyml=igphyml_location, fixtrees=FALSE, build="pml") # # sp = testSP(switches$switches, alternative="greater") # print(sp$means) # # A tibble: 2 x 8 # # Groups: FROM [2] # # FROM TO RECON PERMUTE PGT DELTA STAT REPS # # # #1 Lung Nose 0.168 0.358 1 -0.190 SP 10 # #2 Nose Lung 0.832 0.642 0.1 0.190 SP 10 # ## ----eval=FALSE, warning=FALSE, message=FALSE--------------------------------- # sp = testSP(switches$switches, alternative="greater", permuteAll=TRUE) # print(sp$means) # # A tibble: 2 x 8 # # Groups: FROM [2] # # FROM TO RECON PERMUTE PGT DELTA STAT REPS # # # #1 Lung Nose 0.168 0.241 0.6 -0.0736 SP 10 # #2 Nose Lung 0.832 0.759 0.4 0.0736 SP 10 ## ----eval=FALSE, warning=FALSE, message=FALSE--------------------------------- # # Downsample each tree to a tip-to-switch ratio of 10 instead of 20 # # this will reduce the false positive rate but also (likely) power # switches = findSwitches(trees, permutations=100, trait=trait, # igphyml=igphyml_location, fixtrees=TRUE, tip_switch=10) # # # didn't have much effect for this dataset # sp = testSP(switches$switches, alternative="greater") # print(sp$means) # # A tibble: 2 x 8 # # Groups: FROM [2] # # FROM TO RECON PERMUTE PGT DELTA STAT REPS # # # #1 Lung Nose 0.168 0.358 1 -0.190 SP 10 # #2 Nose Lung 0.832 0.642 0.1 0.190 SP 10 # ## ----eval=FALSE, warning=FALSE, message=FALSE--------------------------------- # # the location of the igphyml executable # # this is location in Docker image, will likely be different if you've set it up yourself # # note this is the location of the compiled executable file, not just the source folder # igphyml_location = "/usr/local/share/igphyml/src/igphyml" # # # constant region column name # trait = "c_call" # # # Vector of human isotypes in the proper order. Isotype switching # # can only go from left to right (e.g. IGHM to IGHA1). One exception # # is IGHD, which can switch to IGHM. # isotypes = c("IGHM","IGHD","IGHG3","IGHG1","IGHA1","IGHG2", # "IGHG4","IGHE","IGHA2") # # # Process example data using default settings with "c_call" as a trait value # clones = formatClones(ExampleAirr, traits=trait, minseq=3) # # # Column shows which constant region associated with a BCR # print(table(ExampleAirr[[trait]])) # # IGHA1 IGHA2 IGHD IGHG1 IGHG2 IGHG3 IGHG4 IGHM # # 55 56 11 58 64 60 63 22 # # # Calculate number of istoypes sampled in each tree # isotype_counts = unlist(lapply(clones$data, function(x) # length(unique(x@data[[trait]])))) # # # make model file with irreversibility constraints # # Will prohibit switches from right to left in the "states" vector # # IGHD to IGHM switching listed as an exception, since this can occur # makeModelFile(file="isotype_model.txt", states=isotypes, # constraints="irrev", exceptions=c("IGHD,IGHM")) # # # Build trees and predict states at internal nodes using maximum parsimony # trees = getTrees(clones[isotype_counts > 1,], trait=trait, igphyml=igphyml_location, # modelfile="isotype_model.txt") # # # show internal node (edge) predictions based on maximum parsimony # plotTrees(trees, tips=trait, nodes=TRUE, palette="Paired", ambig="grey")[[1]] ## ----eval=TRUE, echo=FALSE, warning=FALSE, message=FALSE---------------------- data(IsotypeTrees) plotTrees(IsotypeTrees, tips="c_call", nodes=TRUE, palette="Paired", ambig="grey")[[1]] ## ----eval=FALSE, warning=FALSE, message=FALSE--------------------------------- # # Downsample each tree to a tip-to-switch ratio of 10 instead of 20 # # this will reduce the false positive rate but also (likely) power # switches = findSwitches(trees, permutations=100, trait=trait, # igphyml=igphyml_location, fixtrees=TRUE, tip_switch=10, # modelfile="isotype_model.txt") # # # didn't have much effect for this dataset # sp = testSP(switches$switches, alternative="greater") # print(sp$means,n=42) # # A tibble: 42 × 8 # # Groups: FROM [7] # # FROM TO RECON PERMUTE PGT DELTA STAT REPS # # # # 1 IGHA1 IGHA2 0.0972 0.0994 0.56 -0.00219 SP 100 # # 2 IGHA1 IGHD 0 0 1 0 SP 100 # # 3 IGHA1 IGHG1 0 0 1 0 SP 100 # # 4 IGHA1 IGHG2 0.00815 0.00833 0.58 -0.000184 SP 100 # # 5 IGHA1 IGHG3 0 0 1 0 SP 100 # # 6 IGHA1 IGHG4 0.00352 0.00409 0.59 -0.000575 SP 100 # # 7 IGHA2 IGHA1 0 0 1 0 SP 100 # # 8 IGHA2 IGHD 0 0 1 0 SP 100 # # 9 IGHA2 IGHG1 0 0 1 0 SP 100 # #10 IGHA2 IGHG2 0 0 1 0 SP 100 # #11 IGHA2 IGHG3 0 0 1 0 SP 100 # #12 IGHA2 IGHG4 0 0 1 0 SP 100 # #13 IGHD IGHA1 0 0 1 0 SP 100 # #14 IGHD IGHA2 0 0 1 0 SP 100 # #15 IGHD IGHG1 0.0144 0.0127 0.42 0.00173 SP 100 # #16 IGHD IGHG2 0.00926 0.00981 0.54 -0.000545 SP 100 # #17 IGHD IGHG3 0.0157 0.0113 0.3 0.00440 SP 100 # #18 IGHD IGHG4 0.00203 0.00271 0.58 -0.000681 SP 100 # #19 IGHG1 IGHA1 0.00632 0.00757 0.59 -0.00126 SP 100 # #20 IGHG1 IGHA2 0.00326 0.00419 0.51 -0.000932 SP 100 # #21 IGHG1 IGHD 0 0 1 0 SP 100 # #22 IGHG1 IGHG2 0.0417 0.0545 0.86 -0.0127 SP 100 # #23 IGHG1 IGHG3 0 0 1 0 SP 100 # #24 IGHG1 IGHG4 0.0451 0.0493 0.66 -0.00427 SP 100 # #25 IGHG2 IGHA1 0 0 1 0 SP 100 # #26 IGHG2 IGHA2 0.00542 0.00511 0.45 0.000316 SP 100 # #27 IGHG2 IGHD 0 0 1 0 SP 100 # #28 IGHG2 IGHG1 0 0 1 0 SP 100 # #29 IGHG2 IGHG3 0 0 1 0 SP 100 # #30 IGHG2 IGHG4 0.0715 0.0581 0.08 0.0133 SP 100 # #31 IGHG3 IGHA1 0.0716 0.0600 0.16 0.0116 SP 100 # #32 IGHG3 IGHA2 0.0508 0.0492 0.37 0.00154 SP 100 # #33 IGHG3 IGHD 0 0 1 0 SP 100 # #34 IGHG3 IGHG1 0.159 0.181 0.96 -0.0225 SP 100 # #35 IGHG3 IGHG2 0.228 0.203 0.05 0.0250 SP 100 # #36 IGHG3 IGHG4 0.161 0.175 0.8 -0.0137 SP 100 # #37 IGHG4 IGHA1 0 0 1 0 SP 100 # #38 IGHG4 IGHA2 0.00596 0.00437 0.27 0.00159 SP 100 # #39 IGHG4 IGHD 0 0 1 0 SP 100 # #40 IGHG4 IGHG1 0 0 1 0 SP 100 # #41 IGHG4 IGHG2 0 0 1 0 SP 100 # #42 IGHG4 IGHG3 0 0 1 0 SP 100 ## ----eval=FALSE, warning=FALSE, message=FALSE--------------------------------- # sp = testSP(switches$switches, alternative="greater", to="IGHA2") # print(sp$means) # # A tibble: 8 × 8 # # Groups: FROM [8] # # FROM TO RECON PERMUTE PGT DELTA STAT REPS # # # #1 IGHA1 IGHA2 0.620 0.634 0.59 -0.0144 SP 100 # #2 IGHD IGHA2 0 0 1 0 SP 100 # #3 IGHE IGHA2 0 0 1 0 SP 100 # #4 IGHG1 IGHA2 0.0116 0.0292 0.85 -0.0176 SP 100 # #5 IGHG2 IGHA2 0.0371 0.0252 0.32 0.0119 SP 100 # #6 IGHG3 IGHA2 0.297 0.283 0.45 0.0144 SP 100 # #7 IGHG4 IGHA2 0.0346 0.0290 0.36 0.00563 SP 100 # #8 IGHM IGHA2 0 0 1 0 SP 100