Drysdale selection method

Load permutations

perms_list <- lapply(dir('data', pattern = 'cca_perms-drysdale-chunk_.*rds', full.names = TRUE), readRDS)

#The optimal penalty for each permuted data set
penalties <- unlist(lapply(perms_list, function(permset){
  lapply(permset, function(aperm) unique(aperm$penalty))
}))
#Create a data frame with one row for each correlation for each permutation.
#Column `k` indexes the correlation.
null_ccs <- data.table::rbindlist(
  lapply(perms_list, function(permset){
    data.table::rbindlist(
      lapply(
        permset, 
        function(aperm) data.frame(cc = aperm$cca_cors, k = 1:length(aperm$cca_cors))))
  }))
#index the permutation iteration too, just in case we want to see what penalty was used.
null_ccs[, i := 1:.N, by = k]
null_ccs[, mean := mean(cc), by = k]

Load CCA of observed data

cca_obsv <- readRDS(file.path('data', 'cca-drysdale.rds'))
cca_penalty_used <- cca_obsv$cca.permute_obj$bestpenaltyx
cc <- data.frame(cc_obs = cca_obsv$cca_obj$cors, 
                 k = 1:length(cca_obsv$cca_obj$cors))
#To find the Z score we just see what proportion of null CCs are less than the
#observed CC and then use qnorm to turn that probability into a Z score. We can
#then get a two-sided p-value for the CC using pnorm (there are other ways
#too--we could just directly use the prop_null_lt_obs).
Z_table <- null_ccs[cc, on = 'k'][, list(prop_null_lt_obs = mean(cc < cc_obs),
                                         Z = qnorm(mean(cc < cc_obs)),
                                         `P(>|Z|)` = pnorm(abs(qnorm(mean(cc < cc_obs))), lower.tail = F)*2,
                                         cc_obs = unique(cc_obs)), by = k]

ggplot(data.frame(penalties = penalties), aes(x = penalties)) + 
  geom_histogram(bins = 19, fill = '#a7c6b8') + 
  geom_vline(xintercept = cca_penalty_used, color = '#2a5078') + 
  theme_minimal()

Distribution of penalties across permutations. Blue line indicates penalty used for unpermuted data

#https://www.instagram.com/p/B_fRZQzgsnc/
dens_plot(null_ccs = null_ccs, Z_table = Z_table)

Density of canonical correlations (CC) on permuted data for each of 10 canonical variates in order. Lines indicate CC of unpermuted data with Z scores for their location in the null density.

Drysdale 2nd selection method

Load permutations

perms_list <- lapply(dir('data', pattern = 'cca_perms-drysdale2-chunk_.*rds', full.names = TRUE), readRDS)

#The optimal penalty for each permuted data set
penalties <- unlist(lapply(perms_list, function(permset){
  lapply(permset, function(aperm) unique(aperm$penalty))
}))
#Create a data frame with one row for each correlation for each permutation.
#Column `k` indexes the correlation.
null_ccs <- data.table::rbindlist(
  lapply(perms_list, function(permset){
    data.table::rbindlist(
      lapply(
        permset, 
        function(aperm) data.frame(cc = aperm$cca_cors, k = 1:length(aperm$cca_cors))))
  }))
#index the permutation iteration too, just in case we want to see what penalty was used.
null_ccs[, i := 1:.N, by = k]
null_ccs[, mean := mean(cc), by = k]

Load CCA of observed data

cca_obsv <- readRDS(file.path('data', 'cca-drysdale2.rds'))
cca_penalty_used <- cca_obsv$cca.permute_obj$bestpenaltyx
cc <- data.frame(cc_obs = cca_obsv$cca_obj$cors, 
                 k = 1:length(cca_obsv$cca_obj$cors))
#To find the Z score we just see what proportion of null CCs are less than the
#observed CC and then use qnorm to turn that probability into a Z score. We can
#then get a two-sided p-value for the CC using pnorm (there are other ways
#too--we could just directly use the prop_null_lt_obs).
Z_table <- null_ccs[cc, on = 'k'][, list(prop_null_lt_obs = mean(cc < cc_obs),
                                         Z = qnorm(mean(cc < cc_obs)),
                                         `P(>|Z|)` = pnorm(abs(qnorm(mean(cc < cc_obs))), lower.tail = F)*2,
                                         cc_obs = unique(cc_obs)), by = k]

ggplot(data.frame(penalties = penalties), aes(x = penalties)) + 
  geom_histogram(bins = 19, fill = '#a7c6b8') + 
  geom_vline(xintercept = cca_penalty_used, color = '#2a5078') + 
  theme_minimal()

Distribution of penalties across permutations. Blue line indicates penalty used for unpermuted data

#https://www.instagram.com/p/B_fRZQzgsnc/
dens_plot(null_ccs = null_ccs, Z_table = Z_table)

Density of canonical correlations (CC) on permuted data for each of 10 canonical variates in order. Lines indicate CC of unpermuted data with Z scores for their location in the null density.

Drysdale 3nd selection method

This is the closest to an exact replication of Drysdale et al’s method (as reported by Dinga et al). We select 100 features from the resting state data that show the highest absolute maximum correlations with any of the threat/deprivation features, and 100 features from the threat/dep data that show the highest absolute max correlation with any of the resting state features. We then submit these features to a non-regularized CCA.

Load permutations

perms_list <- lapply(dir('data', pattern = 'cca_perms-drysdale3-noreg--chunk_.*rds', full.names = TRUE), readRDS)

#Create a data frame with one row for each correlation for each permutation.
#Column `k` indexes the correlation.
null_ccs <- data.table::rbindlist(
  lapply(perms_list, function(permset){
    data.table::rbindlist(
      lapply(
        permset, 
        function(aperm) data.frame(cc = aperm$cca_cors, k = 1:length(aperm$cca_cors))))
  }))
#index the permutation iteration too, just in case we want to see what penalty was used.
null_ccs[, i := 1:.N, by = k]
null_ccs[, mean := mean(cc), by = k]

Load CCA of observed data

cca_obsv <- readRDS(file.path('data', 'cca-drysdale3-noreg-.rds'))
cc <- data.frame(cc_obs = cca_obsv$cca_obj$cancor, 
                 k = 1:length(cca_obsv$cca_obj$cancor))
#To find the Z score we just see what proportion of null CCs are less than the
#observed CC and then use qnorm to turn that probability into a Z score. We can
#then get a two-sided p-value for the CC using pnorm (there are other ways
#too--we could just directly use the prop_null_lt_obs).
Z_table <- null_ccs[cc, on = 'k'][, list(prop_null_lt_obs = mean(cc < cc_obs),
                                         Z = qnorm(mean(cc < cc_obs)),
                                         `P(>|Z|)` = pnorm(abs(qnorm(mean(cc < cc_obs))), lower.tail = F)*2,
                                         cc_obs = unique(cc_obs)), by = k]

#https://www.instagram.com/p/B_fRZQzgsnc/
<<<<<<< HEAD
dens_plot(null_ccs = null_ccs[k <= 10], Z_table = Z_table[k <= 10], xlim = c(.9, 1), label_h = 30, text_offset = .0075)

======= dens_plot(null_ccs = null_ccs[k <= 10], Z_table = Z_table[k <= 10], xlim = c(.9, 1), label_h = 50, text_offset = .02, nudge_x = -.0025)

>>>>>>> ccbe708cce2725b09de1922ef0b2a28faf7a6601

Density of canonical correlations (CC) on permuted data for each of 10 canonical variates in order. Lines indicate CC of unpermuted data with Z scores for their location in the null density. X axis starts at r=.90.

library(data.table)
w_dt <- data.table::data.table(Wilks(cca_obsv$cca_obj))
Z_table_table <- copy(Z_table[, c('Z', 'P(>|Z|)', 'cc_obs')])
setnames(Z_table_table, c('cc_obs', 'Z'), c('CanR', 'Z_perm'))
knitr::kable(w_dt[Z_table_table, on = 'CanR'], caption = 'Wilk\'s test (F) and permutation Z score for observed canonical correlation')

Wilk’s test (F) and permutation Z score for observed canonical correlation
CanR	LR test stat	approx F	numDF	denDF	Pr(> F)	Z_perm	P(>\|Z\|)
0.9859025	0.0000000	1.0710227	10000	4654.2141	0.0032688	-0.4646250	0.6422
0.9814274	0.0000000	1.0334541	9801	4657.2062	0.0964924	-0.6638286	0.5068
0.9784669	0.0000000	1.0008461	9604	4659.1986	0.4878042	-0.2923749	0.7700
0.9733658	0.0000000	0.9704068	9409	4660.1912	0.8830995	-0.8344987	0.4040
0.9706689	0.0000000	0.9434681	9216	4660.1841	0.9892505	-0.3783104	0.7052
0.9661512	0.0000000	0.9175621	9025	4659.1772	0.9996504	-0.5837329	0.5594
0.9614896	0.0000000	0.8936659	8836	4657.1707	0.9999950	-0.7776082	0.4368
0.9592206	0.0000000	0.8714795	8649	4654.1644	1.0000000	-0.1180854	0.9060
0.9536070	0.0000000	0.8494983	8464	4650.1584	1.0000000	-0.5307385	0.5956
0.9500005	0.0000000	0.8292342	8281	4645.1528	1.0000000	-0.2528295	0.8004
0.9453873	0.0000000	0.8095576	8100	4639.1474	1.0000000	-0.2412001	0.8094
0.9404680	0.0000000	0.7907588	7921	4632.1424	1.0000000	-0.2678692	0.7888
0.9357113	0.0000000	0.7728069	7744	4624.1377	1.0000000	-0.2139321	0.8306
0.9311028	0.0000000	0.7555149	7569	4615.1334	1.0000000	-0.0760240	0.9394
0.9279590	0.0000000	0.7387303	7396	4605.1295	1.0000000	0.4504306	0.6524
0.9225776	0.0000000	0.7219292	7225	4594.1259	1.0000000	0.4341218	0.6642
0.9092683	0.0000000	0.7057113	7056	4582.1227	1.0000000	-1.3806064	0.1674
0.9020225	0.0000000	0.6919979	6889	4569.1200	1.0000000	-1.6625627	0.0964
0.8932996	0.0000000	0.6790445	6724	4555.1176	1.0000000	-2.2603425	0.0238
0.8912017	0.0000000	0.6670678	6561	4540.1157	1.0000000	-1.3279337	0.1842
0.8851038	0.0000000	0.6546150	6400	4524.1142	1.0000000	-1.3035121	0.1924
0.8763340	0.0000000	0.6424748	6241	4507.1133	1.0000000	-1.7346659	0.0828
0.8723941	0.0000000	0.6311035	6084	4489.1128	1.0000000	-1.1769887	0.2392
0.8654022	0.0000000	0.6195451	5929	4470.1128	1.0000000	-1.2170111	0.2236
0.8554756	0.0000000	0.6083183	5776	4450.1133	1.0000000	-1.7518477	0.0798
0.8494989	0.0000000	0.5978707	5625	4429.1144	1.0000000	-1.5156811	0.1296
0.8441465	0.0000000	0.5874939	5476	4407.1161	1.0000000	-1.2013899	0.2296
0.8338354	0.0000000	0.5770574	5329	4384.1183	1.0000000	-1.6777291	0.0934
0.8266227	0.0000000	0.5672959	5184	4360.1212	1.0000000	-1.5581403	0.1192
0.8195352	0.0000000	0.5576976	5041	4335.1248	1.0000000	-1.5062617	0.1320
0.8166160	0.0000000	0.5482136	4900	4309.1290	1.0000000	-0.6554159	0.5122
0.8062270	0.0000000	0.5382204	4761	4282.1339	1.0000000	-1.0043706	0.3152
0.8000342	0.0000000	0.5287456	4624	4254.1395	1.0000000	-0.7156621	0.4742
0.7897209	0.0000000	0.5191774	4489	4225.1459	1.0000000	-0.9797693	0.3272
0.7819267	0.0000000	0.5100410	4356	4195.1531	1.0000000	-0.9096635	0.3630
0.7793152	0.0000000	0.5009749	4225	4164.1612	1.0000000	-0.0313380	0.9750
0.7688372	0.0000000	0.4912802	4096	4132.1701	1.0000000	-0.2743704	0.7838
0.7605658	0.0000000	0.4819419	3969	4099.1800	1.0000000	-0.1919262	0.8478
0.7487849	0.0000001	0.4726517	3844	4065.1908	1.0000000	-0.5727706	0.5668
0.7415346	0.0000002	0.4638156	3721	4030.2026	1.0000000	-0.2881917	0.7732
0.7390130	0.0000005	0.4548586	3600	3994.2155	1.0000000	0.6122082	0.5404
0.7344750	0.0000010	0.4451767	3481	3957.2295	1.0000000	1.2657573	0.2056
0.7190208	0.0000022	0.4349700	3364	3919.2447	1.0000000	0.4808825	0.6306
0.7067002	0.0000046	0.4255331	3249	3880.2612	1.0000000	0.1421013	0.8870
0.6905255	0.0000092	0.4164616	3136	3840.2789	1.0000000	-0.6754341	0.4994
0.6797927	0.0000176	0.4081692	3025	3799.2980	1.0000000	-0.7544151	0.4506
0.6720859	0.0000327	0.4000260	2916	3757.3186	1.0000000	-0.4446122	0.6566
0.6566229	0.0000596	0.3916758	2809	3714.3406	1.0000000	-1.0634013	0.2876
0.6530188	0.0001048	0.3839498	2704	3670.3644	1.0000000	-0.2593049	0.7954
0.6369279	0.0001828	0.3755377	2601	3625.3898	1.0000000	-0.9226301	0.3562
0.6167556	0.0003075	0.3677723	2500	3579.4170	1.0000000	-1.9845012	0.0472
0.6111934	0.0004963	0.3610749	2401	3532.4462	1.0000000	-1.4030565	0.1606
0.6067723	0.0007922	0.3539090	2304	3484.4775	1.0000000	-0.6155372	0.5382
0.6016535	0.0012539	0.3461027	2209	3435.5109	1.0000000	0.0574333	0.9542
0.5909021	0.0019653	0.3376716	2116	3385.5466	1.0000000	0.0948928	0.9244
0.5724424	0.0030196	0.3291938	2025	3334.5849	1.0000000	-0.7059809	0.4802
0.5693515	0.0044914	0.3215070	1936	3282.6258	1.0000000	0.2265161	0.8208
0.5545710	0.0066456	0.3129068	1849	3229.6695	1.0000000	-0.1294520	0.8970
0.5495665	0.0095972	0.3046470	1764	3175.7162	1.0000000	0.5896878	0.5554
0.5305700	0.0137501	0.2956002	1681	3120.7663	1.0000000	-0.2172674	0.8280
0.5005048	0.0191373	0.2873238	1600	3064.8198	1.0000000	-2.1674573	0.0302
0.4951916	0.0255336	0.2810616	1521	3007.8771	1.0000000	-1.4487765	0.1474
0.4840004	0.0338290	0.2741413	1444	2949.9385	1.0000000	-1.3626273	0.1730
0.4749113	0.0441780	0.2671626	1369	2891.0043	1.0000000	-1.0295953	0.3032
0.4641850	0.0570437	0.2598598	1296	2831.0748	1.0000000	-0.8596174	0.3900
0.4542053	0.0727104	0.2523838	1225	2770.1506	1.0000000	-0.6182660	0.5364
0.4346087	0.0916097	0.2446110	1156	2708.2320	1.0000000	-1.3664429	0.1718
0.4232542	0.1129429	0.2376909	1089	2645.3195	1.0000000	-1.2154356	0.2242
0.4132981	0.1375917	0.2306574	1024	2581.4138	1.0000000	-0.9668881	0.3336
0.4057623	0.1659361	0.2233054	961	2516.5154	1.0000000	-0.4377052	0.6616
0.3833183	0.1986409	0.2152593	900	2450.6252	1.0000000	-1.4057445	0.1598
0.3661229	0.2328550	0.2084475	841	2383.7439	1.0000000	-1.8494005	0.0644
0.3613280	0.2688999	0.2022680	784	2315.8726	1.0000000	-1.0211149	0.3072
0.3434072	0.3092787	0.1950586	729	2247.0123	1.0000000	-1.5381989	0.1240
0.3372612	0.3506277	0.1885696	676	2177.1643	1.0000000	-0.8231899	0.4104
0.3209058	0.3956286	0.1811262	625	2106.3300	1.0000000	-1.1709970	0.2416
0.3162455	0.4410479	0.1741670	576	2034.5113	1.0000000	-0.3039054	0.7612
0.2890464	0.4900593	0.1658459	529	1961.7102	1.0000000	-1.7179814	0.0858
0.2867981	0.5347353	0.1597420	484	1887.9289	1.0000000	-0.5813568	0.5610
0.2846951	0.5826610	0.1518729	441	1813.1705	1.0000000	0.5516327	0.5812
0.2606538	0.6340517	0.1417681	400	1737.4382	1.0000000	-0.5284313	0.5972
0.2481691	0.6802695	0.1333625	361	1660.7364	1.0000000	-0.4294442	0.6676
0.2282877	0.7249156	0.1245816	324	1583.0700	1.0000000	-1.0907112	0.2754
0.2165012	0.7647719	0.1169293	289	1504.4453	1.0000000	-0.9066347	0.3646
0.2038433	0.8023818	0.1088389	256	1424.8702	1.0000000	-0.8182753	0.4132
0.1940846	0.8371678	0.1003996	225	1344.3546	1.0000000	-0.4059219	0.6848
0.1717208	0.8699374	0.0907479	196	1262.9114	1.0000000	-1.3255162	0.1850
0.1626738	0.8963695	0.0830159	169	1180.5575	1.0000000	-0.8210812	0.4116
0.1467252	0.9207347	0.0738807	144	1097.3152	1.0000000	-1.0629602	0.2878
0.1255751	0.9409927	0.0650567	121	1013.2154	1.0000000	-1.8956979	0.0580
0.1125801	0.9560690	0.0584313	100	928.3013	1.0000000	-1.8289974	0.0674
0.1037941	0.9683421	0.0519138	81	842.6357	1.0000000	-1.2696762	0.2042
0.0891739	0.9788878	0.0437994	64	756.3139	1.0000000	-1.3760699	0.1688
0.0714483	0.9867343	0.0359874	49	669.4872	1.0000000	-1.8384237	0.0660
0.0562528	0.9917973	0.0303729	36	582.4135	1.0000000	-2.0663017	0.0388
0.0505646	0.9949457	0.0270574	25	495.5750	1.0000000	-1.1058302	0.2688
0.0425487	0.9974961	0.0210380	16	410.0144	1.0000000	-0.3937034	0.6938
0.0237593	0.9993052	0.0104317	9	328.7051	1.0000000	-1.1249741	0.2606
0.0096061	0.9998696	0.0044326	4	272.0000	0.9999607	-1.3957161	0.1628
0.0061711	0.9999619	0.0052174	1	137.0000	0.9425229	0.2450729	0.8064

Full Selection via regularization

No pre-selection of features.

Load permutations

<<<<<<< HEAD

perms_list <- lapply(dir('data/old', pattern = 'cca_perms-nofeatsel-chunk_.*rds', full.names = TRUE), readRDS)
=======
perms_list <- lapply(dir('data', pattern = 'cca_perms-nofeatsel-chunk_.*rds', full.names = TRUE), readRDS)
>>>>>>> ccbe708cce2725b09de1922ef0b2a28faf7a6601

#The optimal penalty for each permuted data set
penalties <- unlist(lapply(perms_list, function(permset){
  lapply(permset, function(aperm) unique(aperm$penalty))
}))
#Create a data frame with one row for each correlation for each permutation.
#Column `k` indexes the correlation.
null_ccs <- data.table::rbindlist(
  lapply(perms_list, function(permset){
    data.table::rbindlist(
      lapply(
        permset, 
        function(aperm) data.frame(cc = aperm$cca_cors, k = 1:length(aperm$cca_cors))))
  }))
#index the permutation iteration too, just in case we want to see what penalty was used.
null_ccs[, i := 1:.N, by = k]
null_ccs[, mean := mean(cc), by = k]



Load CCA of observed data
<<<<<<< HEAD
cca_obsv <- readRDS(file.path('data/old', 'cca-nofeatsel.rds'))
=======
cca_obsv <- readRDS(file.path('data', 'cca-nofeatsel.rds'))
>>>>>>> ccbe708cce2725b09de1922ef0b2a28faf7a6601
cca_penalty_used <- cca_obsv$cca.permute_obj$bestpenaltyx
cc <- data.frame(cc_obs = cca_obsv$cca_obj$cors, 
                 k = 1:length(cca_obsv$cca_obj$cors))
#To find the Z score we just see what proportion of null CCs are less than the
#observed CC and then use qnorm to turn that probability into a Z score. We can
#then get a two-sided p-value for the CC using pnorm (there are other ways
#too--we could just directly use the prop_null_lt_obs).
Z_table <- null_ccs[cc, on = 'k'][, list(prop_null_lt_obs = mean(cc < cc_obs),
                                         Z = qnorm(mean(cc < cc_obs)),
                                         `P(>|Z|)` = pnorm(abs(qnorm(mean(cc < cc_obs))), lower.tail = F)*2,
                                         cc_obs = unique(cc_obs)), by = k]
ggplot(data.frame(penalties = penalties), aes(x = penalties)) + 
  geom_histogram(bins = 19, fill = '#a7c6b8') + 
  geom_vline(xintercept = cca_penalty_used, color = '#2a5078') + 
  theme_minimal()

<<<<<<< HEAD

=======

>>>>>>> ccbe708cce2725b09de1922ef0b2a28faf7a6601

Distribution of penalties across permutations. Blue line indicates penalty used for unpermuted data


#https://www.instagram.com/p/B_fRZQzgsnc/
dens_plot(null_ccs = null_ccs[k <= 10], Z_table = Z_table[k <=10])

<<<<<<< HEAD

=======

>>>>>>> ccbe708cce2725b09de1922ef0b2a28faf7a6601

Density of canonical correlations (CC) on permuted data for each of 10 canonical variates in order. Lines indicate CC of unpermuted data with Z scores for their location in the null density.



Xia selection method

Load permutations
perms_list <- lapply(dir('data', pattern = 'cca_perms-xia.*rds', full.names = TRUE), readRDS)

#The optimal penalty for each permuted data set
penalties <- unlist(lapply(perms_list, function(permset){
  lapply(permset, function(aperm) unique(aperm$penalty))
}))
#Create a data frame with one row for each correlation for each permutation.
#Column `k` indexes the correlation.
null_ccs <- data.table::rbindlist(
  lapply(perms_list, function(permset){
    data.table::rbindlist(
      lapply(
        permset, 
        function(aperm) data.frame(cc = aperm$cca_cors, k = 1:length(aperm$cca_cors))))
  }))
#index the permutation iteration too, just in case we want to see what penalty was used.
null_ccs[, i := 1:.N, by = k]
null_ccs[, mean := mean(cc), by = k]


Load CCA of observed data
cca_obsv <- readRDS(file.path('data', 'cca-xia.rds'))
cca_penalty_used <- cca_obsv$cca.permute_obj$bestpenaltyx
cc <- data.frame(cc_obs = cca_obsv$cca_obj$cors, 
                 k = 1:length(cca_obsv$cca_obj$cors))
#To find the Z score we just see what proportion of null CCs are less than the
#observed CC and then use qnorm to turn that probability into a Z score. We can
#then get a two-sided p-value for the CC using pnorm (there are other ways
#too--we could just directly use the prop_null_lt_obs).
Z_table <- null_ccs[cc, on = 'k'][, list(prop_null_lt_obs = mean(cc < cc_obs),
                                         Z = qnorm(mean(cc < cc_obs)),
                                         `P(>|Z|)` = pnorm(abs(qnorm(mean(cc < cc_obs))), lower.tail = F)*2,
                                         cc_obs = unique(cc_obs)), by = k]
ggplot(data.frame(penalties = penalties), aes(x = penalties)) + 
  geom_histogram(bins = 19, fill = '#a7c6b8') + 
  geom_vline(xintercept = cca_penalty_used, color = '#2a5078') + 
  theme_minimal()



Distribution of penalties across permutations. Blue line indicates penalty used for unpermuted data


dens_plot(null_ccs = null_ccs, Z_table = Z_table)



Density of canonical correlations (CC) on permuted data for each of 10 canonical variates in order. Lines indicate CC of unpermuted data with Z scores for their location in the null density.

CCA Results

Drysdale selection method

Load permutations

Load CCA of observed data

Drysdale 2nd selection method

Load permutations

Load CCA of observed data

Drysdale 3nd selection method

Load permutations

Load CCA of observed data

Full Selection via regularization

Load permutations

Load CCA of observed data

Xia selection method

Load permutations

Load CCA of observed data