#first, install and load the required R Packages.

library(compositions) 
## Welcome to compositions, a package for compositional data analysis.
## Find an intro with "? compositions"
## 
## Attaching package: 'compositions'
## The following objects are masked from 'package:stats':
## 
##     anova, cor, cov, dist, var
## The following objects are masked from 'package:base':
## 
##     %*%, norm, scale, scale.default
library(car)
## Loading required package: carData
#Download the dataset from https://ijbnpa.biomedcentral.com/articles/10.1186/s12966-017-0521-z
##Fairclough SJ, Dumuid D, Taylor S, Curry W, McGrane B, Stratton G, Maher C, Olds T. Fitness, fatness and the reallocation of time between children's daily movement behaviours: an analysis of compositional data. Int J Behav Nutr Phys Act. 2017 May 10;14(1):64. doi: 10.1186/s12966-017-0521-z.

#it is Additional File 7. Open in Excel and save as a csv file. I renamed it "dataset_example.csv"

#Import the data to R, and give the dataset a name (I will call it "data1").

data1 <- read.csv("dataset_example.csv") #you can use this if you have the dataset saved in the working directory.
#otherwise it might be easier to search for you dataset manually, using the following command...
#data1 <- read.csv(file.choose(), header = TRUE)
names(data1) #tells you the names of the variables
##  [1] "Child.ID"                 "School"                  
##  [3] "Sex"                      "Decimal.Age..y."         
##  [5] "IMD.Decile"               "Height..cm."             
##  [7] "Mass..cm."                "BMI"                     
##  [9] "zBMI"                     "IOTF.grade"              
## [11] "Waist.Circumference..cm." "WHtR"                    
## [13] "Total.20m.Shuttles"       "Wear.time"               
## [15] "Sedentary"                "Light"                   
## [17] "Moderate"                 "Vigorous"                
## [19] "MVPA"                     "sleep"                   
## [21] "X24hours"
#there are some empty rows at the end of the dataset, so we get rid of them here
data1 <- data1[1:169,]  #only keep the first 169 rows...

#amalgamate MVPA to one variable, call it "mvpa"
data1$mvpa <- data1$Moderate + data1$Vigorous

#make activity vectors- this puts all the behaviours together (the cbind() function binds the component columns together)
data1$activity <- cbind.data.frame(data1$sleep, data1$Sedentary, data1$Light, data1$mvpa)
head(data1$activity) #this shows us some of the observations in our activity variable.
#check if there are any zeros. NEed to do this before making ilrs because you will get bad ilrs if you have zeros!
missingSummary(data1$activity)
##                  missingType
## variable          NMV BDL MAR MNAR  SZ Err
##   data1$sleep     169   0   0    0   0   0
##   data1$Sedentary 169   0   0    0   0   0
##   data1$Light     169   0   0    0   0   0
##   data1$mvpa      169   0   0    0   0   0
#look at the "BDL" Below Detection Limit column which lists the number of zeros. There are no zeros in this dataset.

#Now we can designate this variable of time-use behavoiurs as compositional- using the acomp(activity) tells R that "activity" is a composition. The acomp() function also expresses the composition in proportions which sum to 1.
#We call the composition "act.comp"
data1$act.comp <- acomp(data1$activity)
head(data1$act.comp)
##      [,1]        [,2]        [,3]        [,4]         
## [1,] "0.3355556" "0.3808333" "0.2680556" "0.015555556"
## [2,] "0.3692361" "0.3530556" "0.2494444" "0.028263889"
## [3,] "0.3940972" "0.3171528" "0.2636806" "0.025069444"
## [4,] "0.3979167" "0.3653472" "0.2288889" "0.007847222"
## [5,] "0.3916939" "0.3343288" "0.2702271" "0.003750260"
## [6,] "0.3796528" "0.3646528" "0.2331250" "0.022569444"
## attr(,"class")
## [1] "acomp"
mean(data1$act.comp) #this is the mean composition of the sample, expressed in proportions. 
## [1] "0.3809541" "0.3543493" "0.2463897" "0.0183069"
## attr(,"class")
## [1] "acomp"
clo(mean(data1$act.comp),total=1440) #this is the mean composition, adjusted to sum to 1440 min/day.
## [1] 548.57385 510.26305 354.80116  26.36194
#check covariates
data1$ses <- as.factor(data1$IMD.Decile) #tell R that this is a factor (might be better to treat as integer or continuous, but we'll just use factor in our example here)
#rename age variable to something more simple
data1$age <- data1$Decimal.Age..y. # a continuous variable
data1$sex <- factor(data1$Sex) # a factor (M/F)


#descriptive statistics
#center
(m=mean(data1$act.comp)) #because R knows that act.comp is a composition, it gives you the geometric mean, in proportions (i.e., the values will all add to 1). This is called the compositional mean. If we are interested in the mean values in minutes, we can re-adjust the composiitonal mean to make the parts altogether sum to 1440 minutes). This does not change the composition, because the ratios between parts remain the same. We are simply zooming in/out but keeping the aspect ratio constant.
## [1] "0.3809541" "0.3543493" "0.2463897" "0.0183069"
## attr(,"class")
## [1] "acomp"
round(clo(m, total=1440),digits=1)
## [1] 548.6 510.3 354.8  26.4
#variation matrix, rounded to make it nicer: gives us an idea of the spread of the compositional parts, pairwise.
#values closer to zero are more dependent on each other. Zero=exactly proportional.
#this replaces univariate standard deviation.
round(variation(data1$act.comp), digits=3)
##       [,1]  [,2]  [,3]  [,4]
## [1,] 0.000 0.033 0.037 0.355
## [2,] 0.033 0.000 0.094 0.474
## [3,] 0.037 0.094 0.000 0.272
## [4,] 0.355 0.474 0.272 0.000
#Regression models

#The complete composition (i.e., all components) cannot be used as an explanatory variables because there is singularity or perfect multi-collinearity. Because all parts sum to 1 (or 24 h or 1440 minutes) one of the parts is always completely explained by the sum of the other parts (it will equal 1 - sum of remaining parts (if in proportions))

#This is why we use isometric log ratios to represent the composition in the linear model. The isometric log ratios (ilr) are created using an orthonormal basis, therefore multi-collinearity is not an issue. The order of ratios and parts do not matter as the ratios are orthogonal rotations of each other.

#There are always 1 less ilr than there are parts in the composition (this is because one part in the composition can be always be explained by the remaining parts, therefore is superfluous, and the real dimension of compositional data is one less than the number of parts)

#the set of ilr coordinates contains all the relative information about the composition.

#the set of ilr coordinates are coordinates in real Euclidean space. This is why we can use the set of ilr coordinates to represent the composition in statistical models deisgned for read vectors in Euclidean space (e.g. regression models)



#first make ilr coordinates using the ilr() command that is default in the compositions package (call them "ilr.comp")
data1$ilr.comp <- ilr(data1$act.comp)
head(as.data.frame(data1$ilr.comp)) #this is what the ilr coordinates look like
#MODELS

#Explanatory compositions

#Now we use the ilr coordinates as explanatory variables in our linear model.
#zbmi is the dependent variable, with covariates of SES, sex and age
lm.bmi=lm(zBMI~ilr.comp+ses+sex+age, data=data1)
(sum.bmi=car::Anova(lm.bmi))
#the set of ilr coordinates (ilr.comp), i.e., the overall composition is a significant predictor of zBMI.
###I use the car::Anova() command because it adjusts each predictor for all the remaining covariates R has a different anova() command that
#does sequential adjustment (adjusts each predictor only for the previous variables in the model, so the first variable would be completely unadjusted)


#should check out the model fit
#model diagnostics
par(mfrow=c(2,2)) 
plot(lm.bmi) #model diagnostics look great
## Warning: not plotting observations with leverage one:
##   91
#Use the linear model to predict change in zBMI for change in composition.

##MODEL ISOTEMPORAL 1:1 reallocations, pairwise...

# Firstly, predict zBMI for the mean composition (here we use the composition in proportions, i.e., as proportions which sum to 1... it just makes it easier down the track)
# so the mean composition (in proportions) was labelled "m"

#we also need to find the mean values of the covariates so that these can be kept constant in the linear models. Then the only thing that changes is time reallocation.
#some covariates were factors or integers, so the "arithmetic mean" will not make sense. We will have to use the most common level, or round to the nearest whole number.

#to find average condition for integer covariate (SES). Must be a whole number to match the original data.
as.integer(as.character(data1$ses))
##   [1] 2 2 9 1 2 9 3 3 3 2 6 1 2 1 1 1 1 1 2 1 3 3 1 3 3 2 4 2 3 3 3 2 1 2 2 3 1
##  [38] 1 2 1 2 3 1 3 2 1 3 1 3 1 2 1 3 6 2 1 2 1 1 2 1 1 2 2 2 1 2 1 2 1 2 2 2 2
##  [75] 1 2 2 2 1 1 2 9 3 3 9 9 6 9 9 3 7 9 3 9 4 2 4 4 2 2 4 2 2 2 2 2 2 2 9 4 4
## [112] 2 4 5 2 3 4 2 2 2 2 2 4 2 2 5 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 2 2 1 1
## [149] 1 1 1 1 1 1 2 1 2 1 2 1 1 1 3 2 2 1 1 1 1
(m.ses<-mean(as.integer(as.character(data1$ses))))
## [1] 2.449704
as.character(round(m.ses,0))
## [1] "2"
m.ses=data1$ses[data1$ses==as.character(round(m.ses,0))][1] #round to nearest whole number
m.ses
## [1] 2
## Levels: 1 2 3 4 5 6 7 9
(m.ses<-data1$ses[data1$ses=="2"][1]) # here we designate the median "m.ses" to be "2".
## [1] 2
## Levels: 1 2 3 4 5 6 7 9
#to find average condition for factor coordinate (sex)
m.sex <- names(table(data1$sex))[which.max(table(data1$sex))] #average condition is "0"

m.age <- mean(data1$age) #this one is easy, because age is a continuous variable. Just find the arithmetic mean.
str(data1$ses)
##  Factor w/ 8 levels "1","2","3","4",..: 2 2 8 1 2 8 3 3 3 2 ...
str(m.ses)
##  Factor w/ 8 levels "1","2","3","4",..: 2
#Predictions from model....

#this was the model:
lm.bmi=lm(zBMI~ilr.comp+ses+sex+age, data=data1)
#since R and the composition packages have been updated the above model set-up no longer works for predict(). R does not like the ilr() variable
#a solution is to enter each ilr separately
#we can make separate ilr variables for the dataset
data1$ilr1= data1$ilr.comp[,1] #select the first column of the ilr set as ilr1
data1$ilr2= data1$ilr.comp[,2]
data1$ilr3= data1$ilr.comp[,3]
#redo the model (call it lm.bmi1)
lm.bmi1=lm(zBMI~ilr1+ ilr2 + ilr3+ses+sex+age, data=data1)
#compare with the previous model to see it's the same
car::Anova(lm.bmi)
car::Anova(lm.bmi1)#non-compositional terms are identical, but each ilr is separate. So setting up the model like this wont tell us about the overall composition. Can fix this and get the best of both worlds like this:
lm.bmi2=lm(zBMI~ cbind(ilr1,ilr2,ilr3)+ses+sex+age, data=data1)
car::Anova(lm.bmi2)
#Now we can predict zBMI for the "baseline" situation, we will call the predicted zBMI "mean.pred"
#get the ilrs for the mean composition (m)
ilrs.m=ilr(m)
(mean.pred <- predict(lm.bmi2, newdata=list(ilr1=ilrs.m[1], ilr2=ilrs.m[2] , ilr3=ilrs.m[3], # "newdata" is the data we will use to predict our new zBMI. We need to use the ilr of the mean composition as that's what we used in the initial model.
                                       ses=m.ses,  #here the mean of the covariates...
                                       age=m.age, 
                                       sex=m.sex))) 
##         1 
## 0.2202949
# Now predict zBMI for a new composition, where 15 minutes have been reallocated between two activity behaviours, while the remaining behaviours are held constant. 

ch.time <- 15/1440 #ch.time is the amount of time reallocated (divide by 1440 to put 15 minutes in to the proportional scale- remember that "m", our mean composition is in this scale: the components add to 1)


#Now we some new compositions where 15 minutes are reallocated from one component to another. In the code, m[1] refers to the fist component of our composition, from above, this is sleep. m[2] is sb, m[3] is lpa and m[4] is mvpa. So comp1 is a composition where 15 minutes have been added to sleep and 15 minutes have been subtracted from sb. Lpa and MVPA have been kept constant at their mean value.

comp1 <- acomp(c(m[1]+ch.time, m[2]-ch.time, m[3], m[4]))
ilrs.comp1=ilr(comp1)

#and now we predict zBMI for the new composition....
(comp1.pred <- predict(lm.bmi2, newdata=list(ilr1=ilrs.comp1[1], ilr2=ilrs.comp1[2] , ilr3=ilrs.comp1[3],
                                        ses=m.ses,  #here the covariates are kept constant at the mean...
                                       age=m.age, 
                                       sex=m.sex)))
##         1 
## 0.2715709
#So to find the difference in zBMI for the reallocation of 15 minutes from sleep to sb, we subtract the estimated zBMI from comp1 from the reference comp.

(sleep.to.sb <- comp1.pred- mean.pred)
##          1 
## 0.05127603
#this is predicted increase in zBMI when 15 miutes are reallocated from sleep to sb, at the mean composition of the sample.


###What about increasing one part, at the equal expense of all remaining parts?  "1:many reallocation".

#We can use the same model as above, but have to change our new predictive composition.

m #(this is the mean composition, in proportions)
## [1] "0.3809541" "0.3543493" "0.2463897" "0.0183069"
## attr(,"class")
## [1] "acomp"
#so increase sleep by 60 minutes, getting time from the remaining behaviours at equal proportions looks like this:
r <- 60/1440/m[1] #this is what we have to multiply sleep by to increase it by 60 minutes at the mean.
s <- r* m[1]/(1-m[1]) #this is what we have to multiply each of the remaining parts by, to reduce them evenly so that the daily total of 1440 minutes is maintained.
c1 <- cbind(m[1]*(1+r),m[2]*(1-s),m[3]*(1-s),m[4]*(1-s))  #create the new composition, c1
#just check that the minutes are what we want
clo(c1, total=1440)  #the new composition
##          [,1]     [,2]     [,3]     [,4]
## [1,] 608.5738 475.9183 330.9203 24.58757
clo(m, total=1440)  #the mean composition. We see that sleep has been increased by 60 minutes, and the remaining have been decreased. 
## [1] 548.57385 510.26305 354.80116  26.36194
#get the ilrs of the new composition c1
ilrs.c1=ilr(c1)

#Now use c1 in the predictive model
comp.c1.pred <- predict(lm.bmi2, newdata=list(ilr1=ilrs.c1[1], ilr2=ilrs.c1[2] , ilr3=ilrs.c1[3],
                                         ses=m.ses,
                                         age=m.age, 
                                         sex=m.sex))

comp.c1.pred - mean.pred  #estimated increase in zBMI when sleep is increased by 60 mins, at the expense of the remaining components equally.
##         1 
## 0.1703519
#You can change the predictive composition to be whatever interests you, then put it into the predictive ilr linear model.



#For the above 1:many reallocations, you can obtain a p-value which describes the significance of this reallocation. To do this, you need to use a specific type of ilr transformation.
##The resulting ilrs are sometimes called "Pivot coordinates"

#This tells R how you want the isometric log ratios to be set up. (sbp = sequential binary partition)
sbp  <-  matrix(c( 1, -1, -1,-1, #the sign matrix for the ilr
                0, 1, -1, -1,
                0, 0, 1, -1),
             ncol=4, byrow=TRUE)
colnames(sbp)  <- c("Sleep", "Sed", "LPA", "MVPA")
rownames(sbp) <- c("ilr1", "ilr2", "ilr3")
sbp #we can see that each row corresponds to an isometric log ratio coordinate, and each column to a component.
##      Sleep Sed LPA MVPA
## ilr1     1  -1  -1   -1
## ilr2     0   1  -1   -1
## ilr3     0   0   1   -1
#the first ilr has sleep in its numerator and the remaining components (i.e., the geometric mean of the remaining components) in its denominator. This is the ilr of interest to us, because it captures all relative information regarding sleep.

psi  <-  gsi.buildilrBase(t(sbp)) #this makes the above sbp into something useable for R

data1$ilr.Sleep <- ilr(data1$act.comp, V=psi) # V=psi speficies that we want to use the sbp from above, to make the special type of ilr where the first coordinate is the first component (in this case, sleep) relative to the geometric mean of the remaining components
#we have to make the ilrs separate variables again
data1$ilrSleep1 <-data1$ilr.Sleep[,1]
data1$ilrSleep2 <-data1$ilr.Sleep[,2]
data1$ilrSleep3 <-data1$ilr.Sleep[,3]

#Now we can use this set of ilrs in our model
lm.bmi3 <- lm(zBMI~cbind(ilrSleep1, ilrSleep2, ilrSleep3)+ses+sex+age, data=data1)
summary(lm.bmi3)  ##here we see the relationship between the first ilr coordinate and sleep is not significant, but it is positive. Therefore we can say that, as sleep increases at the expense of equal decrease in sed, lpa and mvpa, zbmi is predicted to increase. This supports what we saw when we performed our predictions. 
## 
## Call:
## lm(formula = zBMI ~ cbind(ilrSleep1, ilrSleep2, ilrSleep3) + 
##     ses + sex + age, data = data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.5479 -0.9345  0.0000  0.8399  2.8970 
## 
## Coefficients:
##                                                  Estimate Std. Error t value
## (Intercept)                                     -9.530806   3.327566  -2.864
## cbind(ilrSleep1, ilrSleep2, ilrSleep3)ilrSleep1  1.133903   1.053249   1.077
## cbind(ilrSleep1, ilrSleep2, ilrSleep3)ilrSleep2 -0.616482   0.492951  -1.251
## cbind(ilrSleep1, ilrSleep2, ilrSleep3)ilrSleep3  1.322203   0.547279   2.416
## ses2                                            -0.005087   0.228894  -0.022
## ses3                                             0.431123   0.307571   1.402
## ses4                                            -0.391475   0.440406  -0.889
## ses5                                            -0.258360   0.892037  -0.290
## ses6                                             1.092290   0.734886   1.486
## ses7                                            -0.616311   1.253007  -0.492
## ses9                                            -0.161242   0.426155  -0.378
## sex1                                             0.341942   0.223817   1.528
## age                                              0.683116   0.319733   2.137
##                                                 Pr(>|t|)   
## (Intercept)                                      0.00476 **
## cbind(ilrSleep1, ilrSleep2, ilrSleep3)ilrSleep1  0.28333   
## cbind(ilrSleep1, ilrSleep2, ilrSleep3)ilrSleep2  0.21295   
## cbind(ilrSleep1, ilrSleep2, ilrSleep3)ilrSleep3  0.01685 * 
## ses2                                             0.98230   
## ses3                                             0.16299   
## ses4                                             0.37543   
## ses5                                             0.77248   
## ses6                                             0.13921   
## ses7                                             0.62351   
## ses9                                             0.70567   
## sex1                                             0.12859   
## age                                              0.03420 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.232 on 156 degrees of freedom
## Multiple R-squared:  0.1856, Adjusted R-squared:  0.1229 
## F-statistic: 2.962 on 12 and 156 DF,  p-value: 0.0009588
#Let's try MVPA, relative to the remainin components.
#we could re-order our components to put MVPA first, and use the same sign matrix (sbp) as before, or we could keep the same order of components and modify the sign matrix. Let's change the sign matrix, remembering MVPA is the fourth column and we want it as the numerator of the first ilr (give it a positive sign). We also want only the first ilr to contain all the information about MVPA, so we will not have MVPA in any of the remaining ilr coordinates.
sbp2  <-  matrix(c( -1, -1, -1,1, #the sign matrix for the ilr
                -1, -1, 1, 0,
                -1, 1, 0, 0),
             ncol=4, byrow=TRUE)
colnames(sbp2)  <- c("Sleep", "Sed", "LPA", "MVPA")
rownames(sbp2) <- c("ilr1", "ilr2", "ilr3")
sbp2
##      Sleep Sed LPA MVPA
## ilr1    -1  -1  -1    1
## ilr2    -1  -1   1    0
## ilr3    -1   1   0    0
psi2  <-  gsi.buildilrBase(t(sbp2))

#Now make the ilrs using this new sign matrix (sbp)
data1$ilr.MVPA <- ilr(data1$act.comp, V=psi2) #this set of ilrs has MVPA:remaining as the first coordinate
data1$ilrMVPA1 <-data1$ilr.MVPA[,1]
data1$ilrMVPA2 <-data1$ilr.MVPA[,2]
data1$ilrMVPA3 <-data1$ilr.MVPA[,3]

#And run the linear model again, with our new ilrs.
lm.bmi4 <- lm(zBMI~cbind(ilrMVPA1, ilrMVPA2, ilrMVPA3)+ses+sex+age, data=data1) 
summary(lm.bmi4) 
## 
## Call:
## lm(formula = zBMI ~ cbind(ilrMVPA1, ilrMVPA2, ilrMVPA3) + ses + 
##     sex + age, data = data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.5479 -0.9345  0.0000  0.8399  2.8970 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                 -9.530806   3.327566  -2.864
## cbind(ilrMVPA1, ilrMVPA2, ilrMVPA3)ilrMVPA1 -1.166929   0.263004  -4.437
## cbind(ilrMVPA1, ilrMVPA2, ilrMVPA3)ilrMVPA2  0.639835   0.734245   0.871
## cbind(ilrMVPA1, ilrMVPA2, ilrMVPA3)ilrMVPA3 -1.281754   1.021549  -1.255
## ses2                                        -0.005087   0.228894  -0.022
## ses3                                         0.431123   0.307571   1.402
## ses4                                        -0.391475   0.440406  -0.889
## ses5                                        -0.258360   0.892037  -0.290
## ses6                                         1.092290   0.734886   1.486
## ses7                                        -0.616311   1.253007  -0.492
## ses9                                        -0.161242   0.426155  -0.378
## sex1                                         0.341942   0.223817   1.528
## age                                          0.683116   0.319733   2.137
##                                             Pr(>|t|)    
## (Intercept)                                  0.00476 ** 
## cbind(ilrMVPA1, ilrMVPA2, ilrMVPA3)ilrMVPA1 1.72e-05 ***
## cbind(ilrMVPA1, ilrMVPA2, ilrMVPA3)ilrMVPA2  0.38486    
## cbind(ilrMVPA1, ilrMVPA2, ilrMVPA3)ilrMVPA3  0.21146    
## ses2                                         0.98230    
## ses3                                         0.16299    
## ses4                                         0.37543    
## ses5                                         0.77248    
## ses6                                         0.13921    
## ses7                                         0.62351    
## ses9                                         0.70567    
## sex1                                         0.12859    
## age                                          0.03420 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.232 on 156 degrees of freedom
## Multiple R-squared:  0.1856, Adjusted R-squared:  0.1229 
## F-statistic: 2.962 on 12 and 156 DF,  p-value: 0.0009588
#You will notice the regression beta for the first ilr is significant, and negative. Therefore, as MVPA is increased at the equal expense of the remaining activities, zBMI is predicted to decrease.

#why not make a change-prediction for MVPA (remember, MVPA is m[4], the fourth component)
r <- 60/1440/m[4] #this is what we have to multiply mvpa by to increase it by 60 minutes at the mean.
s <- r* m[4]/(1-m[4]) #this is what we have to multiply each of the remaining parts by, to reduce them evenly so that the daily total of 1440 minutes is maintained.
c2 <- cbind(m[1]*(1-s),m[2]*(1-s),m[3]*(1-s),m[4]*(1+r))  #create the new composition where mvpa is increased, c2
#just check that the minutes are what we want
clo(c2, total=1440)  #the new composition
##          [,1]     [,2]     [,3]     [,4]
## [1,] 525.2904 488.6056 339.7421 86.36194
clo(m, total=1440)  #the mean composition. We see that mvpa has been increased by 60 minutes, and the remaining have been decreased. 
## [1] 548.57385 510.26305 354.80116  26.36194
ilr.c2=ilr(c2)

#Now use c2 in the predictive model. Here I am using the original model to predict, lm.bmi, which has the default ilr transformation. We must be consistent with which ilr transformation is being used and ensure the new ilr set we are using to predict zBMI is created by the same transformation (sbp) as the ilr set used in the original linear model).
comp.c2.pred <- predict(lm.bmi2, newdata=list(ilr1=ilr.c2[1], ilr2=ilr.c2[2], ilr3=ilr.c2[3], #the model lm.bmi contained ilrs created by the default ilr transformation in R compositionsm so can use the default ilr transformation for our new c2.
                                         ses=m.ses,
                                         age=m.age, 
                                         sex=m.sex))

comp.c2.pred - mean.pred  
##         1 
## -1.243022
#so zBMI is predicted to decrease by 1.24 when MVPA is increased from the mean by 60 min at the expense of the remaining activities equally.

#Just to show that you can use any model for these predictions....here using the lm.bmi2 with specific ilr transformation...
lm.bmi4 <- lm(zBMI~cbind(ilrMVPA1, ilrMVPA2, ilrMVPA3)+ses+sex+age, data=data1) #the MVPA-first ILRS are made by sbp of V=psi2)
ilr.c2=ilr(c2, V=psi2)
comp.c2.pred <- predict(lm.bmi4, newdata=list(ilrMVPA1=ilr.c2[1], ilrMVPA2=ilr.c2[2],ilrMVPA3=ilr.c2[3], #using the ilrs from previous line of code, where V=psi2)
                                         ses=m.ses,
                                         age=m.age, 
                                         sex=m.sex))

comp.c2.pred - mean.pred
##         1 
## -1.243022
#Notice this is the same predicted difference as we got before.


#LINEAR MODELS :: COMPOSITION IS THE DEPENDENT 
#let's see if zbmi predicts the composition.

lmodel=lm(ilr.comp~zBMI+ses+age+sex, data=data1)
car::Anova(lmodel)
## 
## Type II MANOVA Tests: Pillai test statistic
##      Df test stat approx F num Df den Df    Pr(>F)    
## zBMI  1  0.118820   7.0118      3    156 0.0001872 ***
## ses   7  0.152980   1.2128     21    474 0.2343226    
## age   1  0.002745   0.1431      3    156 0.9339476    
## sex   1  0.267994  19.0377      3    156 1.426e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#this means that zBMI is a significant predictor of the composition. We use the car::Anova command because the Anova from the car package is a type II test- it adjusts each term in the model for each other term. It does not do a step-wise adjustment as some of the other anova commands do.

#to get an idea of what the relationship looks like, we can make some predictions using the linear model.

#what is the time-use composition at varying points of zBMI?
#let's predict time-use composition for zBMI = -3, and the continue to predict for each level of zBMI, until zBMI = 3

(ilr.pred.bmi3a <- predict(lmodel, newdata=list(zBMI=-3, ses=m.ses, age=m.age, sex=m.sex))) #the predicted composition is in ilr coordinates
##          [,1]       [,2]      [,3]
## 1 -0.09458545 -0.2025639 -2.194961
(comp.bmi3a <- ilrInv(ilr.pred.bmi3a)) #we express the ilr coordinates as a composition (i.e., in proportions)
##            1            2            3            4 
## "0.37412703" "0.32728501" "0.27304162" "0.02554634" 
## attr(,"class")
## [1] "acomp"
(c1 <- clo(comp.bmi3a, total=1440)) #we linearly adjust the proportions (zoom in), to sum to 1440 minutes, so we can consider minutes/day
##         1         2         3         4 
## 538.74293 471.29041 393.17994  36.78673
ilr.pred.bmi2a <- predict(lmodel, newdata=list(zBMI=-2, ses=m.ses, age=m.age, sex=m.sex))
comp.bmi2a <- ilrInv(ilr.pred.bmi2a)
c2 <- clo(comp.bmi2a, total=1440)

ilr.pred.bmi1a <- predict(lmodel, newdata=list(zBMI=-1, ses=m.ses, age=m.age, sex=m.sex))
comp.bmi1a <- ilrInv(ilr.pred.bmi1a)
c3 <- clo(comp.bmi1a, total=1440)

ilr.pred.bmi0 <- predict(lmodel, newdata=list(zBMI=0, ses=m.ses, age=m.age, sex=m.sex))
comp.bmi0 <- ilrInv(ilr.pred.bmi0)
c4 <- clo(comp.bmi0, total=1440)

ilr.pred.bmi1=predict(lmodel, newdata=list(zBMI=1, ses=m.ses, age=m.age, sex=m.sex))
comp.bmi1=ilrInv(ilr.pred.bmi1)
c5 <- clo(comp.bmi1, total=1440)

ilr.pred.bmi2=predict(lmodel, newdata=list(zBMI=2, ses=m.ses, age=m.age, sex=m.sex))
comp.bmi2=ilrInv(ilr.pred.bmi2)
c6 <- clo(comp.bmi2, total=1440)

ilr.pred.bmi3=predict(lmodel, newdata=list(zBMI=3, ses=m.ses, age=m.age, sex=m.sex))
comp.bmi3=ilrInv(ilr.pred.bmi3)
c7 <- clo(comp.bmi3, total=1440)


 #lets bind all the predicted time-use compositions together into a table- use the rbind() command to rowbind.
ans <- rbind(c1,c2,c3,c4,c5,c6,c7)
ans <- data.frame(ans) 
names(ans)=c("Sleep","SB","LPA","MVPA")
ans$zBMI=seq(-3,3,1)
ans#this is the table of predicted time-use compositions
#looking at the predicted compositions we see that as zBMI increases, sleep & sedentary are predictd to increase, whereas LPA and MVPA are predicted to decrease.

#plot it?
#make long dataset to help
require(tidyr)
## Loading required package: tidyr
require(ggplot2)
## Loading required package: ggplot2

long=gather(ans, Activity, Minutes, Sleep:MVPA)
long$Activity=factor(long$Activity, levels=c("Sleep","SB","LPA","MVPA"))#this is the order I want the activities plotted in
ggplot(long, aes(x=zBMI, y=Minutes))+
  geom_point()+
  geom_line()+
  facet_wrap(~Activity, scales="free_y")+
  theme_bw()

##STATISTICAL REFERENCES
##
##Dumuid D, Stanford TE, Martin-Fernandez JA, Pedisic Z, Maher CA, Lewis LK, Hron K, Katzmarzyk PT, Chaput JP, Fogelholm M, Hu G, Lambert EV, Maia J, Sarmiento OL, Standage M, Barreira TV, Broyles ST, Tudor-Locke C, Tremblay MS & Olds T (2018): Compositional data analysis for physical activity, sedentary time and sleep research. Statistical Methods in Medical Research, vol. 27, no. 12, pp. 3726-3738.

##Dumuid D, Pedisic Z, Stanford TE, Martin-Fernandez JA, Hron K, Maher CA, Lewis LK & Olds T (2019): The compositional isotemporal substitution model: a method for estimating changes in a health outcome for reallocation of time between sleep, physical activity and sedentary behaviour. Statistical Methods in Medical Research, vol. 28, no. 3, pp. 846-857.

##Dumuid D, Pedisic Z, Palarea-Albaladejo J, Martin-Fernandez JA, Hron K & Olds T (2020): Compositional data analysis in time-use epidemiology: what, why, how. International Journal of Environmental Research and Public Health, vol. 17, no. 7, article no. 2220.

##Dumuid D, Pedisic Z, Palarea-Albaladejo J, Martin-Fernandez JA, Hron K, Olds T (2021): Compositional data analysis in time-use epidemiology. In: Filzmoser, P., Hron, K., Martin-Fernandez, J.A., Palarea-Albaladejo, J (Eds.). Advances in compositional data analysis (pp. 383-404). Cham, CH: Springer Nature.