##################################################################
# This R code inputs a dataset (from a text file), creates analysis
# variables from the dataset, and then carries out three testing
# procedures described in Gilbert, Rossini, and Shankarappa (2005),
# "Two-sample tests for comparing intra-individual genetic
# sequence diversity between populations", Biometrics, 2005,
# 61:107-118.  These testing procedures are referred to in the
# paper as the "pooled mean diversities" test (Tpoolmn), the
# "pooled median diversities" test (Tpoolmed), and the
# "mean subject-specific diversities" test (Tsubjmn).
#
# The procedures are carried out below on the dataset of
# synonymous distances in HIV infected children, that was used
# in the Example of Gilbert, Rossini, and Shankarappa (2005).
#################################################################

# Read in the synonymous intra-individual pairwise sequence distances.
# The file is a whitespace-separated stream of numbers, five per record.
# (The file name keeps its original spelling; it must match the file on disk.)
datamat <- local({
  distance_values <- scan("Synonomousdistances.txt")

  # matrix() only warns and recycles values when the input length is not a
  # multiple of ncol, which would silently misalign every column.  Fail
  # loudly instead so a truncated or malformed file is noticed.
  if (length(distance_values) %% 5 != 0) {
    stop(
      "Synonomousdistances.txt contains ", length(distance_values),
      " values, which is not a multiple of the 5 expected columns",
      call. = FALSE
    )
  }

  matrix(distance_values, ncol = 5, byrow = TRUE)
})

# Data columns of datamat:
# 1. strand1:  an integer that indicates the sequence number from an individual
# 2. strand2:  an integer that indicates the sequence number from an individual
# 3. group:    1 if group 1; 2 if group 2
# 4. id:       an integer that indicates the individual within a group
# 5. distance: pairwise distance between sequence strand1 from id and
#              sequence strand2 from id

# Carry out the three tests.  diverstest() is defined in diverstest.r,
# which must be present in the working directory.
source("diverstest.r")
diverstest(datamat)

# The output is written to the file diverstest.out