1 | # $Id$ |
---|
2 | |
---|
3 | ###################################################################### |
---|
4 | ################ MAIN PROGRAM ####################################### |
---|
5 | |
---|
6 | # Missing values should have the form of Na or NaN |
---|
7 | # [reporterId] [geneSymbol] [chromosome] [cytoBand] [startPosition] [endPosition] [assayData = M] |
---|
8 | |
---|
9 | aa<-scan("basematrix.txt",what="character",nlines=1) #aa is header! |
---|
10 | cc<-6 #related to format above! |
---|
11 | aa.s<-length(aa)-cc #number of samples |
---|
12 | my.colClasses<-c("numeric","character","character","character","numeric","numeric",rep("numeric",aa.s)) |
---|
13 | data<-read.table("basematrix.txt",colClasses=my.colClasses,header=TRUE,na.strings=c(NA)) |
---|
14 | data$chromosome<-as.integer(data$chromosome) |
---|
15 | |
---|
16 | nbrAssays<-aa.s |
---|
17 | |
---|
18 | assayNames<-read.table("assays.txt",colClasses="character",header=FALSE) |
---|
19 | |
---|
20 | ## sort data and subdivide it |
---|
21 | ii<-order(data$chromosome,data$startPosition) |
---|
22 | data<-data[ii,] #reorder all data according to chromosom and then to start kb |
---|
23 | |
---|
24 | cloneInfo<-data[,c(seq(1,cc))] |
---|
25 | Log2matrix<-as.matrix(data[,-c(seq(1,cc))]) |
---|
26 | rm(data) |
---|
27 | |
---|
28 | for(h in 1:length(chromosomes)){ #foreach chromosome |
---|
29 | print(h) |
---|
30 | workingChrom<-chromosomes[h] |
---|
31 | ## Foreach assay in my.assays |
---|
32 | for(u in 1:nbrAssays){ #foreach assay |
---|
33 | print(assayNames[u]) |
---|
34 | my.data<-Log2matrix[which(cloneInfo$chromosome==workingChrom),u] #ternary data sorted. |
---|
35 | my.info<-cloneInfo[which(cloneInfo$chromosome==workingChrom),] |
---|
36 | |
---|
37 | ## separate data into p-arm and q-arm, then run each arm independently for gain and losses |
---|
38 | p.indexes<-grep("p",my.info$cytoBand) |
---|
39 | q.indexes<-grep("q",my.info$cytoBand) |
---|
40 | ## |
---|
41 | |
---|
42 | ## check length of the p.indexes and q.indexes !!! >0 . Bundling not a problem! |
---|
43 | if(length(p.indexes)>0){ |
---|
44 | x1<-c(-99,my.data[p.indexes]) |
---|
45 | x2<-c(my.data[p.indexes],-99) |
---|
46 | z<- x2-x1 #all non-zero values in z corresponds to the breakpoints in the profile |
---|
47 | non.zero.z<-which(z !=0) #all breakpoints as c(start1,start2,start3,...,length(data.subset)+1) |
---|
48 | |
---|
49 | start.i<-non.zero.z[1:length(non.zero.z)-1] |
---|
50 | stop.i<-non.zero.z-1 |
---|
51 | stop.i<-stop.i[-c(1)] |
---|
52 | status<-my.data[start.i] #all changes |
---|
53 | |
---|
54 | ii.g<-which(status==1) |
---|
55 | ii.l<-which(status== -1) |
---|
56 | |
---|
57 | ######## |
---|
58 | ## write the start stop for the altered states one at a time in a file for each sample |
---|
59 | |
---|
60 | } |
---|
61 | if(length(q.indexes)>0){ |
---|
62 | x1<-c(-99,my.data[p.indexes]) |
---|
63 | x2<-c(my.data[p.indexes],-99) |
---|
64 | z<- x2-x1 #all non-zero values in z corresponds to the breakpoints in the profile |
---|
65 | non.zero.z<-which(z !=0) #all breakpoints as c(start1,start2,start3,...,length(data.subset)+1) |
---|
66 | |
---|
67 | start.i<-non.zero.z[1:length(non.zero.z)-1] |
---|
68 | stop.i<-non.zero.z-1 |
---|
69 | stop.i<-stop.i[-c(1)] |
---|
70 | status<-my.data[start.i] #all changes |
---|
71 | |
---|
72 | ii.g<-which(status==1) |
---|
73 | ii.l<-which(status== -1) |
---|
74 | } |
---|
75 | |
---|
76 | |
---|
77 | #print end resultat to file |
---|
78 | |
---|
79 | } |
---|
80 | } |
---|
81 | |
---|