source: plugins/base1/se.lu.onk.cgh_dataDumper/stac_format.R @ 720

Last change on this file since 720 was 720, checked in by Jari Häkkinen, 15 years ago

Importing CGH Data Dumper

File size: 2.7 KB
Line 
1# $Id$
2
3######################################################################
4################  MAIN PROGRAM #######################################
5
6# Missing values should have the form of Na or NaN
7# [reporterId]  [geneSymbol]  [chromosome]  [cytoBand]  [startPosition] [endPosition] [assayData = M]
8
9aa<-scan("basematrix.txt",what="character",nlines=1) #aa is header!
10cc<-6  #related to format above!
11aa.s<-length(aa)-cc #number of samples
12my.colClasses<-c("numeric","character","character","character","numeric","numeric",rep("numeric",aa.s))
13data<-read.table("basematrix.txt",colClasses=my.colClasses,header=TRUE,na.strings=c(NA))
14data$chromosome<-as.integer(data$chromosome)
15
16nbrAssays<-aa.s
17
18assayNames<-read.table("assays.txt",colClasses="character",header=FALSE)
19
20## sort data and subdivide it
21ii<-order(data$chromosome,data$startPosition)
22data<-data[ii,] #reorder all data according to chromosom and then to start kb
23
24cloneInfo<-data[,c(seq(1,cc))]
25Log2matrix<-as.matrix(data[,-c(seq(1,cc))])
26rm(data)
27 
28for(h in 1:length(chromosomes)){ #foreach chromosome
29  print(h)
30  workingChrom<-chromosomes[h]
31  ## Foreach assay in my.assays
32  for(u in 1:nbrAssays){ #foreach assay
33    print(assayNames[u])
34    my.data<-Log2matrix[which(cloneInfo$chromosome==workingChrom),u] #ternary data sorted.
35    my.info<-cloneInfo[which(cloneInfo$chromosome==workingChrom),]
36   
37    ## separate data into p-arm and q-arm, then run each arm independently for gain and losses
38    p.indexes<-grep("p",my.info$cytoBand)
39    q.indexes<-grep("q",my.info$cytoBand)
40    ##
41   
42    ## check length of the p.indexes and q.indexes !!! >0 . Bundling not a problem!
43    if(length(p.indexes)>0){
44      x1<-c(-99,my.data[p.indexes])
45      x2<-c(my.data[p.indexes],-99)
46      z<- x2-x1 #all non-zero values in z corresponds to the breakpoints in the profile
47      non.zero.z<-which(z !=0) #all breakpoints as c(start1,start2,start3,...,length(data.subset)+1)
48     
49      start.i<-non.zero.z[1:length(non.zero.z)-1]
50      stop.i<-non.zero.z-1
51      stop.i<-stop.i[-c(1)]
52      status<-my.data[start.i] #all changes
53         
54      ii.g<-which(status==1)
55      ii.l<-which(status== -1)
56     
57      ########
58      ## write the start stop for the altered states one at a time in a file for each sample
59   
60    }
61    if(length(q.indexes)>0){
62      x1<-c(-99,my.data[p.indexes])
63      x2<-c(my.data[p.indexes],-99)
64      z<- x2-x1 #all non-zero values in z corresponds to the breakpoints in the profile
65      non.zero.z<-which(z !=0) #all breakpoints as c(start1,start2,start3,...,length(data.subset)+1)
66     
67      start.i<-non.zero.z[1:length(non.zero.z)-1]
68      stop.i<-non.zero.z-1
69      stop.i<-stop.i[-c(1)]
70      status<-my.data[start.i] #all changes
71         
72      ii.g<-which(status==1)
73      ii.l<-which(status== -1)   
74    }
75   
76   
77    #print end resultat to file
78   
79  }
80}
81
Note: See TracBrowser for help on using the repository browser.