Changeset 4308 for trunk/ProjectFortress

Show
Ignore:
Timestamp:
11/03/09 13:15:52 (3 weeks ago)
Author:
chf
Message:

birdcount interesting regions

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • trunk/ProjectFortress/demos/BirdCount0.fss

    r4290 r4308  
    132132   __bigOperatorSugar[\dna,dna,dna,dna\](BIG OPLUS[\T\](), g) 
    133133 
     134 
    134135processReferenceFileLine(input:String):dna = do 
    135136    if input.get(0) = '>'  
     
    170171end 
    171172 
    172 object Event(fileName:String, refSeq:String, sampleSeq:String, startPos:ZZ32, endPos:ZZ32) 
    173     getter fileName():String = fileName 
    174     getter refSeq():String = refSeq 
    175     getter sampleSeq():String = sampleSeq 
    176     getter startPos():ZZ32 = startPos 
    177     getter endPos():ZZ32 = endPos 
    178  
    179     isValid():Boolean = true 
    180  
    181     compare(other:Event):Boolean = true 
    182     check_valid():Boolean = true 
    183 end 
    184  
    185173reverse(sequence:String):String = BIG || [c<-sequence.reverse()] c 
    186174 
     
    234222ChunkSize:ZZ32 = 10000 
    235223 
     224 
    236225(* Given two samples, this creates a list of the positions where they differ *) 
    237226 
    238 sampleCompare(ref:FlatString, sample:FlatString):String = do 
    239    last:ZZ32 = |ref| MIN |sample| 
    240    var result:String = "" 
    241    var index:ZZ32 = 0 
    242    while (index < last) do 
    243        if (ref.get(index) =/= sample.get(index)) then 
    244              result := result || 'X' 
    245        else result := result || '.' 
    246        end 
    247        index:= index + 1 
     227object Event(fileName:String, refSeq:String, sampleSeq:String, startPos:ZZ32, endPos:ZZ32) 
     228    getter fileName():String = fileName 
     229    getter refSeq():String = refSeq 
     230    getter sampleSeq():String = sampleSeq 
     231    getter startPos():ZZ32 = startPos 
     232    getter endPos():ZZ32 = endPos 
     233end 
     234 
     235(* This code is a little obscure.  The idea is that if the sample differs from the reference 
     236   chicken at more than one consecutive location, it may be an interesting mutation.  We look 
     237   for these potentially interesting mutations and return a list of them. 
     238 *) 
     239 
     240makeTuple(loc:ZZ32, diffs:String):(ZZ32,ZZ32) = do 
     241    temp:String = diffs.asFlatString().javaSubstr(loc).javaRegExpSplit("\\.",0) 
     242    (loc, |temp|) 
     243end 
     244 
     245isInteresting(loc:ZZ32, diffs:String):Boolean = do 
     246   if loc = 0 OR: diffs.get(loc) = '.'  OR: loc = (|diffs| - 1) OR: (diffs.get(loc - 1) = 'X') OR: (|diffs.asFlatString().javaSubstr(loc).javaRegExpSplit("\\.",0)| < 2) then 
     247      false 
     248   else true 
    248249   end 
    249    result 
     250end 
     251 
     252sampleCompare(ref:String, sample:String):String = do 
     253   BIG || [(x,y) <- ref.zip[\Char\](sample)] (if (x =/= y) then "X" else "." end) 
    250254end 
    251255 
    252256(* Given two samples this creates events *) 
    253257 
    254 eventGenerator(ref:FlatString, sample:FlatString):List[\Event\] = do 
     258eventGenerator(ref:FlatString, sample:FlatString):() = do 
    255259    diffs = sampleCompare(ref, sample) 
    256     println(diffs) 
     260    println("ref     :" ref) 
     261    println("sample  :" sample) 
     262    println("diffs   :" diffs) 
     263 
     264    stop = |diffs| - 1 
     265 
     266    interestingRegions = <| makeTuple(x,diffs) | x<-0:stop, isInteresting(x,diffs) |> 
     267    println("interestingRegions = " interestingRegions)  
     268         
    257269end 
    258270