Show
Ignore:
Timestamp:
07/22/09 12:58:48 (4 months ago)
Author:
jmaessen
Message:

[demos] Rewrote wordcount2 as a 100% mutation-free implementation, and
upgraded it a bit to handle hamlet so that it is comparable to the demo
version of wordcount. It actually runs slightly faster in the interpreter
than wordcount does (though probably for silly reasons)!

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • trunk/ProjectFortress/demos/wordcount2.fss

    r3961 r3993  
    2525export Executable 
    2626 
     27delimiters : String= " ,.?*\t-/[]" 
     28delimMap : Map[\Char,Boolean\] = { c |-> true | c <- delimiters } 
     29isDelimiter(c:Char):Boolean = delimMap.member(c,false) 
     30(* 
     31This is much slower, even for the relatively short string above. 
     32isDelimiter(c:Char):Boolean = c IN delimiters 
     33*) 
     34 
    2735trait WordState comprises { Chunk, Segment } 
     36  getter words(): Map[\CaseInsensitiveString, ZZ32\] 
    2837  opr OPLUS(self, other: WordState): WordState 
    2938end 
    3039 
    3140object Chunk(s: String) extends WordState 
     41  getter words(): Map[\CaseInsensitiveString, ZZ32\] = 
     42    maybeWord(s, {[\CaseInsensitiveString,ZZ32\]}) 
    3243  opr OPLUS(self, other: WordState): WordState = Chunk("") 
    3344  opr OPLUS(self, other: Chunk): WordState = 
     
    3950object Segment(l: String, A: Map[\CaseInsensitiveString,ZZ32\], r: String) 
    4051    extends WordState 
     52  getter words(): Map[\CaseInsensitiveString, ZZ32\] = 
     53    maybeWord(l, maybeWord(r, A)) 
    4154  opr OPLUS(self, other: WordState): WordState = Chunk("") 
    4255  opr OPLUS(self, other: Chunk): WordState = 
    4356    Segment(l, A, r || other.s) 
    4457  opr OPLUS(self, other: Segment): WordState = do 
    45     Segment(l, maybeWord(r || other.l, A) UNIONSUM other.A, other.r) 
     58    Segment(l, A UNIONSUM maybeWord(r || other.l, other.A), other.r) 
    4659    end 
    4760end 
     
    6174*) 
    6275 
    63 maybeWord(s: String, d:Map[\CaseInsensitiveString,ZZ32\]): Map[\CaseInsensitiveString,ZZ32\] =  
    64   if s = "" then d else   
     76maybeWord(s: String, d:Map[\CaseInsensitiveString,ZZ32\]): Map[\CaseInsensitiveString,ZZ32\] = 
     77  if s.isEmpty then d else 
    6578      d.updateWith(fn(mx)=>Just[\ZZ32\](mx.getDefault(0)+1), CaseInsensitiveString(s)) 
    66 end  
     79end 
    6780 
    6881opr BIG OPLUS[\T\](): Reduction[\WordState\] = embiggen( fn(a,b) => a OPLUS b, Chunk("") ) 
    69   
    70 maybeWord(s: String): List[\String\] = 
    71   if s = "" then <|[\String\] |> else <|[\String\] s |> end 
    7282 
    73 processChar(c: String): WordState = 
    74   if (c = " ") then 
     83processChar(c: Char): WordState = 
     84  if (isDelimiter(c)) then 
    7585    Segment("", {[\CaseInsensitiveString,ZZ32\]}, "") 
    7686  else 
    77     Chunk(c) 
     87    Chunk("" c) 
    7888  end 
    7989 
    80 words(s: String):Map[\CaseInsensitiveString,ZZ32\] = do 
    81   var database:Map[\CaseInsensitiveString,ZZ32\] := {[\CaseInsensitiveString,ZZ32\]} 
    82   g = BIG OPLUS[k<-0#s.size] processChar(s[k#1]) 
    83   typecase g of 
    84     Chunk => maybeWord(g.s, database) 
    85     Segment => maybeWord(g.l, database) UNIONSUM g.A UNIONSUM maybeWord(g.r, database) 
    86   end 
    87 end 
     90words(s: String):Map[\CaseInsensitiveString,ZZ32\] = 
     91  (BIG OPLUS[c<-s] processChar(c)).words 
    8892 
    89 makeInv(x:String,y:ZZ32):Map[\ZZ32, List[\String\]\] = do 
    90     var database:Map[\ZZ32,List[\String\]\] := {[\ZZ32,List[\String\]\]} 
    91     database := database.add(y, <|[\String\] x |>) 
    92     database 
    93 end 
     93makeInv(x:String,y:ZZ32):Map[\ZZ32, List[\String\]\] = 
     94    {[\ZZ32,List[\String\]\] y |-> <|[\String\] x |> } 
    9495 
    9596opr UNIONSUM(a:Map[\CaseInsensitiveString,ZZ32\], 
     
    111112    time(str:String,s,f): () = println(str " in " ((f-s)/10.0^6) "ms") 
    112113    println("Processing file " name) 
    113     var rs:FileReadStream = FileReadStream(name) 
     114    rs:FileReadStream = FileReadStream(name) 
    114115    start = nanoTime() 
    115116    database:Map[\CaseInsensitiveString, ZZ32\] = 
     
    118119    acq = nanoTime() 
    119120    time("Acquired words",start,acq) 
    120     var invDatabase:Map[\ZZ32,List[\String\]\] = 
     121    invDatabase:Map[\ZZ32,List[\String\]\] = 
    121122        BIG UNIONUNION [(x,y) <-database] makeInv(x.asString(),y) 
    122123    inv = nanoTime() 
    123124    time("Inverted database",acq,inv) 
    124     try 
    125         for i <- seq(1#100) do 
    126             (c,m,newinv) = invDatabase.extractMaximum().get 
    127             invDatabase := newinv 
    128             println(m ": " c " times") 
    129         end 
    130     catch e 
    131             NotFound => println("end") 
     125    for (c,m) <- seq(invDatabase.reverse()) do 
     126        println(m ": " c " times") 
    132127    end 
    133128    finish = nanoTime() 
     
    138133 
    139134run() = do 
    140     processFile(getEnvironment("fortress.autohome", "ProjectFortress/demos/") "testData")  
     135    (* change the final string below to "testData" for quick test, 
     136       "hamlet" for thorough test comparable to wordcount.fss. *) 
     137    processFile(getEnvironment("fortress.autohome", "ProjectFortress/demos/") "hamlet") 
    141138end 
    142139end