Changeset 3993 for trunk/ProjectFortress/demos
Timestamp: 07/22/09 12:58:48 (4 months ago)
Files: 1 modified
trunk/ProjectFortress/demos/wordcount2.fss (modified) (6 diffs)
Legend:
- Unmodified
- Added
- Removed
trunk/ProjectFortress/demos/wordcount2.fss
r3961 r3993
   25    25  export Executable
   26    26
         27  delimiters : String= " ,.?*\t-/[]"
         28  delimMap : Map[\Char,Boolean\] = { c |-> true | c <- delimiters }
         29  isDelimiter(c:Char):Boolean = delimMap.member(c,false)
         30  (*
         31  This is much slower, even for the relatively short string above.
         32  isDelimiter(c:Char):Boolean = c IN delimiters
         33  *)
         34
   27    35  trait WordState comprises { Chunk, Segment }
         36  getter words(): Map[\CaseInsensitiveString, ZZ32\]
   28    37  opr OPLUS(self, other: WordState): WordState
   29    38  end
   30    39
   31    40  object Chunk(s: String) extends WordState
         41  getter words(): Map[\CaseInsensitiveString, ZZ32\] =
         42  maybeWord(s, {[\CaseInsensitiveString,ZZ32\]})
   32    43  opr OPLUS(self, other: WordState): WordState = Chunk("")
   33    44  opr OPLUS(self, other: Chunk): WordState =
    …     …
   39    50  object Segment(l: String, A: Map[\CaseInsensitiveString,ZZ32\], r: String)
   40    51  extends WordState
         52  getter words(): Map[\CaseInsensitiveString, ZZ32\] =
         53  maybeWord(l, maybeWord(r, A))
   41    54  opr OPLUS(self, other: WordState): WordState = Chunk("")
   42    55  opr OPLUS(self, other: Chunk): WordState =
   43    56  Segment(l, A, r || other.s)
   44    57  opr OPLUS(self, other: Segment): WordState = do
   45        Segment(l, maybeWord(r || other.l, A) UNIONSUM other.A, other.r)
         58  Segment(l, A UNIONSUM maybeWord(r || other.l, other.A), other.r)
   46    59  end
   47    60  end
    …     …
   61    74  *)
   62    75
   63        maybeWord(s: String, d:Map[\CaseInsensitiveString,ZZ32\]): Map[\CaseInsensitiveString,ZZ32\] =
   64        if s = "" then d else
         76  maybeWord(s: String, d:Map[\CaseInsensitiveString,ZZ32\]): Map[\CaseInsensitiveString,ZZ32\] =
         77  if s.isEmpty then d else
   65    78  d.updateWith(fn(mx)=>Just[\ZZ32\](mx.getDefault(0)+1), CaseInsensitiveString(s))
   66        end
         79  end
   67    80
   68    81  opr BIG OPLUS[\T\](): Reduction[\WordState\] = embiggen( fn(a,b) => a OPLUS b, Chunk("") )
   69
   70        maybeWord(s: String): List[\String\] =
   71        if s = "" then <|[\String\] |> else <|[\String\] s |> end
   72    82
   73        processChar(c: String): WordState =
   74        if ( c = " ") then
         83  processChar(c: Char): WordState =
         84  if (isDelimiter(c)) then
   75    85  Segment("", {[\CaseInsensitiveString,ZZ32\]}, "")
   76    86  else
   77        Chunk( c)
         87  Chunk("" c)
   78    88  end
   79    89
   80        words(s: String):Map[\CaseInsensitiveString,ZZ32\] = do
   81        var database:Map[\CaseInsensitiveString,ZZ32\] := {[\CaseInsensitiveString,ZZ32\]}
   82        g = BIG OPLUS[k<-0#s.size] processChar(s[k#1])
   83        typecase g of
   84        Chunk => maybeWord(g.s, database)
   85        Segment => maybeWord(g.l, database) UNIONSUM g.A UNIONSUM maybeWord(g.r, database)
   86        end
   87        end
         90  words(s: String):Map[\CaseInsensitiveString,ZZ32\] =
         91  (BIG OPLUS[c<-s] processChar(c)).words
   88    92
   89        makeInv(x:String,y:ZZ32):Map[\ZZ32, List[\String\]\] = do
   90        var database:Map[\ZZ32,List[\String\]\] := {[\ZZ32,List[\String\]\]}
   91        database := database.add(y, <|[\String\] x |>)
   92        database
   93        end
         93  makeInv(x:String,y:ZZ32):Map[\ZZ32, List[\String\]\] =
         94  {[\ZZ32,List[\String\]\] y |-> <|[\String\] x |> }
   94    95
   95    96  opr UNIONSUM(a:Map[\CaseInsensitiveString,ZZ32\],
    …     …
  111   112  time(str:String,s,f): () = println(str " in " ((f-s)/10.0^6) "ms")
  112   113  println("Processing file " name)
  113        var rs:FileReadStream = FileReadStream(name)
        114  rs:FileReadStream = FileReadStream(name)
  114   115  start = nanoTime()
  115   116  database:Map[\CaseInsensitiveString, ZZ32\] =
    …     …
  118   119  acq = nanoTime()
  119   120  time("Acquired words",start,acq)
  120        var invDatabase:Map[\ZZ32,List[\String\]\] =
        121  invDatabase:Map[\ZZ32,List[\String\]\] =
  121   122  BIG UNIONUNION [(x,y) <-database] makeInv(x.asString(),y)
  122   123  inv = nanoTime()
  123   124  time("Inverted database",acq,inv)
  124        try
  125        for i <- seq(1#100) do
  126        (c,m,newinv) = invDatabase.extractMaximum().get
  127        invDatabase := newinv
  128        println(m ": " c " times")
  129        end
  130        catch e
  131        NotFound => println("end")
        125  for (c,m) <- seq(invDatabase.reverse()) do
        126  println(m ": " c " times")
  132   127  end
  133   128  finish = nanoTime()
    …     …
  138   133
  139   134  run() = do
  140        processFile(getEnvironment("fortress.autohome", "ProjectFortress/demos/") "testData")
        135  (* change the final string below to "testData" for quick test,
        136  "hamlet" for thorough test comparable to wordcount.fss. *)
        137  processFile(getEnvironment("fortress.autohome", "ProjectFortress/demos/") "hamlet")
  141   138  end
  142   139  end

