| 1 | (******************************************************************************* |
|---|
| 2 | Generator-of-Generators Library is copyrighted free software by Kento Emoto |
|---|
| 3 | <emoto[at]ipl.t.u-tokyo.ac.jp> developed under the collaborative |
|---|
| 4 | research on Fortress Programming Language between Sun Microsystems, |
|---|
| 5 | Inc. and the University of Tokyo. |
|---|
| 6 | |
|---|
| 7 | You can redistribute it and/or modify it under the following |
|---|
| 8 | BSD-style license or the Sun Contributor Agreement that Kento Emoto signed. |
|---|
| 9 | |
|---|
| 10 | |
|---|
| 11 | Copyright (c) 2009 by Kento Emoto |
|---|
| 12 | All rights reserved. |
|---|
| 13 | |
|---|
| 14 | Redistribution and use in source and binary forms, with or without |
|---|
| 15 | modification, are permitted provided that the following conditions |
|---|
| 16 | are met: |
|---|
| 17 | |
|---|
| 18 | * Redistributions of source code must retain the above copyright |
|---|
| 19 | notice, this list of conditions and the following disclaimer. |
|---|
| 20 | * Redistributions in binary form must reproduce the above copyright |
|---|
| 21 | notice, this list of conditions and the following disclaimer |
|---|
| 22 | in the documentation and/or other materials provided with the |
|---|
| 23 | distribution. |
|---|
| 24 | * Neither the name of Kento Emoto nor the names of its |
|---|
| 25 | contributors may be used to endorse or promote products derived |
|---|
| 26 | from this software without specific prior written permission. |
|---|
| 27 | |
|---|
| 28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|---|
| 29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
|---|
| 30 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|---|
| 31 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
|---|
| 32 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|---|
| 33 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|---|
| 34 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
|---|
| 35 | OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|---|
| 36 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
|---|
| 37 | OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
|---|
| 38 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 39 | |
|---|
| 40 | |
|---|
| 41 | ******************************************************************************) |
|---|
| 42 | |
|---|
| 43 | component Generator22D |
|---|
| 44 | |
|---|
| 45 | import List.{...} |
|---|
| 46 | import Generator2.{...} |
|---|
| 47 | export Generator22D |
|---|
| 48 | |
|---|
| 49 | (*---------------------------------- Rows ----------------------------------*) |
|---|
| 50 | (* GoG rows generates all rows of the given 2D array. *) |
|---|
| 51 | |
|---|
| 52 | |
|---|
| 53 | rowsImpl[\E\](g) = |
|---|
| 54 | do |
|---|
| 55 | bs = g.bounds() |
|---|
| 56 | (il, jl) = bs.lower() |
|---|
| 57 | (iu, ju) = bs.upper() |
|---|
| 58 | array[\Generator[\E\]\](iu-il+1).fill(fn i =>g[(i,jl)#(1,ju-jl+1)]) |
|---|
| 59 | end |
|---|
| 60 | |
|---|
| 61 | (* Generator2 Rows *) |
|---|
| 62 | object Rows[\ E, nat b0, nat s0, nat b1, nat s1 \](g : Array2[\E, b0, s0, b1, s1\]) extends Generator2[\ E \] |
|---|
| 63 | getter seed() : Generator[\ E \] = g |
|---|
| 64 | toString() : String = rowsImpl[\ E \](g).toString() |
|---|
| 65 | |
|---|
| 66 | (* actual generation of the nested data structure *) |
|---|
| 67 | generate[\ R \](r : Reduction[\ R \], body : Generator[\ E \] -> R) : R = do |
|---|
| 68 | rowsImpl[\ E \](g).generate[\ R \](r, body) |
|---|
| 69 | end |
|---|
| 70 | |
|---|
| 71 | theorems[\R, L1, L2\]() = |
|---|
| 72 | <|[\((ActualReduction[\ R, L1 \], ActualReduction[\ R, L2 \], E -> R)->Boolean, (ActualReduction[\ R, L1 \], ActualReduction[\ R, L2 \], E -> R)->R)\] |
|---|
| 73 | (fn (r, q, f) => conditionAbiding[\ R, L1, L2 \](r, q, f), fn (r, q, f) => efficientImplAbiding[\ R, L1, L2 \](r, q, f)) |
|---|
| 74 | |> |
|---|
| 75 | |
|---|
| 76 | (*--- conditions ---*) |
|---|
| 77 | conditionAbiding[\ R, L1, L2 \](q : ActualReduction[\ R, L1 \], r : ActualReduction[\ R, L2 \], f : E -> R) : Boolean = abiding(q, r) |
|---|
| 78 | |
|---|
| 79 | (*--- implementation using the abide-tree reduction *) |
|---|
| 80 | efficientImplAbiding[\ R, L1, L2 \](q : ActualReduction[\ R, L1 \], r : ActualReduction[\ R, L2 \], f : E -> R) = do |
|---|
| 81 | |
|---|
| 82 | (* println("Abide!") *) |
|---|
| 83 | |
|---|
| 84 | (* we assume that two operators have the same lifting, i.e., L1 = L2 *) |
|---|
| 85 | r.unlift(GeneratorATWrapper(seed()).generateAT[\L1\](q, r, fn a => r.lift(f(a)))) |
|---|
| 86 | end |
|---|
| 87 | |
|---|
| 88 | end |
|---|
| 89 | |
|---|
| 90 | rows[\ E, nat b0, nat s0, nat b1, nat s1 \](g : Array2[\E, b0, s0, b1, s1\]) : Rows[\E, b0, s0, b1, s1\] = Rows[\E, b0, s0, b1, s1\](g) |
|---|
| 91 | |
|---|
| 92 | |
|---|
| 93 | (*---------------------------------- Cols ----------------------------------*) |
|---|
| 94 | (* GoG cols generates all columns of the given 2D array. *) |
|---|
| 95 | |
|---|
| 96 | colsImpl[\E\](g) = |
|---|
| 97 | do |
|---|
| 98 | bs = g.bounds() |
|---|
| 99 | (il, jl) = bs.lower() |
|---|
| 100 | (iu, ju) = bs.upper() |
|---|
| 101 | array[\Generator[\E\]\](ju-jl+1).fill(fn j =>g[(il,j)#(iu-il+1,1)]) |
|---|
| 102 | end |
|---|
| 103 | |
|---|
| 104 | (* Generator2 Cols *) |
|---|
| 105 | object Cols[\ E, nat b0, nat s0, nat b1, nat s1 \](g : Array2[\E, b0, s0, b1, s1\]) extends Generator2[\ E \] |
|---|
| 106 | getter seed() : Generator[\ E \] = g |
|---|
| 107 | toString() : String = colsImpl[\ E \](g).toString() |
|---|
| 108 | |
|---|
| 109 | (* actual generation of the nested data structure *) |
|---|
| 110 | generate[\ R \](r : Reduction[\ R \], body : Generator[\ E \] -> R) : R = do |
|---|
| 111 | colsImpl[\ E \](g).generate[\ R \](r, body) |
|---|
| 112 | end |
|---|
| 113 | |
|---|
| 114 | theorems[\R, L1, L2\]() = |
|---|
| 115 | <|[\((ActualReduction[\ R, L1 \], ActualReduction[\ R, L2 \], E -> R)->Boolean, (ActualReduction[\ R, L1 \], ActualReduction[\ R, L2 \], E -> R)->R)\] |
|---|
| 116 | (fn (r, q, f) => conditionAbiding[\ R, L1, L2 \](r, q, f), fn (r, q, f) => efficientImplAbiding[\ R, L1, L2 \](r, q, f)) |
|---|
| 117 | |> |
|---|
| 118 | |
|---|
| 119 | (*--- conditions ---*) |
|---|
| 120 | conditionAbiding[\ R, L1, L2 \](q : ActualReduction[\ R, L1 \], r : ActualReduction[\ R, L2 \], f : E -> R) : Boolean = abiding(q, r) |
|---|
| 121 | |
|---|
| 122 | (*--- implementation using the abide-tree reduction *) |
|---|
| 123 | efficientImplAbiding[\ R, L1, L2 \](q : ActualReduction[\ R, L1 \], r : ActualReduction[\ R, L2 \], f : E -> R) = do |
|---|
| 124 | |
|---|
| 125 | (* println("Abide!") *) |
|---|
| 126 | |
|---|
| 127 | (* we assume that two operators have the same lifting, i.e., L1 = L2 *) |
|---|
| 128 | r.unlift(GeneratorATWrapper(seed()).generateAT[\L1\](r, q, fn a => r.lift(f(a)))) |
|---|
| 129 | end |
|---|
| 130 | |
|---|
| 131 | end |
|---|
| 132 | |
|---|
| 133 | cols[\ E, nat b0, nat s0, nat b1, nat s1 \](g : Array2[\E, b0, s0, b1, s1\]) : Cols[\E, b0, s0, b1, s1\] = Cols[\E, b0, s0, b1, s1\](g) |
|---|
| 134 | |
|---|
| 135 | (*---------------------------------- Rects ----------------------------------*) |
|---|
| 136 | |
|---|
| 137 | rectsImpl[\E\](g) = |
|---|
| 138 | do |
|---|
| 139 | bs = g.bounds() |
|---|
| 140 | (il, jl) = bs.lower() |
|---|
| 141 | (iu, ju) = bs.upper() |
|---|
| 142 | s0 = iu - il + 1 |
|---|
| 143 | s1 = ju - jl + 1 |
|---|
| 144 | ss0 = (s0 (s0+1))/2 |
|---|
| 145 | ss1 = (s1 (s1+1))/2 |
|---|
| 146 | ret = array[\Generator[\E\]\](ss0, ss1).fill( |
|---|
| 147 | fn (i,j) => do |
|---|
| 148 | |
|---|
| 149 | est_n(u) = narrow(|\ SQRT(2 u) /|) |
|---|
| 150 | ss(n) = ((n-1) n) / 2 |
|---|
| 151 | chk(u, n) = do d = u - ss n; d > 0 AND d <= n end |
|---|
| 152 | fst(a,b) = a |
|---|
| 153 | snd(a,b) = b |
|---|
| 154 | |
|---|
| 155 | str_n(u) = do en = est_n u; cs = <|(chk(u,n),n) | n <- <|en-1, en, en+1|> |>; n = if fst cs[0] then snd cs[0] elif fst cs[1] then snd cs[1] elif fst cs[2] then snd cs[2] end; (n - 1, n - (u - ss n)) end |
|---|
| 156 | |
|---|
| 157 | (i0, i1) : (ZZ32, ZZ32) = str_n(ss0 - i) |
|---|
| 158 | (j0, j1) : (ZZ32, ZZ32) = str_n(ss1 - j) |
|---|
| 159 | |
|---|
| 160 | g[(iu-i0,ju-j0)#(i1+1,j1+1)] |
|---|
| 161 | end |
|---|
| 162 | ) |
|---|
| 163 | |
|---|
| 164 | (* println(BIG //[r <- rows ret] (BIG |||[a <- r] a)) *) |
|---|
| 165 | ret |
|---|
| 166 | end |
|---|
| 167 | |
|---|
| 168 | (* Generator2 Rects *) |
|---|
| 169 | (* wanted to be Array2? (GGs on 2D is preferred to be Array2?) *) |
|---|
| 170 | object Rects[\ E, nat b0, nat s0, nat b1, nat s1 \](g : Array2[\E, b0, s0, b1, s1\]) extends {Generator2[\ E \]} |
|---|
| 171 | getter seed() : Generator[\ E \] = g |
|---|
| 172 | toString() : String = rectsImpl[\ E \](g).toString() |
|---|
| 173 | |
|---|
| 174 | (* actual generation of the nested data structure *) |
|---|
| 175 | generate[\ R \](r : Reduction[\ R \], body : Generator[\ E \] -> R) : R = do |
|---|
| 176 | rectsImpl[\ E \](g).generate[\ R \](r, body) |
|---|
| 177 | end |
|---|
| 178 | |
|---|
| 179 | theorems[\R, L1, L2\]() = |
|---|
| 180 | <|[\((ActualReduction[\ R, L1 \], ActualReduction[\ R, L2 \], E -> R)->Boolean, (ActualReduction[\ R, L1 \], ActualReduction[\ R, L2 \], E -> R)->R)\] |
|---|
| 181 | (fn (r, q, f) => conditionDistributive[\ R, L1, L2 \](r, q, f), fn (r, q, f) => efficientImplDistributive[\ R, L1, L2 \](r, q, f)) |
|---|
| 182 | |> |
|---|
| 183 | |
|---|
| 184 | (*--- conditions ---*) |
|---|
| 185 | conditionDistributive[\ R, L1, L2 \](q : ActualReduction[\ R, L1 \], r : ActualReduction[\ R, L2 \], f : E -> R) : Boolean = distributes(q, r) |
|---|
| 186 | |
|---|
| 187 | (*--- wrapper methods for lifting/unlifting. *) |
|---|
| 188 | efficientImplDistributive[\ R, L1, L2 \](q : ActualReduction[\ R, L1 \], r : ActualReduction[\ R, L2 \], f : E -> R) = do |
|---|
| 189 | (* println("efficientImplDistributive") *) |
|---|
| 190 | |
|---|
| 191 | rp = r.distribute(q) |
|---|
| 192 | i = rp.inner() |
|---|
| 193 | o = rp.outer() |
|---|
| 194 | (* we suppose that the operators i and o are lifted to L1. |
|---|
| 195 | This lifting type should be replaced with the type L of ReductionPair[\R, L\] returned by r.distribute(q). |
|---|
| 196 | But, we do not have any way to capture the type L.... |
|---|
| 197 | *) |
|---|
| 198 | o.unlift(efficientImplDistributiveImpl[\L1\](o, i, fn a => i.lift(f(a)))) |
|---|
| 199 | end |
|---|
| 200 | (*--- efficient implementations for some conditions ---*) |
|---|
| 201 | efficientImplDistributiveImpl[\ R \](q : Reduction[\ R \], r : Reduction[\ R \], f : E -> R) = do |
|---|
| 202 | (* defintions of operators and functions are found below. *) |
|---|
| 203 | op = MSS2DReductionAbove[\R\](q, r) |
|---|
| 204 | ot = MSS2DReductionBeside[\R\](q, r) |
|---|
| 205 | |
|---|
| 206 | |
|---|
| 207 | tup = op.unlift(GeneratorATWrapper(seed()).generateAT[\AnyMaybe\](op, ot, fn a => op.lift(mssBody[\R\](f(a))))) |
|---|
| 208 | tup.b |
|---|
| 209 | end |
|---|
| 210 | |
|---|
| 211 | end |
|---|
| 212 | |
|---|
| 213 | rects[\ E, nat b0, nat s0, nat b1, nat s1 \](g : Array2[\E, b0, s0, b1, s1\]) : Rects[\E, b0, s0, b1, s1\] = Rects[\E, b0, s0, b1, s1\](g) |
|---|
| 214 | |
|---|
| 215 | |
|---|
| 216 | |
|---|
| 217 | (*-------------------- for abide-tree reductions. -------------------------*) |
|---|
| 218 | trait GeneratorAT[\E\] |
|---|
| 219 | generateAT[\R\](op:Reduction[\R\], ot:Reduction[\R\], body: E->R) : R |
|---|
| 220 | end |
|---|
| 221 | |
|---|
| 222 | object GeneratorATWrapper[\E, nat b1, nat s1, nat b2, nat s2\](x : Array2[\E, b1, s1, b2, s2\]) extends GeneratorAT[\E\] |
|---|
| 223 | generateAT[\R\](op:Reduction[\R\], ot:Reduction[\R\], body: E->R) : R = do |
|---|
| 224 | loop'(lo1:E,hi1:E,lo2:E,hi2:E) = |
|---|
| 225 | if lo1=hi1 AND lo2=hi2 then body(x[(lo1,lo2)]) |
|---|
| 226 | elif lo1=hi1 then |
|---|
| 227 | split2 = partitionL((lo2 BITXOR hi2)+1) |
|---|
| 228 | mid2 = hi2 BITAND (BITNOT (split2-1)) |
|---|
| 229 | ot.join(loop'(lo1,hi1,lo2,mid2-1),loop'(lo1,hi1,mid2,hi2)) |
|---|
| 230 | elif lo2=hi2 then |
|---|
| 231 | split1 = partitionL((lo1 BITXOR hi1)+1) |
|---|
| 232 | mid1 = hi1 BITAND (BITNOT (split1-1)) |
|---|
| 233 | op.join(loop'(lo1,mid1-1,lo2,hi2),loop'(mid1,hi1,lo2,hi2)) |
|---|
| 234 | else |
|---|
| 235 | split2 = partitionL((lo2 BITXOR hi2)+1) |
|---|
| 236 | mid2 = hi2 BITAND (BITNOT (split2-1)) |
|---|
| 237 | split1 = partitionL((lo1 BITXOR hi1)+1) |
|---|
| 238 | mid1 = hi1 BITAND (BITNOT (split1-1)) |
|---|
| 239 | op.join(ot.join(loop'(lo1,mid1-1,lo2,mid2-1),loop'(lo1,mid1-1,mid2,hi2)), |
|---|
| 240 | ot.join(loop'(mid1,hi1,lo2,mid2-1),loop'(mid1,hi1,mid2,hi2))) |
|---|
| 241 | end |
|---|
| 242 | loop'(b1,b1+s1-1,b2,b2+s2-1) |
|---|
| 243 | end |
|---|
| 244 | end |
|---|
| 245 | |
|---|
| 246 | (* for nested abide-tree reductions *) |
|---|
| 247 | trait Generator2AT[\E\] extends { Generator2[\ E \], GeneratorAT[\GeneratorAT[\E\]\] } |
|---|
| 248 | generate2AT[\R\](op:Reduction[\R\], ot:Reduction[\R\], om:Reduction[\R\], od:Reduction[\R\], body: E->R) : R |
|---|
| 249 | theoremsAT[\R\]() : List[\((Reduction[\ R \], Reduction[\ R \], Reduction[\ R \], Reduction[\ R \], E -> R)->Boolean, |
|---|
| 250 | (Reduction[\ R \], Reduction[\ R \], Reduction[\ R \], Reduction[\ R \], E -> R)->R)\] = <| |> |
|---|
| 251 | naiveImplAT[\ R \](op:Reduction[\R\], ot:Reduction[\R\], om:Reduction[\R\], od:Reduction[\R\], f : E -> R) : R = |
|---|
| 252 | generateAT[\R\](op, ot, (fn (x) => x.generateAT[\R\](om,od,f))) |
|---|
| 253 | end |
|---|
| 254 | |
|---|
| 255 | |
|---|
| 256 | |
|---|
| 257 | |
|---|
| 258 | |
|---|
| 259 | (*------------------------ for optimization of rects ---------------------- *) |
|---|
| 260 | |
|---|
| 261 | (* an exception for size-mismatching in array operations. *) |
|---|
| 262 | object SizeMismatchException end |
|---|
| 263 | |
|---|
| 264 | (* zipwith operations *) |
|---|
| 265 | zipWith(f, a, b, c) = a.ivmap(fn (ij, v) => do (i, j) = ij; f (v, b[(i,j)], c[(i,j)]) end ) |
|---|
| 266 | zipWith(f, a, b) = a.ivmap(fn (ij, v) => do (i, j) = ij; f (v, b[(i,j)]) end) |
|---|
| 267 | |
|---|
| 268 | (* generalized matrix multiplication: op and ot are used instead of + and * *) |
|---|
| 269 | gemm[\T, nat s0, nat s1, nat s2 \](op:Reduction[\T\], ot:Reduction[\T\], sel:Array2[\T,0,s0,0,s1\], other: Array2[\T,0,s1,0,s2\]): Array2[\T, 0, s0, 0, s2\] = do |
|---|
| 270 | res = array2[\T, s0, s2\]() |
|---|
| 271 | mma(a:ZZ32,i:ZZ32,b:ZZ32,j:ZZ32,c:ZZ32,k:ZZ32):() = |
|---|
| 272 | if k>=i AND k>=j then |
|---|
| 273 | if k=1 then |
|---|
| 274 | pr : T = ot.join(sel.get(a,b), other.get(b,c)) |
|---|
| 275 | (* If this were atomic, we could parallelize j-partition. *) |
|---|
| 276 | res.put((a,c), op.join(res.get(a,c), pr)) |
|---|
| 277 | else |
|---|
| 278 | (k0,k1) = partition(k) |
|---|
| 279 | (mma(a,i,b,j,c,k0),mma(a,i,b,j,c+k0,k1)) |
|---|
| 280 | end |
|---|
| 281 | elif j>=i then |
|---|
| 282 | (j0,j1) = partition(j) |
|---|
| 283 | mma(a,i,b,j0,c,k) |
|---|
| 284 | mma(a,i,b+j0,j1,c,k) |
|---|
| 285 | else |
|---|
| 286 | (i0,i1) = partition(i) |
|---|
| 287 | (mma(a,i0,b,j,c,k),mma(a+i0,i1,b,j,c,k)) |
|---|
| 288 | end |
|---|
| 289 | mm(a:ZZ32,i:ZZ32,b:ZZ32,j:ZZ32,c:ZZ32,k:ZZ32):() = |
|---|
| 290 | if k>=i AND k>=j then |
|---|
| 291 | if k=1 then |
|---|
| 292 | res.put((a,c), ot.join(sel.get(a,b), other.get(b,c))) |
|---|
| 293 | else |
|---|
| 294 | (k0,k1) = partition(k) |
|---|
| 295 | (mm(a,i,b,j,c,k0),mm(a,i,b,j,c+k0,k1)) |
|---|
| 296 | end |
|---|
| 297 | elif j>=i then |
|---|
| 298 | (j0,j1) = partition(j) |
|---|
| 299 | mm(a,i,b,j0,c,k) |
|---|
| 300 | mma(a,i,b+j0,j1,c,k) |
|---|
| 301 | else |
|---|
| 302 | (i0,i1) = partition(i) |
|---|
| 303 | (mm(a,i0,b,j,c,k),mm(a+i0,i1,b,j,c,k)) |
|---|
| 304 | end |
|---|
| 305 | if s0=0 OR s1=0 OR s2=0 then |
|---|
| 306 | res |
|---|
| 307 | else |
|---|
| 308 | mm(0,s0,0,s1,0,s2) |
|---|
| 309 | res |
|---|
| 310 | end |
|---|
| 311 | end |
|---|
| 312 | |
|---|
| 313 | (* concatenation of 2D arrays: [A] BSD [B] = [A B] *) |
|---|
| 314 | opr BSD[\T, nat s0, nat s1, nat s2\](x : Array2[\T, 0, s0, 0, s1\], y : Array2[\T, 0, s0, 0, s2\]) : Array2[\T, 0, s0, 0, s1 + s2\] = array2[\T, s0, s1 + s2\]().fill(fn (i, j) => if j < s1 then x[(i,j)] else y[(i, j - s1)] end) |
|---|
| 315 | |
|---|
| 316 | (* concatenation of 2D arrays: [A] ABV [B] = [A |
|---|
| 317 | B] *) |
|---|
| 318 | opr ABV[\T, nat s0, nat s1, nat s2\](x : Array2[\T, 0, s0, 0, s1\], y : Array2[\T, 0, s2, 0, s1\]) : Array2[\T, 0, s0 + s2, 0, s1\] = array2[\T, s0 + s2, s1\]().fill(fn (i, j) => if i < s0 then x[(i,j)] else y[(i-s0, j)] end) |
|---|
| 319 | |
|---|
| 320 | (* construction of block-upper-triangular array: |
|---|
| 321 | tri(A, B, C, z) = [A B |
|---|
| 322 | Z C] , where Z is an array filled with z (zero). |
|---|
| 323 | *) |
|---|
| 324 | tri[\T, nat s0, nat s1, nat s2, nat s3\](x : Array2[\T, 0, s0, 0, s1\], u: Array2[\T, 0, s0, 0, s2\], y : Array2[\T, 0, s2, 0, s3\], z : T) : Array2[\T, 0, s0 + s2, 0, s1 + s3\] = array2[\T, s0 + s2, s1 + s3\]().fill(fn (i, j) => if i < s0 AND j < s1 then x[(i,j)] elif i < s0 AND j >= s1 then u[(i, j-s1)] elif i >= s0 AND j >= s1 then y[(i-s0, j-s1)] else z end) |
|---|
| 325 | |
|---|
| 326 | (* generates with function an array of the same size as the given array. *) |
|---|
| 327 | createArray2[\T, nat s0, nat s1\](_:Array2[\T, 0, s0, 0, s1\], f:(ZZ32,ZZ32)->T) : Array2[\T, 0, s0, 0, s1\] = array2[\T, s0, s1\]().fill(f) |
|---|
| 328 | |
|---|
| 329 | (* the lifted data structure for |
|---|
| 330 | oprimized computation of nested reduction with rects. *) |
|---|
| 331 | trait SomeMSS2DTuple[\T\] end |
|---|
| 332 | object MSS2DTuple[\T, nat s0, nat s1\]( |
|---|
| 333 | b : T, |
|---|
| 334 | n : Array2[\T, 0, s1, 0, s1\], |
|---|
| 335 | s : Array2[\T, 0, s1, 0, s1\], |
|---|
| 336 | e : Array2[\T, 0, s0, 0, s0\], |
|---|
| 337 | w : Array2[\T, 0, s0, 0, s0\], |
|---|
| 338 | ne: Array2[\T, 0, s0, 0, s1\], |
|---|
| 339 | nw: Array2[\T, 0, s0, 0, s1\], |
|---|
| 340 | se: Array2[\T, 0, s0, 0, s1\], |
|---|
| 341 | sw: Array2[\T, 0, s0, 0, s1\], |
|---|
| 342 | c : Array2[\T, 0, s1, 0, s1\], |
|---|
| 343 | r : Array2[\T, 0, s0, 0, s0\] |
|---|
| 344 | ) extends SomeMSS2DTuple[\T\] |
|---|
| 345 | getter size() : (ZZ32, ZZ32) = (s0, s1) |
|---|
| 346 | getter asString() : String = "(" b ", " (BIG ||| <|[\String\] "" n, "" s, "" e, "" w, "" ne, "" nw, "" se, "" sw, "" c, "" r|>) ")" |
|---|
| 347 | end |
|---|
| 348 | |
|---|
| 349 | mssBody[\T\](v:T) : SomeMSS2DTuple[\T\] = do |
|---|
| 350 | m = array2[\T,1,1\](v) |
|---|
| 351 | MSS2DTuple[\T,1,1\](v, m, m, m, m, m, m, m, m, m, m) |
|---|
| 352 | end |
|---|
| 353 | |
|---|
| 354 | __MSS2DTuple[\T, nat s0, nat s1\]( |
|---|
| 355 | b : T, |
|---|
| 356 | n : Array2[\T, 0, s1, 0, s1\], |
|---|
| 357 | s : Array2[\T, 0, s1, 0, s1\], |
|---|
| 358 | e : Array2[\T, 0, s0, 0, s0\], |
|---|
| 359 | w : Array2[\T, 0, s0, 0, s0\], |
|---|
| 360 | ne: Array2[\T, 0, s0, 0, s1\], |
|---|
| 361 | nw: Array2[\T, 0, s0, 0, s1\], |
|---|
| 362 | se: Array2[\T, 0, s0, 0, s1\], |
|---|
| 363 | sw: Array2[\T, 0, s0, 0, s1\], |
|---|
| 364 | c : Array2[\T, 0, s1, 0, s1\], |
|---|
| 365 | r : Array2[\T, 0, s0, 0, s0\] |
|---|
| 366 | ) : SomeMSS2DTuple[\T\] = MSS2DTuple[\T, s0, s1\](b, n, s, e, w, ne, nw, se, nw, c, r) |
|---|
| 367 | |
|---|
| 368 | (* annotation for operators with the `abide' property*) |
|---|
| 369 | trait AbideWith[\T\] end |
|---|
| 370 | |
|---|
| 371 | abiding[\R, Q\]( r : R, q : Q ) = |
|---|
| 372 | typecase(r, q) of |
|---|
| 373 | (R, AbideWith[\R\]) => true |
|---|
| 374 | else => false |
|---|
| 375 | end |
|---|
| 376 | |
|---|
| 377 | object MSS2DReductionAbove[\T\](op, ot) extends { AssociativeReduction[\SomeMSS2DTuple[\T\]\], AbideWith[\MSS2DReductionBeside[\T\]\] } |
|---|
| 378 | getter asString(): String = "MSS2DReductionAbove" |
|---|
| 379 | simpleJoin(x: SomeMSS2DTuple[\T\], y: SomeMSS2DTuple[\T\]): SomeMSS2DTuple[\T\] = |
|---|
| 380 | do |
|---|
| 381 | (s0x, s1x) = x.size() |
|---|
| 382 | (s0y, s1y) = y.size() |
|---|
| 383 | (* make sure s1x = s1y *) |
|---|
| 384 | if (s1x =/= s1y) then |
|---|
| 385 | throw SizeMismatchException |
|---|
| 386 | end |
|---|
| 387 | b = op.join(x.s.indices().generate[\T\](op, fn ij => ot.join(x.s[ij], y.n[ij])), op.join(x.b, y.b)) |
|---|
| 388 | n = createArray2(x.n, fn ij => op.join(x.n[ij], ot.join(x.c[ij], y.n[ij]))) |
|---|
| 389 | s = createArray2(x.s, fn ij => op.join(y.s[ij], ot.join(y.c[ij], x.s[ij]))) |
|---|
| 390 | e = tri(x.e, gemm(op, ot, x.se, y.ne.t()), y.e, op.empty()) |
|---|
| 391 | w = tri(x.w, gemm(op, ot, x.sw, y.nw.t()), y.w, op.empty()) |
|---|
| 392 | |
|---|
| 393 | ne = (x.ne ABV (createArray2(y.ne, fn ij => do (i, j) = ij; ot.join(y.ne[ij], x.ne[(s0x-1, j)]) end))) |
|---|
| 394 | nw = (x.nw ABV (createArray2(y.nw, fn ij => do (i, j) = ij; ot.join(y.nw[ij], x.nw[(s0x-1, j)]) end))) |
|---|
| 395 | se = ((createArray2(x.se, fn ij => do (i, j) = ij; ot.join(x.se[ij], y.se[(0, j)]) end) ABV y.se)) |
|---|
| 396 | sw = ((createArray2(x.sw, fn ij => do (i, j) = ij; ot.join(x.sw[ij], y.sw[(0, j)]) end) ABV y.sw)) |
|---|
| 397 | |
|---|
| 398 | c = createArray2(x.c, fn ij => ot.join(x.c[ij], y.c[ij])) |
|---|
| 399 | r = tri(x.r, gemm(op, ot, x.r[(0,s0x-1)#(s0x, 1)], y.r[(0,0)#(1,s0y)]), y.r, op.empty()) |
|---|
| 400 | __MSS2DTuple(b, n, s, e, w, ne, nw, se, nw, c, r) |
|---|
| 401 | end |
|---|
| 402 | end |
|---|
| 403 | |
|---|
| 404 | object MSS2DReductionBeside[\T\](op, ot) extends { AssociativeReduction[\SomeMSS2DTuple[\T\]\], AbideWith[\MSS2DReductionAbove[\T\]\] } |
|---|
| 405 | getter asString(): String = "MSS2DReductionBeside" |
|---|
| 406 | simpleJoin(x: SomeMSS2DTuple[\T\], y: SomeMSS2DTuple[\T\]): SomeMSS2DTuple[\T\] = |
|---|
| 407 | do |
|---|
| 408 | (s0x, s1x) = x.size() |
|---|
| 409 | (s0y, s1y) = y.size() |
|---|
| 410 | (* make sure s0x = s0y *) |
|---|
| 411 | if (s0x =/= s0y) then |
|---|
| 412 | throw SizeMismatchException |
|---|
| 413 | end |
|---|
| 414 | b = op.join(x.e.indices().generate[\T\](op, fn ij => ot.join(x.e[ij], y.w[ij])), op.join(x.b, y.b)) |
|---|
| 415 | |
|---|
| 416 | n = tri(x.n, gemm(op, ot, x.ne.t(), y.nw), y.n, op.empty()) |
|---|
| 417 | s = tri(x.s, gemm(op, ot, x.se.t(), y.sw), y.s, op.empty()) |
|---|
| 418 | e = createArray2(x.e, fn ij => op.join(y.e[ij], ot.join(y.r[ij], x.e[ij]))) |
|---|
| 419 | w = createArray2(x.w, fn ij => op.join(x.w[ij], ot.join(x.r[ij], y.w[ij]))) |
|---|
| 420 | |
|---|
| 421 | ne = ((createArray2 (x.ne, fn ij => do (i, j) = ij; ot.join(x.ne[ij], y.ne[(i, 0)]) end) BSD y.ne)) |
|---|
| 422 | |
|---|
| 423 | nw = (x.nw BSD (createArray2 (y.nw, fn ij => do (i, j) = ij; ot.join(y.nw[ij], x.nw[(i, s1x-1)]) end))) |
|---|
| 424 | se = ((createArray2(x.se, fn ij => do (i, j) = ij; ot.join(x.se[ij], y.se[(i, 0)]) end) BSD y.se)) |
|---|
| 425 | sw = (x.sw BSD (createArray2(y.sw, fn ij => do (i, j) = ij; ot.join(y.sw[ij], x.sw[(i, s1x-1)]) end))) |
|---|
| 426 | |
|---|
| 427 | |
|---|
| 428 | c = tri(x.c, gemm(op, ot, (x.c[(0,s1x-1)#(s1x,1)]), (y.c[(0,0)#(1,s1y)])), y.c, op.empty()) |
|---|
| 429 | r = createArray2(x.r,fn ij => ot.join(x.r[ij], y.r[ij])) |
|---|
| 430 | |
|---|
| 431 | __MSS2DTuple(b, n, s, e, w, ne, nw, se, nw, c, r) |
|---|
| 432 | end |
|---|
| 433 | end |
|---|
| 434 | |
|---|
| 435 | |
|---|
| 436 | |
|---|
| 437 | end |
|---|