| 1 | (******************************************************************************* |
|---|
| 2 | Copyright 2008 Sun Microsystems, Inc., |
|---|
| 3 | 4150 Network Circle, Santa Clara, California 95054, U.S.A. |
|---|
| 4 | All rights reserved. |
|---|
| 5 | |
|---|
| 6 | U.S. Government Rights - Commercial software. |
|---|
| 7 | Government users are subject to the Sun Microsystems, Inc. standard |
|---|
| 8 | license agreement and applicable provisions of the FAR and its supplements. |
|---|
| 9 | |
|---|
| 10 | Use is subject to license terms. |
|---|
| 11 | |
|---|
| 12 | This distribution may include materials developed by third parties. |
|---|
| 13 | |
|---|
| 14 | Sun, Sun Microsystems, the Sun logo and Java are trademarks or registered |
|---|
| 15 | trademarks of Sun Microsystems, Inc. in the U.S. and other countries. |
|---|
| 16 | ******************************************************************************) |
|---|
| 17 | |
|---|
| 18 | component CordedString |
|---|
| 19 | import List.{...} |
|---|
| 20 | export CordedString |
|---|
| 21 | |
|---|
| 22 | (* I believe that these should eventually go with the range code. *) |
|---|
| 23 | (*Intersection of two ranges *) |
|---|
| 24 | opr ∩ (r0: Range⟦ZZ32⟧, r1: Range⟦ZZ32⟧) = (r0.lower MAX r1.lower) : (r0.upper MIN r1.upper) |
|---|
| 25 | |
|---|
| 26 | test rangeIntersect() = do |
|---|
| 27 | assert((0:5) ∩ (0:3), 0:3) |
|---|
| 28 | assert((0:5) ∩ (3:7), 3:5) |
|---|
| 29 | assert((0:5) ∩ (7:10), 7:5) |
|---|
| 30 | assert((5:3).size, 0, "The size of 5:3 is not zero!") |
|---|
| 31 | assert((9:8).size, 0, "The size of 9:8 is not zero!") |
|---|
| 32 | assert( (5:3).isEmpty, "5:3 is not empty!") |
|---|
| 33 | assert( (9:8).isEmpty, "9:8 is not empty!") |
|---|
| 34 | assert( ((0:5) ∩ (7:10)).isEmpty, "0:5 has a non-empty intersection with 7:10 !") |
|---|
| 35 | assert( (7:9)≪7, (0#3), "7:9≪7 isn't 0#3" ) |
|---|
| 36 | end |
|---|
| 37 | |
|---|
| 38 | (* Shift a range left by the specified amount. *) |
|---|
| 39 | opr ≪(r: Range⟦ZZ32⟧, leftShift: ZZ32) = (r.lower - leftShift)#r.extent |
|---|
| 40 | (* Shift a range right by the specified amount. *) |
|---|
| 41 | opr ≫(r: Range⟦ZZ32⟧, rightShift: ZZ32) = (r.lower + rightShift)#r.extent |
|---|
| 42 | |
|---|
| 43 | test testShiftLeft(): () = do |
|---|
| 44 | assert( (10:12) ≪ 9, 1:3) |
|---|
| 45 | end |
|---|
| 46 | |
|---|
| 47 | test testShiftRight(): () = do |
|---|
| 48 | assert( (2:3) ≫ 5, 7:8) |
|---|
| 49 | end |
|---|
| 50 | |
|---|
| 51 | test testRangeContainment = do |
|---|
| 52 | assert( (2:3) ≤ (2:3), "(2:3) not less than or equal to (2:3)") |
|---|
| 53 | (* println "(2#2) CMP (2:3) = " ((2#2) CMP (2:3)) *) |
|---|
| 54 | assert( (2#2) ≤ (2:3), "(2#2) not less than or equal to (2:3)") |
|---|
| 55 | end |
|---|
| 56 | (* |
|---|
| 57 | ==== Shared Traits ==== |
|---|
| 58 | *) |
|---|
| 59 | |
|---|
| 60 | trait Concatenable extends String |
|---|
| 61 | opr || (self, other:String): String = |
|---|
| 62 | if |other| = 0 then self else CatString(self, other) end |
|---|
| 63 | opr || (self, _:EmptyString): String = self |
|---|
| 64 | opr || (self, other:Char) = CatString(self, other.toString) |
|---|
| 65 | end |
|---|
| 66 | (* |
|---|
| 67 | ==== CatString ==== |
|---|
| 68 | *) |
|---|
| 69 | |
|---|
| 70 | object CatString(left: String, right:String) extends {String, Concatenable} |
|---|
| 71 | size = left.size + right.size |
|---|
| 72 | depthField = 1 + (left.depth MAX right.depth) |
|---|
| 73 | getter depth() = depthField |
|---|
| 74 | getter bounds() = 0#size |
|---|
| 75 | getter generator() = ConcatGenerator(left.generator, right.generator) |
|---|
| 76 | opr CMP(self, other: String) = |
|---|
| 77 | if |left| ≥ |other| |
|---|
| 78 | then |
|---|
| 79 | left CMP other |
|---|
| 80 | else |
|---|
| 81 | (left CMP other[0#|left|]) LEXICO: (right CMP other[(|left|):]) |
|---|
| 82 | end |
|---|
| 83 | opr CASE_INSENSITIVE_CMP(self, other:String): TotalComparison = EqualTo |
|---|
| 84 | |
|---|
| 85 | get(i): Char = |
|---|
| 86 | (* get is, and should be, without bounds checks. [ ] does the bounds checks,a and then delegates to get *) |
|---|
| 87 | if i ∈ left.indices then left[i] else right[i - |left|] end |
|---|
| 88 | |
|---|
| 89 | opr[r0:Range⟦ZZ32⟧] : String = do |
|---|
| 90 | r1 = self.bounds[r0] (* This will complete r0 if it is incomplete, and throw a bounds exception when necessary *) |
|---|
| 91 | if r1.isEmpty then |
|---|
| 92 | EmptyString |
|---|
| 93 | else |
|---|
| 94 | self.uncheckedSubstring(r1) |
|---|
| 95 | end |
|---|
| 96 | end |
|---|
| 97 | |
|---|
| 98 | verify() = do |
|---|
| 99 | assert(self.depth, (left.depth MAX right.depth) + 1, self) |
|---|
| 100 | assert(self.size(), |left| + |right|, self) |
|---|
| 101 | end |
|---|
| 102 | |
|---|
| 103 | showStructure(indent: ZZ32) = do |
|---|
| 104 | margin(indent) |
|---|
| 105 | println ("C" || |self| "/" self.depth || ":" ) |
|---|
| 106 | left.showStructure(indent+8) |
|---|
| 107 | right.showStructure(indent+8) |
|---|
| 108 | end |
|---|
| 109 | |
|---|
| 110 | (* without SubString nodes: |
|---|
| 111 | |
|---|
| 112 | uncheckedSubstring(r0: Range⟦ZZ32⟧) = do |
|---|
| 113 | r1 = (r0 ≪ left.size) |
|---|
| 114 | left.uncheckedSubstring(r0 ∩ left.bounds) || right.uncheckedSubstring(r1 ∩ right.bounds) |
|---|
| 115 | end |
|---|
| 116 | *) |
|---|
| 117 | |
|---|
| 118 | uncheckedSubstring(r0: Range⟦ZZ32⟧) = do |
|---|
| 119 | leftBounds' = r0 ∩ left.bounds |
|---|
| 120 | r1 = (r0 ≪ left.size) |
|---|
| 121 | rightBounds' = r1 ∩ right.bounds |
|---|
| 122 | (* atomic do |
|---|
| 123 | println "uncheckedSubstring " r0 " of " |
|---|
| 124 | self.showStructure() |
|---|
| 125 | println "===============" |
|---|
| 126 | println "leftBounds' = " leftBounds' ( if leftBounds'.isEmpty then "(empty)" else "(non-empty)" end ) |
|---|
| 127 | println "rightBounds' = " rightBounds' ( if rightBounds'.isEmpty then "(empty)" else "(non-empty)" end ) |
|---|
| 128 | println "===============" |
|---|
| 129 | end *) |
|---|
| 130 | if leftBounds'.isEmpty then |
|---|
| 131 | right.uncheckedSubstring(rightBounds') |
|---|
| 132 | elif rightBounds'.isEmpty then |
|---|
| 133 | left.uncheckedSubstring(leftBounds') |
|---|
| 134 | else |
|---|
| 135 | SubString'(self, r0) |
|---|
| 136 | end |
|---|
| 137 | end |
|---|
| 138 | |
|---|
| 139 | println(self): () = do print(left) ; println(right) end |
|---|
| 140 | print(self): () = do print(left) ; print(right) end |
|---|
| 141 | |
|---|
| 142 | subdivide(): Maybe⟦Generator⟦(ZZ32, String)⟧⟧ = Just ⟨ (0, left), (|left|, right) ⟩ |
|---|
| 143 | |
|---|
| 144 | end |
|---|
| 145 | |
|---|
| 146 | object ConcatGenerator⟦T⟧(first:Generator⟦T⟧, second:Generator⟦T⟧) |
|---|
| 147 | extends Generator⟦T⟧ |
|---|
| 148 | generate⟦R⟧(r: Reduction⟦R⟧, body:T->R):R = |
|---|
| 149 | r.join(first.generate⟦R⟧(r, body), second.generate⟦R⟧(r, body)) |
|---|
| 150 | end |
|---|
| 151 | |
|---|
| 152 | (* |
|---|
| 153 | ==== EmptyString ==== |
|---|
| 154 | *) |
|---|
| 155 | |
|---|
| 156 | value object EmptyString extends {String, Concatenable} |
|---|
| 157 | getter size() = 0 |
|---|
| 158 | getter bounds() = 0#0 |
|---|
| 159 | getter isEmpty() = true |
|---|
| 160 | getter generator() = Nothing⟦Char⟧ |
|---|
| 161 | opr CMP(self, other: String) = |
|---|
| 162 | |self| CMP |other| |
|---|
| 163 | opr CASE_INSENSITIVE_CMP(self, other:String): TotalComparison = |self| CMP |other| |
|---|
| 164 | |
|---|
| 165 | get(i): Char = fail("Can't get characters from an empty string") |
|---|
| 166 | |
|---|
| 167 | opr[r: Range⟦ZZ32⟧] : String = do |
|---|
| 168 | rr = self.indices[r] (* to raise a bounds error *) |
|---|
| 169 | EmptyString |
|---|
| 170 | end |
|---|
| 171 | |
|---|
| 172 | verify() = do |
|---|
| 173 | assert(self.depth, 0, self) |
|---|
| 174 | assert(self.size, 0, self) |
|---|
| 175 | end |
|---|
| 176 | |
|---|
| 177 | showStructure(indent) = do |
|---|
| 178 | margin(indent) |
|---|
| 179 | println "E" |self| "/" self.depth |
|---|
| 180 | end |
|---|
| 181 | |
|---|
| 182 | uncheckedSubstring(r0: Range⟦ZZ32⟧) = do |
|---|
| 183 | assert r0.isEmpty |
|---|
| 184 | self |
|---|
| 185 | end |
|---|
| 186 | |
|---|
| 187 | opr || (self, other:String): String = other |
|---|
| 188 | opr || (self, other:Char) = other.toString |
|---|
| 189 | |
|---|
| 190 | print(self): () = do end |
|---|
| 191 | println(self): () = println("") |
|---|
| 192 | |
|---|
| 193 | end |
|---|
| 194 | |
|---|
| 195 | subdivide() = Nothing⟦Generator⟦(ZZ32, String)⟧⟧ |
|---|
| 196 | |
|---|
| 197 | (* |
|---|
| 198 | ==== SubString ==== |
|---|
| 199 | *) |
|---|
| 200 | trait SubString extends {String, Concatenable} |
|---|
| 201 | comprises {SubString'} |
|---|
| 202 | end |
|---|
| 203 | |
|---|
| 204 | object SubString'(parent: String, range: Range⟦ZZ32⟧) extends SubString |
|---|
| 205 | getter size() = range.size |
|---|
| 206 | getter depth() = parent.depth (* Why not one more than the parent? Would this break the balancing invariant? *) |
|---|
| 207 | getter isEmpty() = false |
|---|
| 208 | |
|---|
| 209 | opr CMP(self, other: String) = do |
|---|
| 210 | (* if pgen ← parent.subdivide() then |
|---|
| 211 | BIG LEXICO [(start, str) ← pgen] self.uncheckedSubstring((start#|str|) ∩ self.bounds) CMP str |
|---|
| 212 | else |
|---|
| 213 | *) |
|---|
| 214 | (BIG LEXICO [i ← 0#(|self| MIN |other|)] self.get(i) CMP other.get(i)) LEXICO (|self| CMP |other|) |
|---|
| 215 | (* end |
|---|
| 216 | *) |
|---|
| 217 | end |
|---|
| 218 | |
|---|
| 219 | |
|---|
| 220 | verify() = do |
|---|
| 221 | parent.verify() |
|---|
| 222 | deny(range.isEmpty, "SubString (" range ") has empty range") |
|---|
| 223 | assert(range < parent.bounds, "SubString (" range ") has range equal to or greater than that of parent (" parent.bounds ")") |
|---|
| 224 | end |
|---|
| 225 | showStructure(indent) = do |
|---|
| 226 | margin(indent) |
|---|
| 227 | println "S" |self| "=[" range "]" |
|---|
| 228 | parent.showStructure(indent + 8) |
|---|
| 229 | end |
|---|
| 230 | |
|---|
| 231 | get(i:ZZ32) = do |
|---|
| 232 | assert(i < range.size, "getting char " i " from a substring of size " |range|) |
|---|
| 233 | parent.get(range.lower+i) |
|---|
| 234 | end |
|---|
| 235 | |
|---|
| 236 | uncheckedSubstring(r0: Range⟦ZZ32⟧) = do |
|---|
| 237 | assert((r0≫range.lower) ≤ range, "asking for substring [" r0 "] of S" self.size) |
|---|
| 238 | if r0.isEmpty then EmptyString |
|---|
| 239 | elif r0 = self.bounds then println "trivial substring"; self |
|---|
| 240 | else SubString'(parent, r0≫range.lower) |
|---|
| 241 | end |
|---|
| 242 | end |
|---|
| 243 | |
|---|
| 244 | println(self): () = do print(self) ; println() end |
|---|
| 245 | print(self): () = for ch ← seq(self) do print ch end |
|---|
| 246 | |
|---|
| 247 | subdivide(): Maybe⟦Generator⟦(ZZ32, String)⟧⟧ = do |
|---|
| 248 | (* The pairs (start, str) that are generated are such that start[0] = 0 and start[i] = | str[0] || ... || str[i-1] | |
|---|
| 249 | and str[0] || str [1] || ... || str[n] = self |
|---|
| 250 | *) |
|---|
| 251 | pieces = parent.subdivide() |
|---|
| 252 | offset = range.lower |
|---|
| 253 | if pgen ← pieces then |
|---|
| 254 | <| if (start#piece.size) ≤ range then |
|---|
| 255 | (start-offset, piece) |
|---|
| 256 | else |
|---|
| 257 | range' = range ∩ (start#piece.size) |
|---|
| 258 | (range'.lower-offset, SubString'(piece, range'≪start)) |
|---|
| 259 | end | (start, piece) ← pgen, (range ∩ (start#piece.size)).nonEmpty |> |
|---|
| 260 | else |
|---|
| 261 | Nothing⟦Generator⟦(ZZ32,String)⟧⟧ |
|---|
| 262 | end |
|---|
| 263 | end |
|---|
| 264 | end |
|---|
| 265 | |
|---|
| 266 | margin(indent) = do |
|---|
| 267 | for i ← 0#indent do print " " end |
|---|
| 268 | end |
|---|
| 269 | |
|---|
| 270 | end |
|---|