1 // Copyright (c) 1994 James Clark 2 // See the file COPYING for copying permission. 3 #pragma ident "%Z%%M% %I% %E% SMI" 4 5 #ifdef __GNUG__ 6 #pragma implementation 7 #endif 8 #include "splib.h" 9 #include "Text.h" 10 #include "Entity.h" 11 // for memcmp() 12 #include <string.h> 13 14 #ifdef SP_NAMESPACE 15 namespace SP_NAMESPACE { 16 #endif 17 18 Text::Text() 19 { 20 } 21 22 void Text::addChar(Char c, const Location &loc) 23 { 24 if (items_.size() == 0 25 || items_.back().type != TextItem::data 26 || loc.origin().pointer() != items_.back().loc.origin().pointer() 27 || loc.index() != (items_.back().loc.index() 28 + (chars_.size() - items_.back().index))) { 29 items_.resize(items_.size() + 1); 30 items_.back().loc = loc; 31 items_.back().type = TextItem::data; 32 items_.back().index = chars_.size(); 33 } 34 chars_ += c; 35 } 36 37 void Text::addChars(const Char *p, size_t length, const Location &loc) 38 { 39 if (items_.size() == 0 40 || items_.back().type != TextItem::data 41 || loc.origin().pointer() != items_.back().loc.origin().pointer() 42 || loc.index() != (items_.back().loc.index() 43 + (chars_.size() - items_.back().index))) { 44 items_.resize(items_.size() + 1); 45 items_.back().loc = loc; 46 items_.back().type = TextItem::data; 47 items_.back().index = chars_.size(); 48 } 49 chars_.append(p, length); 50 } 51 52 void Text::addCdata(const InternalEntity *entity, 53 const ConstPtr<Origin> &origin) 54 { 55 addSimple(TextItem::cdata, Location(origin, 0)); 56 chars_.append(entity->string().data(), entity->string().size()); 57 } 58 59 void Text::addSdata(const InternalEntity *entity, 60 const ConstPtr<Origin> &origin) 61 { 62 addSimple(TextItem::sdata, Location(origin, 0)); 63 chars_.append(entity->string().data(), entity->string().size()); 64 } 65 66 void Text::addNonSgmlChar(Char c, const Location &loc) 67 { 68 addSimple(TextItem::nonSgml, loc); 69 chars_ += c; 70 } 71 72 void Text::addCharsTokenize(const Char *str, size_t n, const Location &loc, 73 Char space) 74 { 75 Location loci(loc); 76 // FIXME speed this up 77 for (size_t i = 0; i < n; loci += 1, i++) { 78 if (str[i] == space && (size() == 0 || lastChar() == space)) 79 ignoreChar(str[i], loci); 80 else 81 addChar(str[i], loci); 82 } 83 } 84 85 void Text::tokenize(Char space, Text &text) const 86 { 87 TextIter iter(*this); 88 TextItem::Type type; 89 const Char *p; 90 size_t n; 91 const Location *loc; 92 while (iter.next(type, p, n, loc)) { 93 switch (type) { 94 case TextItem::data: 95 text.addCharsTokenize(p, n, *loc, space); 96 break; 97 case TextItem::sdata: 98 case TextItem::cdata: 99 { 100 text.addEntityStart(*loc); 101 text.addCharsTokenize(p, n, *loc, space); 102 Location tem(*loc); 103 tem += n; 104 text.addEntityEnd(tem); 105 } 106 break; 107 case TextItem::ignore: 108 text.ignoreChar(*p, *loc); 109 break; 110 default: 111 text.addSimple(type, *loc); 112 break; 113 } 114 } 115 if (text.size() > 0 && text.lastChar() == space) 116 text.ignoreLastChar(); 117 } 118 119 void Text::addSimple(TextItem::Type type, const Location &loc) 120 { 121 items_.resize(items_.size() + 1); 122 items_.back().loc = loc; 123 items_.back().type = type; 124 items_.back().index = chars_.size(); 125 } 126 127 void Text::ignoreChar(Char c, const Location &loc) 128 { 129 items_.resize(items_.size() + 1); 130 items_.back().loc = loc; 131 items_.back().type = TextItem::ignore; 132 items_.back().c = c; 133 items_.back().index = chars_.size(); 134 } 135 136 void Text::ignoreLastChar() 137 { 138 size_t lastIndex = chars_.size() - 1; 139 size_t i; 140 for (i = items_.size() - 1; items_[i].index > lastIndex; i--) 141 ; 142 // lastIndex >= items_[i].index 143 if (items_[i].index != lastIndex) { 144 items_.resize(items_.size() + 1); 145 i++; 146 for (size_t j = items_.size() - 1; j > i; j--) 147 items_[j] = items_[j - 1]; 148 items_[i].index = lastIndex; 149 items_[i].loc = items_[i - 1].loc; 150 items_[i].loc += lastIndex - items_[i - 1].index; 151 } 152 153 items_[i].c = chars_[chars_.size() - 1]; 154 items_[i].type = TextItem::ignore; 155 for (size_t j = i + 1; j < items_.size(); j++) 156 items_[j].index = lastIndex; 157 chars_.resize(chars_.size() - 1); 158 } 159 160 // All characters other than spaces are substed. 161 162 void Text::subst(const SubstTable<Char> &table, Char space) 163 { 164 for (size_t i = 0; i < items_.size(); i++) 165 if (items_[i].type == TextItem::data) { 166 size_t lim = (i + 1 < items_.size() 167 ? items_[i + 1].index 168 : chars_.size()); 169 size_t j; 170 for (j = items_[i].index; j < lim; j++) { 171 Char c = chars_[j]; 172 if (c != space && c != table[c]) 173 break; 174 } 175 if (j < lim) { 176 size_t start = items_[i].index; 177 StringC origChars(chars_.data() + start, lim - start); 178 for (; j < lim; j++) 179 if (chars_[j] != space) 180 table.subst(chars_[j]); 181 items_[i].loc = Location(new MultiReplacementOrigin(items_[i].loc, 182 origChars), 183 0); 184 } 185 } 186 } 187 188 void Text::clear() 189 { 190 chars_.resize(0); 191 items_.clear(); 192 } 193 194 Boolean Text::startDelimLocation(Location &loc) const 195 { 196 if (items_.size() == 0 || items_[0].type != TextItem::startDelim) 197 return 0; 198 loc = items_[0].loc; 199 return 1; 200 } 201 202 Boolean Text::endDelimLocation(Location &loc) const 203 { 204 if (items_.size() == 0) 205 return 0; 206 switch (items_.back().type) { 207 case TextItem::endDelim: 208 case TextItem::endDelimA: 209 break; 210 default: 211 return 0; 212 } 213 loc = items_.back().loc; 214 return 1; 215 } 216 217 Boolean Text::delimType(Boolean &lita) const 218 { 219 if (items_.size() == 0) 220 return 0; 221 switch (items_.back().type) { 222 case TextItem::endDelim: 223 lita = 0; 224 return 1; 225 case TextItem::endDelimA: 226 lita = 1; 227 return 1; 228 default: 229 break; 230 } 231 return 0; 232 } 233 234 TextItem::TextItem() 235 { 236 } 237 238 void Text::swap(Text &to) 239 { 240 items_.swap(to.items_); 241 chars_.swap(to.chars_); 242 } 243 244 TextIter::TextIter(const Text &text) 245 : ptr_(text.items_.begin()), text_(&text) 246 { 247 } 248 249 const Char *TextIter::chars(size_t &length) const 250 { 251 if (ptr_->type == TextItem::ignore) { 252 length = 1; 253 return &ptr_->c; 254 } 255 else { 256 const StringC &chars = text_->chars_; 257 size_t charsIndex = ptr_->index; 258 if (ptr_ + 1 != text_->items_.begin() + text_->items_.size()) 259 length = ptr_[1].index - charsIndex; 260 else 261 length = chars.size() - charsIndex; 262 return chars.data() + charsIndex; 263 } 264 } 265 266 Boolean TextIter::next(TextItem::Type &type, const Char *&str, size_t &length, 267 const Location *&loc) 268 { 269 const TextItem *end = text_->items_.begin() + text_->items_.size(); 270 if (ptr_ == end) 271 return 0; 272 type = ptr_->type; 273 loc = &ptr_->loc; 274 if (type == TextItem::ignore) { 275 str = &ptr_->c; 276 length = 1; 277 } 278 else { 279 const StringC &chars = text_->chars_; 280 size_t charsIndex = ptr_->index; 281 str = chars.data() + charsIndex; 282 if (ptr_ + 1 != end) 283 length = ptr_[1].index - charsIndex; 284 else 285 length = chars.size() - charsIndex; 286 } 287 ptr_++; 288 return 1; 289 } 290 291 void Text::insertChars(const StringC &s, const Location &loc) 292 { 293 chars_.insert(0, s); 294 items_.resize(items_.size() + 1); 295 for (size_t i = items_.size() - 1; i > 0; i--) { 296 items_[i] = items_[i - 1]; 297 items_[i].index += s.size(); 298 } 299 items_[0].loc = loc; 300 items_[0].type = TextItem::data; 301 items_[0].index = 0; 302 } 303 304 size_t Text::normalizedLength(size_t normsep) const 305 { 306 size_t n = size(); 307 n += normsep; 308 for (size_t i = 0; i < items_.size(); i++) 309 switch (items_[i].type) { 310 case TextItem::sdata: 311 case TextItem::cdata: 312 n += normsep; 313 break; 314 default: 315 break; 316 } 317 return n; 318 } 319 320 // This is used to determine for a FIXED CDATA attribute 321 // whether a specified value if equal to the default value. 322 323 Boolean Text::fixedEqual(const Text &text) const 324 { 325 if (string() != text.string()) 326 return 0; 327 size_t j = 0; 328 for (size_t i = 0; i < items_.size(); i++) 329 switch (items_[i].type) { 330 case TextItem::cdata: 331 case TextItem::sdata: 332 for (;;) { 333 if (j >= text.items_.size()) 334 return 0; 335 if (text.items_[j].type == TextItem::nonSgml) 336 return 0; 337 if (text.items_[j].type == TextItem::cdata 338 || text.items_[j].type == TextItem::sdata) 339 break; 340 j++; 341 } 342 if (text.items_[j].index != items_[i].index 343 || (text.items_[j].loc.origin()->asEntityOrigin()->entity() 344 != items_[i].loc.origin()->asEntityOrigin()->entity())) 345 return 0; 346 break; 347 case TextItem::nonSgml: 348 for (;;) { 349 if (j >= text.items_.size()) 350 return 0; 351 if (text.items_[j].type == TextItem::cdata 352 || text.items_[j].type == TextItem::sdata) 353 return 0; 354 if (text.items_[j].type == TextItem::nonSgml) 355 break; 356 j++; 357 } 358 if (text.items_[j].index != items_[i].index) 359 return 0; 360 break; 361 default: 362 break; 363 } 364 for (; j < text.items_.size(); j++) 365 switch (text.items_[j].type) { 366 case TextItem::cdata: 367 case TextItem::sdata: 368 case TextItem::nonSgml: 369 return 0; 370 default: 371 break; 372 } 373 return 1; 374 } 375 376 Boolean Text::charLocation(size_t ind, const ConstPtr<Origin> *&origin, Index &index) const 377 { 378 // Find the last item whose index <= ind. 379 // Invariant: 380 // indexes < i implies index <= ind 381 // indexes >= lim implies index > ind 382 // The first item will always have index 0. 383 size_t i = 1; 384 size_t lim = items_.size(); 385 while (i < lim) { 386 size_t mid = i + (lim - i)/2; 387 if (items_[mid].index > ind) 388 lim = mid; 389 else 390 i = mid + 1; 391 } 392 #if 0 393 for (size_t i = 1; i < items_.size(); i++) 394 if (items_[i].index > ind) 395 break; 396 #endif 397 i--; 398 // If items_.size() == 0, then i == lim. 399 if (i < lim) { 400 origin = &items_[i].loc.origin(); 401 index = items_[i].loc.index() + (ind - items_[i].index); 402 } 403 return 1; 404 } 405 406 #ifdef SP_NAMESPACE 407 } 408 #endif 409