Home | History | Annotate | Download | only in lib
      1 // Copyright (c) 1994 James Clark
      2 // See the file COPYING for copying permission.
      3 #pragma ident	"%Z%%M%	%I%	%E% SMI"
      4 
      5 #ifdef __GNUG__
      6 #pragma implementation
      7 #endif
      8 #include "splib.h"
      9 #include "Text.h"
     10 #include "Entity.h"
     11 // for memcmp()
     12 #include <string.h>
     13 
     14 #ifdef SP_NAMESPACE
     15 namespace SP_NAMESPACE {
     16 #endif
     17 
     18 Text::Text()
     19 {
     20 }
     21 
     22 void Text::addChar(Char c, const Location &loc)
     23 {
     24   if (items_.size() == 0
     25       || items_.back().type != TextItem::data
     26       || loc.origin().pointer() != items_.back().loc.origin().pointer()
     27       || loc.index() != (items_.back().loc.index()
     28 			 + (chars_.size() - items_.back().index))) {
     29     items_.resize(items_.size() + 1);
     30     items_.back().loc = loc;
     31     items_.back().type = TextItem::data;
     32     items_.back().index = chars_.size();
     33   }
     34   chars_ += c;
     35 }
     36 
     37 void Text::addChars(const Char *p, size_t length, const Location &loc)
     38 {
     39   if (items_.size() == 0
     40       || items_.back().type != TextItem::data
     41       || loc.origin().pointer() != items_.back().loc.origin().pointer()
     42       || loc.index() != (items_.back().loc.index()
     43 			 + (chars_.size() - items_.back().index))) {
     44     items_.resize(items_.size() + 1);
     45     items_.back().loc = loc;
     46     items_.back().type = TextItem::data;
     47     items_.back().index = chars_.size();
     48   }
     49   chars_.append(p, length);
     50 }
     51 
     52 void Text::addCdata(const InternalEntity *entity,
     53 		    const ConstPtr<Origin> &origin)
     54 {
     55   addSimple(TextItem::cdata, Location(origin, 0));
     56   chars_.append(entity->string().data(), entity->string().size());
     57 }
     58 
     59 void Text::addSdata(const InternalEntity *entity,
     60 		    const ConstPtr<Origin> &origin)
     61 {
     62   addSimple(TextItem::sdata, Location(origin, 0));
     63   chars_.append(entity->string().data(), entity->string().size());
     64 }
     65 
     66 void Text::addNonSgmlChar(Char c, const Location &loc)
     67 {
     68   addSimple(TextItem::nonSgml, loc);
     69   chars_ += c;
     70 }
     71 
     72 void Text::addCharsTokenize(const Char *str, size_t n, const Location &loc,
     73 			    Char space)
     74 {
     75   Location loci(loc);
     76   // FIXME speed this up
     77   for (size_t i = 0; i < n; loci += 1, i++) {
     78     if (str[i] == space && (size() == 0 || lastChar() == space))
     79       ignoreChar(str[i], loci);
     80     else
     81       addChar(str[i], loci);
     82   }
     83 }
     84 
     85 void Text::tokenize(Char space, Text &text) const
     86 {
     87   TextIter iter(*this);
     88   TextItem::Type type;
     89   const Char *p;
     90   size_t n;
     91   const Location *loc;
     92   while (iter.next(type, p, n, loc)) {
     93     switch (type) {
     94     case TextItem::data:
     95       text.addCharsTokenize(p, n, *loc, space);
     96       break;
     97     case TextItem::sdata:
     98     case TextItem::cdata:
     99       {
    100 	text.addEntityStart(*loc);
    101 	text.addCharsTokenize(p, n, *loc, space);
    102 	Location tem(*loc);
    103 	tem += n;
    104 	text.addEntityEnd(tem);
    105       }
    106       break;
    107     case TextItem::ignore:
    108       text.ignoreChar(*p, *loc);
    109       break;
    110     default:
    111       text.addSimple(type, *loc);
    112       break;
    113     }
    114   }
    115   if (text.size() > 0 && text.lastChar() == space)
    116     text.ignoreLastChar();
    117 }
    118 
    119 void Text::addSimple(TextItem::Type type, const Location &loc)
    120 {
    121   items_.resize(items_.size() + 1);
    122   items_.back().loc = loc;
    123   items_.back().type = type;
    124   items_.back().index = chars_.size();
    125 }
    126 
    127 void Text::ignoreChar(Char c, const Location &loc)
    128 {
    129   items_.resize(items_.size() + 1);
    130   items_.back().loc = loc;
    131   items_.back().type = TextItem::ignore;
    132   items_.back().c = c;
    133   items_.back().index = chars_.size();
    134 }
    135 
    136 void Text::ignoreLastChar()
    137 {
    138   size_t lastIndex = chars_.size() - 1;
    139   size_t i;
    140   for (i = items_.size() - 1; items_[i].index > lastIndex; i--)
    141     ;
    142   // lastIndex >= items_[i].index
    143   if (items_[i].index != lastIndex) {
    144     items_.resize(items_.size() + 1);
    145     i++;
    146     for (size_t j = items_.size() - 1; j > i; j--)
    147       items_[j] = items_[j - 1];
    148     items_[i].index = lastIndex;
    149     items_[i].loc = items_[i - 1].loc;
    150     items_[i].loc += lastIndex - items_[i - 1].index;
    151   }
    152 
    153   items_[i].c = chars_[chars_.size() - 1];
    154   items_[i].type = TextItem::ignore;
    155   for (size_t j = i + 1; j < items_.size(); j++)
    156     items_[j].index = lastIndex;
    157   chars_.resize(chars_.size() - 1);
    158 }
    159 
    160 // All characters other than spaces are substed.
    161 
    162 void Text::subst(const SubstTable<Char> &table, Char space)
    163 {
    164   for (size_t i = 0; i < items_.size(); i++)
    165     if (items_[i].type == TextItem::data) {
    166       size_t lim = (i + 1 < items_.size()
    167 		    ? items_[i + 1].index
    168 		    : chars_.size());
    169       size_t j;
    170       for (j = items_[i].index; j < lim; j++) {
    171 	Char c = chars_[j];
    172 	if (c != space && c != table[c])
    173 	  break;
    174       }
    175       if (j < lim) {
    176 	size_t start = items_[i].index;
    177 	StringC origChars(chars_.data() + start, lim - start);
    178 	for (; j < lim; j++)
    179 	  if (chars_[j] != space)
    180 	    table.subst(chars_[j]);
    181 	items_[i].loc = Location(new MultiReplacementOrigin(items_[i].loc,
    182 							    origChars),
    183 				 0);
    184       }
    185     }
    186 }
    187 
    188 void Text::clear()
    189 {
    190   chars_.resize(0);
    191   items_.clear();
    192 }
    193 
    194 Boolean Text::startDelimLocation(Location &loc) const
    195 {
    196   if (items_.size() == 0 || items_[0].type != TextItem::startDelim)
    197     return 0;
    198   loc = items_[0].loc;
    199   return 1;
    200 }
    201 
    202 Boolean Text::endDelimLocation(Location &loc) const
    203 {
    204   if (items_.size() == 0)
    205     return 0;
    206   switch (items_.back().type) {
    207   case TextItem::endDelim:
    208   case TextItem::endDelimA:
    209     break;
    210   default:
    211     return 0;
    212   }
    213   loc = items_.back().loc;
    214   return 1;
    215 }
    216 
    217 Boolean Text::delimType(Boolean &lita) const
    218 {
    219   if (items_.size() == 0)
    220     return 0;
    221   switch (items_.back().type) {
    222   case TextItem::endDelim:
    223     lita = 0;
    224     return 1;
    225   case TextItem::endDelimA:
    226     lita = 1;
    227     return 1;
    228   default:
    229     break;
    230   }
    231   return 0;
    232 }
    233 
    234 TextItem::TextItem()
    235 {
    236 }
    237 
    238 void Text::swap(Text &to)
    239 {
    240   items_.swap(to.items_);
    241   chars_.swap(to.chars_);
    242 }
    243 
    244 TextIter::TextIter(const Text &text)
    245 : ptr_(text.items_.begin()), text_(&text)
    246 {
    247 }
    248 
    249 const Char *TextIter::chars(size_t &length) const
    250 {
    251   if (ptr_->type == TextItem::ignore) {
    252     length = 1;
    253     return &ptr_->c;
    254   }
    255   else {
    256     const StringC &chars = text_->chars_;
    257     size_t charsIndex = ptr_->index;
    258     if (ptr_ + 1 != text_->items_.begin() + text_->items_.size())
    259       length = ptr_[1].index - charsIndex;
    260     else
    261       length = chars.size() - charsIndex;
    262     return chars.data() + charsIndex;
    263   }
    264 }
    265 
    266 Boolean TextIter::next(TextItem::Type &type, const Char *&str, size_t &length,
    267 		       const Location *&loc)
    268 {
    269   const TextItem *end = text_->items_.begin() + text_->items_.size();
    270   if (ptr_ == end)
    271     return 0;
    272   type = ptr_->type;
    273   loc = &ptr_->loc;
    274   if (type == TextItem::ignore) {
    275     str = &ptr_->c;
    276     length = 1;
    277   }
    278   else {
    279     const StringC &chars = text_->chars_;
    280     size_t charsIndex = ptr_->index;
    281     str = chars.data() + charsIndex;
    282     if (ptr_ + 1 != end)
    283       length = ptr_[1].index - charsIndex;
    284     else
    285       length = chars.size() - charsIndex;
    286   }
    287   ptr_++;
    288   return 1;
    289 }
    290 
    291 void Text::insertChars(const StringC &s, const Location &loc)
    292 {
    293   chars_.insert(0, s);
    294   items_.resize(items_.size() + 1);
    295   for (size_t i = items_.size() - 1; i > 0; i--) {
    296     items_[i] = items_[i - 1];
    297     items_[i].index += s.size();
    298   }
    299   items_[0].loc = loc;
    300   items_[0].type = TextItem::data;
    301   items_[0].index = 0;
    302 }
    303 
    304 size_t Text::normalizedLength(size_t normsep) const
    305 {
    306   size_t n = size();
    307   n += normsep;
    308   for (size_t i = 0; i < items_.size(); i++)
    309     switch (items_[i].type) {
    310     case TextItem::sdata:
    311     case TextItem::cdata:
    312       n += normsep;
    313       break;
    314     default:
    315       break;
    316     }
    317   return n;
    318 }
    319 
    320 // This is used to determine for a FIXED CDATA attribute
    321 // whether a specified value if equal to the default value.
    322 
    323 Boolean Text::fixedEqual(const Text &text) const
    324 {
    325   if (string() != text.string())
    326     return 0;
    327   size_t j = 0;
    328   for (size_t i = 0; i < items_.size(); i++)
    329     switch (items_[i].type) {
    330     case TextItem::cdata:
    331     case TextItem::sdata:
    332       for (;;) {
    333 	if (j >= text.items_.size())
    334 	  return 0;
    335 	if (text.items_[j].type == TextItem::nonSgml)
    336 	  return 0;
    337 	if (text.items_[j].type == TextItem::cdata
    338 	    || text.items_[j].type == TextItem::sdata)
    339 	  break;
    340 	j++;
    341       }
    342       if (text.items_[j].index != items_[i].index
    343 	  || (text.items_[j].loc.origin()->asEntityOrigin()->entity()
    344 	      != items_[i].loc.origin()->asEntityOrigin()->entity()))
    345 	return 0;
    346       break;
    347     case TextItem::nonSgml:
    348       for (;;) {
    349 	if (j >= text.items_.size())
    350 	  return 0;
    351 	if (text.items_[j].type == TextItem::cdata
    352 	    || text.items_[j].type == TextItem::sdata)
    353 	  return 0;
    354 	if (text.items_[j].type == TextItem::nonSgml)
    355 	  break;
    356 	j++;
    357       }
    358       if (text.items_[j].index != items_[i].index)
    359         return 0;
    360       break;
    361     default:
    362       break;
    363     }
    364   for (; j < text.items_.size(); j++)
    365     switch (text.items_[j].type) {
    366     case TextItem::cdata:
    367     case TextItem::sdata:
    368     case TextItem::nonSgml:
    369       return 0;
    370     default:
    371       break;
    372     }
    373   return 1;
    374 }
    375 
    376 Boolean Text::charLocation(size_t ind, const ConstPtr<Origin> *&origin, Index &index) const
    377 {
    378   // Find the last item whose index <= ind.
    379   // Invariant:
    380   // indexes < i implies index <= ind
    381   // indexes >= lim implies index > ind
    382   // The first item will always have index 0.
    383   size_t i = 1;
    384   size_t lim = items_.size();
    385   while (i < lim) {
    386     size_t mid = i + (lim - i)/2;
    387     if (items_[mid].index > ind)
    388       lim = mid;
    389     else
    390       i = mid + 1;
    391   }
    392 #if 0
    393   for (size_t i = 1; i < items_.size(); i++)
    394     if (items_[i].index > ind)
    395       break;
    396 #endif
    397   i--;
    398   // If items_.size() == 0, then i == lim.
    399   if (i < lim) {
    400     origin = &items_[i].loc.origin();
    401     index = items_[i].loc.index() + (ind - items_[i].index);
    402   }
    403   return 1;
    404 }
    405 
    406 #ifdef SP_NAMESPACE
    407 }
    408 #endif
    409