Main Page   Packages   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

C:/temp/src/j2k/Beta/GString.cc

Go to the documentation of this file.
00001 /* 
00002 Copyright (C) 1988 Free Software Foundation
00003     written by Doug Lea (dl@rocky.oswego.edu)
00004 
00005 This file is part of the GNU C++ Library.  This library is free
00006 software; you can redistribute it and/or modify it under the terms of
00007 the GNU Library General Public License as published by the Free
00008 Software Foundation; either version 2 of the License, or (at your
00009 option) any later version.  This library is distributed in the hope
00010 that it will be useful, but WITHOUT ANY WARRANTY; without even the
00011 implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
00012 PURPOSE.  See the GNU Library General Public License for more details.
00013 You should have received a copy of the GNU Library General Public
00014 License along with this library; if not, write to the Free Software
00015 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00016 */
00017 
00018 /* 
00019   String class implementation
00020  */
00021 
00022 #ifdef __GNUG__
00023 #pragma implementation
00024 #endif
00025 #include <String.h>
00026 #include <std.h>
00027 #include <ctype.h>
00028 #include <limits.h>
00029 #include <new.h>
00030 #include <builtin.h>
00031 
00032 #undef OK
00033 
00034 void String::error(const char* msg) const
00035 {
00036   (*lib_error_handler)("String", msg);
00037 }
00038 
00039 String::operator const char*() const
00040 { 
00041   return (const char*)chars();
00042 }
00043 
00044 //  globals
00045 
00046 StrRep  _nilStrRep = { 0, 1, { 0 } }; // nil strings point here
00047 String _nilString;               // nil SubStrings point here
00048 
00049 
00050 
00051 
00052 /*
00053  the following inline fcts are specially designed to work
00054  in support of String classes, and are not meant as generic replacements
00055  for libc "str" functions.
00056 
00057  inline copy fcts -  I like left-to-right from->to arguments.
00058  all versions assume that `to' argument is non-null
00059 
00060  These are worth doing inline, rather than through calls because,
00061  via procedural integration, adjacent copy calls can be smushed
00062  together by the optimizer.
00063 */
00064 
// Copy exactly n bytes from `from` to `to`, lowest address first.
// Nothing happens when both pointers are equal or n is not positive.
// No terminator is written.
inline static void ncopy(const char* from, char* to, int n)
{
  if (from == to) return;
  for (int i = 0; i < n; ++i)
    to[i] = from[i];
}
00070 
// Copy n bytes and then store a 0 byte right after them.
// If source and destination are the same pointer, only the terminator
// at to[n] is written.
inline static void ncopy0(const char* from, char* to, int n)
{
  if (from == to)
  {
    to[n] = 0;
    return;
  }
  char* out = to;
  for (int left = n; left > 0; --left)
    *out++ = *from++;
  *out = 0;
}
00082 
// Copy a 0-terminated string, terminator included.
// A null source leaves the destination untouched.
inline static void scopy(const char* from, char* to)
{
  if (from == 0) return;
  for (;;)
  {
    const char c = *from++;
    *to++ = c;
    if (c == 0) break;
  }
}
00088 
// Copy n bytes walking downwards: both pointers start at the highest
// byte to move and step back by one after each byte.
// A null source makes this a no-op.
inline static void revcopy(const char* from, char* to, short n)
{
  if (from == 0) return;
  for (short k = n; k > 0; --k)
  {
    *to = *from;
    --to;
    --from;
  }
}
00094 
00095 
// Length of a 0-terminated string; a null pointer counts as length 0.
inline static int slen(const char* t) // inline  strlen
{
  int count = 0;
  if (t != 0)
    while (t[count] != 0) ++count;
  return count;
}
00107 
// minimum & maximum representable rep size

// Largest value a signed short can hold.
#define MAXStrRep_SIZE   ((1 << (sizeof(short) * CHAR_BIT - 1)) - 1)
// Smallest chunk size considered when sizing an allocation.
#define MINStrRep_SIZE   16

// Bytes subtracted from each rounded chunk size; may be predefined by
// the build to override the default of 4.
#if !defined(MALLOC_MIN_OVERHEAD)
#define MALLOC_MIN_OVERHEAD  4
#endif
00116 
00117 // The basic allocation primitive:
00118 // Always round request to something close to a power of two.
00119 // This ensures a bit of padding, which often means that
00120 // concatenations don't have to realloc. Plus it tends to
00121 // be faster when lots of Strings are created and discarded,
00122 // since just about any version of malloc (op new()) will
00123 // be faster when it can reuse identically-sized chunks
00124 
00125 inline static StrRep* Snew(int newsiz)
00126 {
00127   unsigned int siz = sizeof(StrRep) + newsiz + MALLOC_MIN_OVERHEAD;
00128   unsigned int allocsiz = MINStrRep_SIZE;
00129   while (allocsiz < siz) allocsiz <<= 1;
00130   allocsiz -= MALLOC_MIN_OVERHEAD;
00131   if (allocsiz >= MAXStrRep_SIZE)
00132     (*lib_error_handler)("String", "Requested length out of range");
00133     
00134   StrRep* rep = new (operator new (allocsiz)) StrRep;
00135   rep->sz = allocsiz - sizeof(StrRep);
00136   return rep;
00137 }
00138 
00139 // Do-something-while-allocating routines.
00140 
00141 // We live with two ways to signify empty Sreps: either the
00142 // null pointer (0) or a pointer to the nilStrRep.
00143 
00144 // We always signify unknown source lengths (usually when fed a char*)
00145 // via len == -1, in which case it is computed.
00146 
00147 // allocate, copying src if nonull
00148 
00149 StrRep* Salloc(StrRep* old, const char* src, int srclen, int newlen)
00150 {
00151   if (old == &_nilStrRep) old = 0;
00152   if (srclen < 0) srclen = slen(src);
00153   if (newlen < srclen) newlen = srclen;
00154   StrRep* rep;
00155   if (old == 0 || newlen > old->sz)
00156     rep = Snew(newlen);
00157   else
00158     rep = old;
00159 
00160   rep->len = newlen;
00161   ncopy0(src, rep->s, srclen);
00162 
00163   if (old != rep && old != 0) delete old;
00164 
00165   return rep;
00166 }
00167 
00168 // reallocate: Given the initial allocation scheme, it will
00169 // generally be faster in the long run to get new space & copy
00170 // than to call realloc
00171 
00172 static StrRep*
00173 Sresize(StrRep* old, int newlen)
00174 {
00175   if (old == &_nilStrRep) old = 0;
00176   StrRep* rep;
00177   if (old == 0)
00178     rep = Snew(newlen);
00179   else if (newlen > old->sz)
00180   {
00181     rep = Snew(newlen);
00182     ncopy0(old->s, rep->s, old->len);
00183     delete old;
00184   }
00185   else
00186     rep = old;
00187 
00188   rep->len = newlen;
00189 
00190   return rep;
00191 }
00192 
00193 void
00194 String::alloc (int newsize)
00195 {
00196   unsigned short old_len = rep->len;
00197   rep = Sresize(rep, newsize);
00198   rep->len = old_len;
00199 }
00200 
00201 // like allocate, but we know that src is a StrRep
00202 
00203 StrRep* Scopy(StrRep* old, const StrRep* s)
00204 {
00205   if (old == &_nilStrRep) old = 0;
00206   if (s == &_nilStrRep) s = 0;
00207   if (old == s) 
00208     return (old == 0)? &_nilStrRep : old;
00209   else if (s == 0)
00210   {
00211     old->s[0] = 0;
00212     old->len = 0;
00213     return old;
00214   }
00215   else 
00216   {
00217     StrRep* rep;
00218     int newlen = (short) s->len; // Beware of sign extension!
00219     if (old == 0 || newlen > old->sz)
00220     {
00221       if (old != 0) delete old;
00222       rep = Snew(newlen);
00223     }
00224     else
00225       rep = old;
00226     rep->len = newlen;
00227     ncopy0(s->s, rep->s, newlen);
00228     return rep;
00229   }
00230 }
00231 
00232 // allocate & concatenate
00233 
00234 StrRep* Scat(StrRep* old, const char* s, int srclen, const char* t, int tlen)
00235 {
00236   if (old == &_nilStrRep) old = 0;
00237   if (srclen < 0) srclen = slen(s);
00238   if (tlen < 0) tlen = slen(t);
00239   int newlen = srclen + tlen;
00240   StrRep* rep;
00241 
00242   if (old == 0 || newlen > old->sz || 
00243       (t >= old->s && t < &(old->s[old->len]))) // beware of aliasing
00244     rep = Snew(newlen);
00245   else
00246     rep = old;
00247 
00248   rep->len = newlen;
00249 
00250   ncopy(s, rep->s, srclen);
00251   ncopy0(t, &(rep->s[srclen]), tlen);
00252 
00253   if (old != rep && old != 0) delete old;
00254 
00255   return rep;
00256 }
00257 
00258 // double-concatenate
00259 
00260 StrRep* Scat(StrRep* old, const char* s, int srclen, const char* t, int tlen,
00261              const char* u, int ulen)
00262 {
00263   if (old == &_nilStrRep) old = 0;
00264   if (srclen < 0) srclen = slen(s);
00265   if (tlen < 0) tlen = slen(t);
00266   if (ulen < 0) ulen = slen(u);
00267   int newlen = srclen + tlen + ulen;
00268   StrRep* rep;
00269   if (old == 0 || newlen > old->sz || 
00270       (t >= old->s && t < &(old->s[old->len])) ||
00271       (u >= old->s && u < &(old->s[old->len])))
00272     rep = Snew(newlen);
00273   else
00274     rep = old;
00275 
00276   rep->len = newlen;
00277 
00278   ncopy(s, rep->s, srclen);
00279   ncopy(t, &(rep->s[srclen]), tlen);
00280   ncopy0(u, &(rep->s[srclen+tlen]), ulen);
00281 
00282   if (old != rep && old != 0) delete old;
00283 
00284   return rep;
00285 }
00286 
00287 // like cat, but we know that new stuff goes in the front of existing rep
00288 
00289 StrRep* Sprepend(StrRep* old, const char* t, int tlen)
00290 {
00291   char* s;
00292   int srclen;
00293   if (old == &_nilStrRep || old == 0)
00294   {
00295     s = 0; old = 0; srclen = 0;
00296   }
00297   else
00298   {
00299     s = old->s; srclen = (short) old->len;
00300   }
00301   if (tlen < 0) tlen = slen(t);
00302   int newlen = srclen + tlen;
00303   StrRep* rep;
00304   if (old == 0 || newlen > old->sz || 
00305       (t >= old->s && t < &(old->s[old->len])))
00306     rep = Snew(newlen);
00307   else
00308     rep = old;
00309 
00310   rep->len = newlen;
00311 
00312   revcopy(&(s[srclen]), &(rep->s[newlen]), srclen+1);
00313   ncopy(t, rep->s, tlen);
00314 
00315   if (old != rep && old != 0) delete old;
00316 
00317   return rep;
00318 }
00319 
00320 
// string compare: first argument is known to be non-null
//
// A null b compares like the empty string: the result is 1 when a is
// non-empty and 0 otherwise.  Otherwise the difference of the first
// mismatching characters is returned, or 0 when both strings end together.
inline static int scmp(const char* a, const char* b)
{
  if (b == 0)
    return *a != 0;

  for (;; ++a, ++b)
  {
    const int diff = *a - *b;
    if (diff != 0 || *a == 0) return diff;
  }
}
00334 
00335 
// Compare two counted character runs.  The first differing character
// within the common prefix decides; otherwise the length difference
// (al - bl) is returned.
inline static int ncmp(const char* a, int al, const char* b, int bl)
{
  const int shared = (bl < al) ? bl : al;
  for (int i = 0; i < shared; ++i)
  {
    const int diff = a[i] - b[i];
    if (diff != 0) return diff;
  }
  return al - bl;
}
00343 
00344 int fcompare(const String& x, const String& y)
00345 {
00346   const char* a = x.chars();
00347   const char* b = y.chars();
00348   int al = x.length();
00349   int bl = y.length();
00350   int n = (al <= bl)? al : bl;
00351   int diff = 0;
00352   while (n-- > 0)
00353   {
00354     char ac = *a++;
00355     char bc = *b++;
00356     if ((diff = ac - bc) != 0)
00357     {
00358       if (ac >= 'a' && ac <= 'z')
00359         ac = ac - 'a' + 'A';
00360       if (bc >= 'a' && bc <= 'z')
00361         bc = bc - 'a' + 'A';
00362       if ((diff = ac - bc) != 0)
00363         return diff;
00364     }
00365   }
00366   return al - bl;
00367 }
00368 
00369 // these are not inline, but pull in the above inlines, so are 
00370 // pretty fast
00371 
00372 int compare(const String& x, const char* b)
00373 {
00374   return scmp(x.chars(), b);
00375 }
00376 
00377 int compare(const String& x, const String& y)
00378 {
00379   return scmp(x.chars(), y.chars());
00380 }
00381 
00382 int compare(const String& x, const SubString& y)
00383 {
00384   return ncmp(x.chars(), x.length(), y.chars(), y.length());
00385 }
00386 
00387 int compare(const SubString& x, const String& y)
00388 {
00389   return ncmp(x.chars(), x.length(), y.chars(), y.length());
00390 }
00391 
00392 int compare(const SubString& x, const SubString& y)
00393 {
00394   return ncmp(x.chars(), x.length(), y.chars(), y.length());
00395 }
00396 
00397 int compare(const SubString& x, const char* b)
00398 {
00399   if (b == 0)
00400     return x.length();
00401   else
00402   {
00403     const char* a = x.chars();
00404     int n = x.length();
00405     int diff;
00406     while (n-- > 0) if ((diff = *a++ - *b++) != 0) return diff;
00407     return (*b == 0) ? 0 : -1;
00408   }
00409 }
00410 
00411 /*
00412  index fcts
00413 */
00414 
00415 int String::search(int start, int sl, char c) const
00416 {
00417   const char* s = chars();
00418   if (sl > 0)
00419   {
00420     if (start >= 0)
00421     {
00422       const char* a = &(s[start]);
00423       const char* lasta = &(s[sl]);
00424       while (a < lasta) if (*a++ == c) return --a - s;
00425     }
00426     else
00427     {
00428       const char* a = &(s[sl + start + 1]);
00429       while (--a >= s) if (*a == c) return a - s;
00430     }
00431   }
00432   return -1;
00433 }
00434 
00435 int String::search(int start, int sl, const char* t, int tl) const
00436 {
00437   const char* s = chars();
00438   if (tl < 0) tl = slen(t);
00439   if (sl > 0 && tl > 0)
00440   {
00441     if (start >= 0)
00442     {
00443       const char* lasts = &(s[sl - tl]);
00444       const char* lastt = &(t[tl]);
00445       const char* p = &(s[start]);
00446 
00447       while (p <= lasts)
00448       {
00449         const char* x = p++;
00450         const char* y = t;
00451         while (*x++ == *y++) if (y >= lastt) return --p - s;
00452       }
00453     }
00454     else
00455     {
00456       const char* firsts = &(s[tl - 1]);
00457       const char* lastt =  &(t[tl - 1]);
00458       const char* p = &(s[sl + start + 1]); 
00459 
00460       while (--p >= firsts)
00461       {
00462         const char* x = p;
00463         const char* y = lastt;
00464         while (*x-- == *y--) if (y < t) return ++x - s;
00465       }
00466     }
00467   }
00468   return -1;
00469 }
00470 
00471 int String::match(int start, int sl, int exact, const char* t, int tl) const
00472 {
00473   if (tl < 0) tl = slen(t);
00474 
00475   if (start < 0)
00476   {
00477     start = sl + start - tl + 1;
00478     if (start < 0 || (exact && start != 0))
00479       return -1;
00480   }
00481   else if (exact && sl - start != tl)
00482     return -1;
00483 
00484   if (sl == 0 || tl == 0 || sl - start < tl || start >= sl)
00485     return -1;
00486 
00487   int n = tl;
00488   const char* s = &(rep->s[start]);
00489   while (--n >= 0) if (*s++ != *t++) return -1;
00490   return tl;
00491 }
00492 
00493 void SubString::assign(const StrRep* ysrc, const char* ys, int ylen)
00494 {
00495   if (&S == &_nilString) return;
00496 
00497   if (ylen < 0) ylen = slen(ys);
00498   StrRep* targ = S.rep;
00499   int sl = (short) (targ->len) - len + ylen;
00500 
00501   if (ysrc == targ || sl >= targ->sz)
00502   {
00503     StrRep* oldtarg = targ;
00504     targ = Sresize(0, sl);
00505     ncopy(oldtarg->s, targ->s, pos);
00506     ncopy(ys, &(targ->s[pos]), ylen);
00507     scopy(&(oldtarg->s[pos + len]), &(targ->s[pos + ylen]));
00508     delete oldtarg;
00509   }
00510   else if (len == ylen)
00511     ncopy(ys, &(targ->s[pos]), len);
00512   else if (ylen < len)
00513   {
00514     ncopy(ys, &(targ->s[pos]), ylen);
00515     scopy(&(targ->s[pos + len]), &(targ->s[pos + ylen]));
00516   }
00517   else
00518   {
00519     revcopy(&(targ->s[targ->len]), &(targ->s[sl]), targ->len-pos-len +1);
00520     ncopy(ys, &(targ->s[pos]), ylen);
00521   }
00522   targ->len = sl;
00523   S.rep = targ;
00524 }
00525 
00526 
00527 
00528 /*
00529  * substitution
00530  */
00531 
00532 
00533 int String::_gsub(const char* pat, int pl, const char* r, int rl)
00534 {
00535   int nmatches = 0;
00536   if (pl < 0) pl = slen(pat);
00537   if (rl < 0) rl = slen(r);
00538   int sl = length();
00539   if (sl <= 0 || pl <= 0 || sl < pl)
00540     return nmatches;
00541   
00542   const char* s = chars();
00543 
00544   // prepare to make new rep
00545   StrRep* nrep = 0;
00546   int nsz = 0;
00547   char* x = 0;
00548 
00549   int si = 0;
00550   int xi = 0;
00551   int remaining = sl;
00552 
00553   while (remaining >= pl)
00554   {
00555     int pos = search(si, sl, pat, pl);
00556     if (pos < 0)
00557       break;
00558     else
00559     {
00560       ++nmatches;
00561       int mustfit = xi + remaining + rl - pl;
00562       if (mustfit >= nsz)
00563       {
00564         if (nrep != 0) nrep->len = xi;
00565         nrep = Sresize(nrep, mustfit);
00566         nsz = (short) nrep->sz;
00567         x = nrep->s;
00568       }
00569       pos -= si;
00570       ncopy(&(s[si]), &(x[xi]), pos);
00571       ncopy(r, &(x[xi + pos]), rl);
00572       si += pos + pl;
00573       remaining -= pos + pl;
00574       xi += pos + rl;
00575     }
00576   }
00577 
00578   if (nrep == 0)
00579   {
00580     if (nmatches == 0)
00581       return nmatches;
00582     else
00583       nrep = Sresize(nrep, xi+remaining);
00584   }
00585 
00586   ncopy0(&(s[si]), &(x[xi]), remaining);
00587   nrep->len = xi + remaining;
00588 
00589   if (nrep->len <= rep->sz)   // fit back in if possible
00590   {
00591     rep->len = nrep->len;
00592     ncopy0(nrep->s, rep->s, rep->len);
00593     delete(nrep);
00594   }
00595   else
00596   {
00597     delete(rep);
00598     rep = nrep;
00599   }
00600   return nmatches;
00601 }
00602 
00603 int String::_gsub(const Regex& pat, const char* r, int rl)
00604 {
00605   int nmatches = 0;
00606   int sl = length();
00607   if (sl <= 0)
00608     return nmatches;
00609 
00610   if (rl < 0) rl = slen(r);
00611 
00612   const char* s = chars();
00613 
00614   StrRep* nrep = 0;
00615   int nsz = 0;
00616 
00617   char* x = 0;
00618 
00619   int si = 0;
00620   int xi = 0;
00621   int remaining = sl;
00622   int  pos, pl = 0;             // how long is a regular expression?
00623 
00624   while (remaining > 0)
00625   {
00626     pos = pat.search(s, sl, pl, si); // unlike string search, the pos returned here is absolute
00627     if (pos < 0 || pl <= 0)
00628       break;
00629     else
00630     {
00631       ++nmatches;
00632       int mustfit = xi + remaining + rl - pl;
00633       if (mustfit >= nsz)
00634       {
00635         if (nrep != 0) nrep->len = xi;
00636         nrep = Sresize(nrep, mustfit);
00637         x = nrep->s;
00638         nsz = (short) nrep->sz;
00639       }
00640       pos -= si;
00641       ncopy(&(s[si]), &(x[xi]), pos);
00642       ncopy(r, &(x[xi + pos]), rl);
00643       si += pos + pl;
00644       remaining -= pos + pl;
00645       xi += pos + rl;
00646     }
00647   }
00648 
00649   if (nrep == 0)
00650   {
00651     if (nmatches == 0)
00652       return nmatches;
00653     else
00654       nrep = Sresize(nrep, xi+remaining);
00655   }
00656 
00657   ncopy0(&(s[si]), &(x[xi]), remaining);
00658   nrep->len = xi + remaining;
00659 
00660   if (nrep->len <= rep->sz)   // fit back in if possible
00661   {
00662     rep->len = nrep->len;
00663     ncopy0(nrep->s, rep->s, rep->len);
00664     delete(nrep);
00665   }
00666   else
00667   {
00668     delete(rep);
00669     rep = nrep;
00670   }
00671   return nmatches;
00672 }
00673 
00674 
00675 /*
00676  * deletion
00677  */
00678 
00679 void String::del(int pos, int len)
00680 {
00681   if (pos < 0 || len <= 0 || (unsigned)(pos + len) > length()) return;
00682   int nlen = length() - len;
00683   int first = pos + len;
00684   ncopy0(&(rep->s[first]), &(rep->s[pos]), length() - first);
00685   rep->len = nlen;
00686 }
00687 
00688 void String::del(const Regex& r, int startpos)
00689 {
00690   int mlen;
00691   int first = r.search(chars(), length(), mlen, startpos);
00692   del(first, mlen);
00693 }
00694 
00695 void String::del(const char* t, int startpos)
00696 {
00697   int tlen = slen(t);
00698   int p = search(startpos, length(), t, tlen);
00699   del(p, tlen);
00700 }
00701 
00702 void String::del(const String& y, int startpos)
00703 {
00704   del(search(startpos, length(), y.chars(), y.length()), y.length());
00705 }
00706 
00707 void String::del(const SubString& y, int startpos)
00708 {
00709   del(search(startpos, length(), y.chars(), y.length()), y.length());
00710 }
00711 
00712 void String::del(char c, int startpos)
00713 {
00714   del(search(startpos, length(), c), 1);
00715 }
00716 
00717 /*
00718  * substring extraction
00719  */
00720 
00721 
00722 SubString String::at(int first, int len)
00723 {
00724   return _substr(first, len);
00725 }
00726 
00727 SubString String::operator() (int first, int len)
00728 {
00729   return _substr(first, len);
00730 }
00731 
00732 SubString String::before(int pos)
00733 {
00734   return _substr(0, pos);
00735 }
00736 
00737 SubString String::through(int pos)
00738 {
00739   return _substr(0, pos+1);
00740 }
00741 
00742 SubString String::after(int pos)
00743 {
00744   return _substr(pos + 1, length() - (pos + 1));
00745 }
00746 
00747 SubString String::from(int pos)
00748 {
00749   return _substr(pos, length() - pos);
00750 }
00751 
00752 SubString String::at(const String& y, int startpos)
00753 {
00754   int first = search(startpos, length(), y.chars(), y.length());
00755   return _substr(first,  y.length());
00756 }
00757 
00758 SubString String::at(const SubString& y, int startpos)
00759 {
00760   int first = search(startpos, length(), y.chars(), y.length());
00761   return _substr(first, y.length());
00762 }
00763 
00764 SubString String::at(const Regex& r, int startpos)
00765 {
00766   int mlen;
00767   int first = r.search(chars(), length(), mlen, startpos);
00768   return _substr(first, mlen);
00769 }
00770 
00771 SubString String::at(const char* t, int startpos)
00772 {
00773   int tlen = slen(t);
00774   int first = search(startpos, length(), t, tlen);
00775   return _substr(first, tlen);
00776 }
00777 
00778 SubString String::at(char c, int startpos)
00779 {
00780   int first = search(startpos, length(), c);
00781   return _substr(first, 1);
00782 }
00783 
00784 SubString String::before(const String& y, int startpos)
00785 {
00786   int last = search(startpos, length(), y.chars(), y.length());
00787   return _substr(0, last);
00788 }
00789 
00790 SubString String::before(const SubString& y, int startpos)
00791 {
00792   int last = search(startpos, length(), y.chars(), y.length());
00793   return _substr(0, last);
00794 }
00795 
00796 SubString String::before(const Regex& r, int startpos)
00797 {
00798   int mlen;
00799   int first = r.search(chars(), length(), mlen, startpos);
00800   return _substr(0, first);
00801 }
00802 
00803 SubString String::before(char c, int startpos)
00804 {
00805   int last = search(startpos, length(), c);
00806   return _substr(0, last);
00807 }
00808 
00809 SubString String::before(const char* t, int startpos)
00810 {
00811   int tlen = slen(t);
00812   int last = search(startpos, length(), t, tlen);
00813   return _substr(0, last);
00814 }
00815 
00816 SubString String::through(const String& y, int startpos)
00817 {
00818   int last = search(startpos, length(), y.chars(), y.length());
00819   if (last >= 0) last += y.length();
00820   return _substr(0, last);
00821 }
00822 
00823 SubString String::through(const SubString& y, int startpos)
00824 {
00825   int last = search(startpos, length(), y.chars(), y.length());
00826   if (last >= 0) last += y.length();
00827   return _substr(0, last);
00828 }
00829 
00830 SubString String::through(const Regex& r, int startpos)
00831 {
00832   int mlen;
00833   int first = r.search(chars(), length(), mlen, startpos);
00834   if (first >= 0) first += mlen;
00835   return _substr(0, first);
00836 }
00837 
00838 SubString String::through(char c, int startpos)
00839 {
00840   int last = search(startpos, length(), c);
00841   if (last >= 0) last += 1;
00842   return _substr(0, last);
00843 }
00844 
00845 SubString String::through(const char* t, int startpos)
00846 {
00847   int tlen = slen(t);
00848   int last = search(startpos, length(), t, tlen);
00849   if (last >= 0) last += tlen;
00850   return _substr(0, last);
00851 }
00852 
00853 SubString String::after(const String& y, int startpos)
00854 {
00855   int first = search(startpos, length(), y.chars(), y.length());
00856   if (first >= 0) first += y.length();
00857   return _substr(first, length() - first);
00858 }
00859 
00860 SubString String::after(const SubString& y, int startpos)
00861 {
00862   int first = search(startpos, length(), y.chars(), y.length());
00863   if (first >= 0) first += y.length();
00864   return _substr(first, length() - first);
00865 }
00866 
00867 SubString String::after(char c, int startpos)
00868 {
00869   int first = search(startpos, length(), c);
00870   if (first >= 0) first += 1;
00871   return _substr(first, length() - first);
00872 }
00873 
00874 SubString String::after(const Regex& r, int startpos)
00875 {
00876   int mlen;
00877   int first = r.search(chars(), length(), mlen, startpos);
00878   if (first >= 0) first += mlen;
00879   return _substr(first, length() - first);
00880 }
00881 
00882 SubString String::after(const char* t, int startpos)
00883 {
00884   int tlen = slen(t);
00885   int first = search(startpos, length(), t, tlen);
00886   if (first >= 0) first += tlen;
00887   return _substr(first, length() - first);
00888 }
00889 
00890 SubString String::from(const String& y, int startpos)
00891 {
00892   int first = search(startpos, length(), y.chars(), y.length());
00893   return _substr(first, length() - first);
00894 }
00895 
00896 SubString String::from(const SubString& y, int startpos)
00897 {
00898   int first = search(startpos, length(), y.chars(), y.length());
00899   return _substr(first, length() - first);
00900 }
00901 
00902 SubString String::from(const Regex& r, int startpos)
00903 {
00904   int mlen;
00905   int first = r.search(chars(), length(), mlen, startpos);
00906   return _substr(first, length() - first);
00907 }
00908 
00909 SubString String::from(char c, int startpos)
00910 {
00911   int first = search(startpos, length(), c);
00912   return _substr(first, length() - first);
00913 }
00914 
00915 SubString String::from(const char* t, int startpos)
00916 {
00917   int tlen = slen(t);
00918   int first = search(startpos, length(), t, tlen);
00919   return _substr(first, length() - first);
00920 }
00921 
00922 
00923 
00924 /*
00925  * split/join
00926  */
00927 
00928 
00929 int split(const String& src, String results[], int n, const String& sep)
00930 {
00931   String x = src;
00932   const char* s = x.chars();
00933   int sl = x.length();
00934   int i = 0;
00935   int pos = 0;
00936   while (i < n && pos < sl)
00937   {
00938     int p = x.search(pos, sl, sep.chars(), sep.length());
00939     if (p < 0)
00940       p = sl;
00941     results[i].rep = Salloc(results[i].rep, &(s[pos]), p - pos, p - pos);
00942     i++;
00943     pos = p + sep.length();
00944   }
00945   return i;
00946 }
00947 
00948 int split(const String& src, String results[], int n, const Regex& r)
00949 {
00950   String x = src;
00951   const char* s = x.chars();
00952   int sl = x.length();
00953   int i = 0;
00954   int pos = 0;
00955   int p, matchlen;
00956   while (i < n && pos < sl)
00957   {
00958     p = r.search(s, sl, matchlen, pos);
00959     if (p < 0)
00960       p = sl;
00961     results[i].rep = Salloc(results[i].rep, &(s[pos]), p - pos, p - pos);
00962     i++;
00963     pos = p + matchlen;
00964   }
00965   return i;
00966 }
00967 
00968 
00969 #if defined(__GNUG__) && !defined(_G_NO_NRV)
00970 #define RETURN(r) return
00971 #define RETURNS(r) return r;
00972 #define RETURN_OBJECT(TYPE, NAME) /* nothing */
00973 #else /* _G_NO_NRV */
00974 #define RETURN(r) return r
00975 #define RETURNS(r) /* nothing */
00976 #define RETURN_OBJECT(TYPE, NAME) TYPE NAME;
00977 #endif
00978 
00979 String join(String src[], int n, const String& separator) RETURNS(x)
00980 {
00981   RETURN_OBJECT(String,x)
00982   String sep = separator;
00983   int xlen = 0;
00984   int i;
00985   for (i = 0; i < n; ++i)
00986     xlen += src[i].length();
00987   xlen += (n - 1) * sep.length();
00988 
00989   x.rep = Sresize (x.rep, xlen);
00990 
00991   int j = 0;
00992   
00993   for (i = 0; i < n - 1; ++i)
00994   {
00995     ncopy(src[i].chars(), &(x.rep->s[j]), src[i].length());
00996     j += src[i].length();
00997     ncopy(sep.chars(), &(x.rep->s[j]), sep.length());
00998     j += sep.length();
00999   }
01000   ncopy0(src[i].chars(), &(x.rep->s[j]), src[i].length());
01001   RETURN(x);
01002 }
01003   
01004 /*
01005  misc
01006 */
01007 
01008     
01009 StrRep* Sreverse(const StrRep* src, StrRep* dest)
01010 {
01011   int n = (short) src->len;
01012   if (src != dest)
01013     dest = Salloc(dest, src->s, n, n);
01014   if (n > 0)
01015   {
01016     char* a = dest->s;
01017     char* b = &(a[n - 1]);
01018     while (a < b)
01019     {
01020       char t = *a;
01021       *a++ = *b;
01022       *b-- = t;
01023     }
01024   }
01025   return dest;
01026 }
01027 
01028 
01029 StrRep* Supcase(const StrRep* src, StrRep* dest)
01030 {
01031   int n = (short) src->len;
01032   if (src != dest) dest = Salloc(dest, src->s, n, n);
01033   char* p = dest->s;
01034   char* e = &(p[n]);
01035   for (; p < e; ++p) if (islower(*p)) *p = toupper(*p);
01036   return dest;
01037 }
01038 
01039 StrRep* Sdowncase(const StrRep* src, StrRep* dest)
01040 {
01041   int n = (short) src->len;
01042   if (src != dest) dest = Salloc(dest, src->s, n, n);
01043   char* p = dest->s;
01044   char* e = &(p[n]);
01045   for (; p < e; ++p) if (isupper(*p)) *p = tolower(*p);
01046   return dest;
01047 }
01048 
01049 StrRep* Scapitalize(const StrRep* src, StrRep* dest)
01050 {
01051   int n = (short) src->len;
01052   if (src != dest) dest = Salloc(dest, src->s, n, n);
01053 
01054   char* p = dest->s;
01055   char* e = &(p[n]);
01056   for (; p < e; ++p)
01057   {
01058     int at_word = islower(*p);
01059 
01060     if (at_word)
01061       *p = toupper(*p);
01062     else 
01063       at_word = isupper(*p) || isdigit(*p);
01064 
01065     if (at_word)
01066     {
01067       while (++p < e)
01068       {
01069         if (isupper(*p))
01070           *p = tolower(*p);
01071    /* A '\'' does not break a word, so that "Nathan's" stays
01072       "Nathan's" rather than turning into "Nathan'S". */
01073         else if (!islower(*p) && !isdigit(*p) && (*p != '\''))
01074           break;
01075       }
01076     }
01077   }
01078   return dest;
01079 }
01080 
01081 #if defined(__GNUG__) && !defined(_G_NO_NRV)
01082 
01083 String replicate(char c, int n) return w;
01084 {
01085   w.rep = Sresize(w.rep, n);
01086   char* p = w.rep->s;
01087   while (n-- > 0) *p++ = c;
01088   *p = 0;
01089 }
01090 
01091 String replicate(const String& y, int n) return w
01092 {
01093   int len = y.length();
01094   w.rep = Sresize(w.rep, n * len);
01095   char* p = w.rep->s;
01096   while (n-- > 0)
01097   {
01098     ncopy(y.chars(), p, len);
01099     p += len;
01100   }
01101   *p = 0;
01102 }
01103 
01104 String common_prefix(const String& x, const String& y, int startpos) return r;
01105 {
01106   const char* xchars = x.chars();
01107   const char* ychars = y.chars();
01108   const char* xs = &(xchars[startpos]);
01109   const char* ss = xs;
01110   const char* topx = &(xchars[x.length()]);
01111   const char* ys = &(ychars[startpos]);
01112   const char* topy = &(ychars[y.length()]);
01113   int l;
01114   for (l = 0; xs < topx && ys < topy && *xs++ == *ys++; ++l);
01115   r.rep = Salloc(r.rep, ss, l, l);
01116 }
01117 
01118 String common_suffix(const String& x, const String& y, int startpos) return r;
01119 {
01120   const char* xchars = x.chars();
01121   const char* ychars = y.chars();
01122   const char* xs = &(xchars[x.length() + startpos]);
01123   const char* botx = xchars;
01124   const char* ys = &(ychars[y.length() + startpos]);
01125   const char* boty = ychars;
01126   int l;
01127   for (l = 0; xs >= botx && ys >= boty && *xs == *ys ; --xs, --ys, ++l);
01128   r.rep = Salloc(r.rep, ++xs, l, l);
01129 }
01130 
01131 #else
01132 
01133 String replicate(char c, int n)
01134 {
01135   String w;
01136   w.rep = Sresize(w.rep, n);
01137   char* p = w.rep->s;
01138   while (n-- > 0) *p++ = c;
01139   *p = 0;
01140   return w;
01141 }
01142 
01143 String replicate(const String& y, int n)
01144 {
01145   String w;
01146   int len = y.length();
01147   w.rep = Sresize(w.rep, n * len);
01148   char* p = w.rep->s;
01149   while (n-- > 0)
01150   {
01151     ncopy(y.chars(), p, len);
01152     p += len;
01153   }
01154   *p = 0;
01155   return w;
01156 }
01157 
01158 String common_prefix(const String& x, const String& y, int startpos)
01159 {
01160   String r;
01161   const char* xchars = x.chars();
01162   const char* ychars = y.chars();
01163   const char* xs = &(xchars[startpos]);
01164   const char* ss = xs;
01165   const char* topx = &(xchars[x.length()]);
01166   const char* ys = &(ychars[startpos]);
01167   const char* topy = &(ychars[y.length()]);
01168   int l;
01169   for (l = 0; xs < topx && ys < topy && *xs++ == *ys++; ++l);
01170   r.rep = Salloc(r.rep, ss, l, l);
01171   return r;
01172 }
01173 
01174 String common_suffix(const String& x, const String& y, int startpos) 
01175 {
01176   String r;
01177   const char* xchars = x.chars();
01178   const char* ychars = y.chars();
01179   const char* xs = &(xchars[x.length() + startpos]);
01180   const char* botx = xchars;
01181   const char* ys = &(ychars[y.length() + startpos]);
01182   const char* boty = ychars;
01183   int l;
01184   for (l = 0; xs >= botx && ys >= boty && *xs == *ys ; --xs, --ys, ++l);
01185   r.rep = Salloc(r.rep, ++xs, l, l);
01186   return r;
01187 }
01188 
01189 #endif
01190 
01191 // IO
01192 
01193 istream& operator>>(istream& s, String& x)
01194 {
01195   if (!s.ipfx(0) || (!(s.flags() & ios::skipws) && !ws(s)))
01196   {
01197     s.clear(ios::failbit|s.rdstate()); // Redundant if using GNU iostreams.
01198     return s;
01199   }
01200   int ch;
01201   int i = 0;
01202   x.rep = Sresize(x.rep, 20);
01203   register streambuf *sb = s.rdbuf();
01204   while ((ch = sb->sbumpc()) != EOF)
01205   {
01206     if (isspace(ch))
01207       break;
01208     if (i >= x.rep->sz - 1)
01209       x.rep = Sresize(x.rep, i+1);
01210     x.rep->s[i++] = ch;
01211   }
01212   x.rep->s[i] = 0;
01213   x.rep->len = i;
01214   int new_state = s.rdstate();
01215   if (i == 0) new_state |= ios::failbit;
01216   if (ch == EOF) new_state |= ios::eofbit;
01217   s.clear(new_state);
01218   return s;
01219 }
01220 
01221 int readline(istream& s, String& x, char terminator, int discard)
01222 {
01223   if (!s.ipfx(0))
01224     return 0;
01225   int ch;
01226   int i = 0;
01227   x.rep = Sresize(x.rep, 80);
01228   register streambuf *sb = s.rdbuf();
01229   while ((ch = sb->sbumpc()) != EOF)
01230   {
01231     if (ch != terminator || !discard)
01232     {
01233       if (i >= x.rep->sz - 1)
01234         x.rep = Sresize(x.rep, i+1);
01235       x.rep->s[i++] = ch;
01236     }
01237     if (ch == terminator)
01238       break;
01239   }
01240   x.rep->s[i] = 0;
01241   x.rep->len = i;
01242   if (ch == EOF) s.clear(ios::eofbit|s.rdstate());
01243   return i;
01244 }
01245 
01246 
01247 ostream& operator<<(ostream& s, const SubString& x)
01248 { 
01249   const char* a = x.chars();
01250   const char* lasta = &(a[x.length()]);
01251   while (a < lasta)
01252     s.put(*a++);
01253   return(s);
01254 }
01255 
01256 // from John.Willis@FAS.RI.CMU.EDU
01257 
01258 int String::freq(const SubString& y) const
01259 {
01260   int found = 0;
01261   for (unsigned int i = 0; i < length(); i++) 
01262     if (match(i,length(),0,y.chars(), y.length())>= 0) found++;
01263   return(found);
01264 }
01265 
01266 int String::freq(const String& y) const
01267 {
01268   int found = 0;
01269   for (unsigned int i = 0; i < length(); i++) 
01270     if (match(i,length(),0,y.chars(),y.length()) >= 0) found++;
01271   return(found);
01272 }
01273 
01274 int String::freq(const char* t) const
01275 {
01276   int found = 0;
01277   for (unsigned int i = 0; i < length(); i++) 
01278     if (match(i,length(),0,t) >= 0) found++;
01279   return(found);
01280 }
01281 
01282 int String::freq(char c) const
01283 {
01284   int found = 0;
01285   for (unsigned int i = 0; i < length(); i++) 
01286     if (match(i,length(),0,&c,1) >= 0) found++;
01287   return(found);
01288 }
01289 
01290 
01291 int String::OK() const
01292 {
01293   if (rep == 0             // don't have a rep
01294     || rep->len > rep->sz     // string oustide bounds
01295     || rep->s[rep->len] != 0)   // not null-terminated
01296       error("invariant failure");
01297   return 1;
01298 }
01299 
01300 int SubString::OK() const
01301 {
01302   int v = S != (const char*)0; // have a String;
01303   v &= S.OK();                 // that is legal
01304   v &= pos + len >= (short) S.rep->len;// pos and len within bounds
01305   if (!v) S.error("SubString invariant failure");
01306   return v;
01307 }
01308 

Generated on Sun Oct 14 18:46:11 2001 for Standard J2K Library by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001