unistr.h

Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1998-2009, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *
00007 * File unistr.h
00008 *
00009 * Modification History:
00010 *
00011 *   Date        Name        Description
00012 *   09/25/98    stephen     Creation.
00013 *   11/11/98    stephen     Changed per 11/9 code review.
00014 *   04/20/99    stephen     Overhauled per 4/16 code review.
00015 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
00016 *                           handleReplaceBetween(); other methods unchanged.
00017 *   06/25/01    grhoten     Remove dependency on iostream.
00018 ******************************************************************************
00019 */
00020 
00021 #ifndef UNISTR_H
00022 #define UNISTR_H
00023 
00029 #include "unicode/utypes.h"
00030 #include "unicode/rep.h"
00031 #include "unicode/std_string.h"
00032 #include "unicode/stringpiece.h"
00033 #include "unicode/bytestream.h"
00034 
00035 struct UConverter;          // unicode/ucnv.h
00036 class  StringThreadTest;
00037 
// Defines the option value U_COMPARE_CODE_POINT_ORDER as 0x8000.
// The #ifndef guard lets an earlier definition of the same macro stand, so
// this header never redefines it when another header got there first.
// NOTE(review): the name, together with the compareCodePointOrder() members
// of UnicodeString, suggests this bit requests code point order in
// comparisons -- confirm against the headers named in the comment below.
00038 #ifndef U_COMPARE_CODE_POINT_ORDER
00039 /* see also ustring.h and unorm.h */
00045 #define U_COMPARE_CODE_POINT_ORDER  0x8000
00046 #endif
00047 
00048 #ifndef USTRING_H
00049 
// Declaration of u_strlen(const UChar *), returning int32_t.
// It sits inside #ifndef USTRING_H, so it is only emitted when that macro is
// not yet defined (presumably the include guard of ustring.h -- confirm).
// NOTE(review): presumably returns the number of UChar units in the
// NUL-terminated string s -- confirm against ustring.h.
00052 U_STABLE int32_t U_EXPORT2
00053 u_strlen(const UChar *s);
00054 #endif
00055 
00056 U_NAMESPACE_BEGIN
00057 
00058 class Locale;               // unicode/locid.h
00059 class StringCharacterIterator;
00060 class BreakIterator;        // unicode/brkiter.h
00061 
00062 /* The <iostream> include has been moved to unicode/ustream.h */
00063 
// Shorthand for the namespace-qualified enumerator UnicodeString::kInvariant.
// It is the value to pass as the `inv` argument of the UnicodeString members
// that take `enum EInvariant inv` (a const char * constructor and an
// extract() overload), and it is used by the fallback UNICODE_STRING expansion.
00074 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
00075 
// UNICODE_STRING(cs, _length): expands to a UnicodeString constructor call
// built from the literal cs and the length _length. Exactly one of four
// expansions is chosen at preprocessing time:
//   1. U_DECLARE_UTF16 is defined:
//        cs is wrapped in U_DECLARE_UTF16() and cast to const UChar *.
//   2. wchar_t and UChar have the same size, and either the charset family is
//      ASCII or UChar is 2 bytes with U_WCHAR_IS_UTF16 defined:
//        cs is token-pasted onto L (L ## cs) and cast to const UChar *.
//        The paste only yields a wide literal when cs is itself a literal
//        token, so a variable name cannot be passed here.
//   3. UChar is 1 byte and the charset family is ASCII:
//        cs is cast to const UChar * as it is.
//   4. Otherwise:
//        the (const char *, int32_t, EInvariant) constructor is called with
//        US_INV as the last argument.
// Expansions 1-3 call the (UBool isTerminated, const UChar *text,
// int32_t textLength) constructor with TRUE as the first argument.
00093 #if defined(U_DECLARE_UTF16)
00094 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
00095 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
00096 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
00097 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
00098 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
00099 #else
00100 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
00101 #endif
00102 
// UNICODE_STRING_SIMPLE(cs): same as UNICODE_STRING(cs, -1), i.e. the caller
// does not supply a length.
// NOTE(review): -1 presumably tells the constructor to determine the length
// of the NUL-terminated text itself -- confirm against the constructor docs.
00116 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
00117 
00187 class U_COMMON_API UnicodeString : public Replaceable
00188 {
00189 public:
00190 
// Tag enum with the single enumerator kInvariant.
// It is the type of the trailing `inv` parameter on the extract() overload and
// the const char * constructor declared with `enum EInvariant inv`, which
// sets those overloads apart from the other char * overloads of the same name.
// The US_INV macro expands to this enumerator.
// NOTE(review): presumably selects the "invariant characters" conversion
// mentioned in the doCodepageCreate() comment -- confirm.
00199   enum EInvariant {
00204     kInvariant
00205   };
00206 
00207   //========================================
00208   // Read-only operations
00209   //========================================
00210 
00211   /* Comparison - bitwise only - for international comparison use collation */
00212 
00220   inline UBool operator== (const UnicodeString& text) const;
00221 
00229   inline UBool operator!= (const UnicodeString& text) const;
00230 
00238   inline UBool operator> (const UnicodeString& text) const;
00239 
00247   inline UBool operator< (const UnicodeString& text) const;
00248 
00256   inline UBool operator>= (const UnicodeString& text) const;
00257 
00265   inline UBool operator<= (const UnicodeString& text) const;
00266 
00278   inline int8_t compare(const UnicodeString& text) const;
00279 
00294   inline int8_t compare(int32_t start,
00295          int32_t length,
00296          const UnicodeString& text) const;
00297 
00315    inline int8_t compare(int32_t start,
00316          int32_t length,
00317          const UnicodeString& srcText,
00318          int32_t srcStart,
00319          int32_t srcLength) const;
00320 
00333   inline int8_t compare(const UChar *srcChars,
00334          int32_t srcLength) const;
00335 
00350   inline int8_t compare(int32_t start,
00351          int32_t length,
00352          const UChar *srcChars) const;
00353 
00371   inline int8_t compare(int32_t start,
00372          int32_t length,
00373          const UChar *srcChars,
00374          int32_t srcStart,
00375          int32_t srcLength) const;
00376 
00394   inline int8_t compareBetween(int32_t start,
00395             int32_t limit,
00396             const UnicodeString& srcText,
00397             int32_t srcStart,
00398             int32_t srcLimit) const;
00399 
00417   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
00418 
00438   inline int8_t compareCodePointOrder(int32_t start,
00439                                       int32_t length,
00440                                       const UnicodeString& srcText) const;
00441 
00463    inline int8_t compareCodePointOrder(int32_t start,
00464                                        int32_t length,
00465                                        const UnicodeString& srcText,
00466                                        int32_t srcStart,
00467                                        int32_t srcLength) const;
00468 
00487   inline int8_t compareCodePointOrder(const UChar *srcChars,
00488                                       int32_t srcLength) const;
00489 
00509   inline int8_t compareCodePointOrder(int32_t start,
00510                                       int32_t length,
00511                                       const UChar *srcChars) const;
00512 
00534   inline int8_t compareCodePointOrder(int32_t start,
00535                                       int32_t length,
00536                                       const UChar *srcChars,
00537                                       int32_t srcStart,
00538                                       int32_t srcLength) const;
00539 
00561   inline int8_t compareCodePointOrderBetween(int32_t start,
00562                                              int32_t limit,
00563                                              const UnicodeString& srcText,
00564                                              int32_t srcStart,
00565                                              int32_t srcLimit) const;
00566 
00585   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
00586 
00607   inline int8_t caseCompare(int32_t start,
00608          int32_t length,
00609          const UnicodeString& srcText,
00610          uint32_t options) const;
00611 
00634   inline int8_t caseCompare(int32_t start,
00635          int32_t length,
00636          const UnicodeString& srcText,
00637          int32_t srcStart,
00638          int32_t srcLength,
00639          uint32_t options) const;
00640 
00660   inline int8_t caseCompare(const UChar *srcChars,
00661          int32_t srcLength,
00662          uint32_t options) const;
00663 
00684   inline int8_t caseCompare(int32_t start,
00685          int32_t length,
00686          const UChar *srcChars,
00687          uint32_t options) const;
00688 
00711   inline int8_t caseCompare(int32_t start,
00712          int32_t length,
00713          const UChar *srcChars,
00714          int32_t srcStart,
00715          int32_t srcLength,
00716          uint32_t options) const;
00717 
00740   inline int8_t caseCompareBetween(int32_t start,
00741             int32_t limit,
00742             const UnicodeString& srcText,
00743             int32_t srcStart,
00744             int32_t srcLimit,
00745             uint32_t options) const;
00746 
00754   inline UBool startsWith(const UnicodeString& text) const;
00755 
00766   inline UBool startsWith(const UnicodeString& srcText,
00767             int32_t srcStart,
00768             int32_t srcLength) const;
00769 
00778   inline UBool startsWith(const UChar *srcChars,
00779             int32_t srcLength) const;
00780 
00790   inline UBool startsWith(const UChar *srcChars,
00791             int32_t srcStart,
00792             int32_t srcLength) const;
00793 
00801   inline UBool endsWith(const UnicodeString& text) const;
00802 
00813   inline UBool endsWith(const UnicodeString& srcText,
00814           int32_t srcStart,
00815           int32_t srcLength) const;
00816 
00825   inline UBool endsWith(const UChar *srcChars,
00826           int32_t srcLength) const;
00827 
00838   inline UBool endsWith(const UChar *srcChars,
00839           int32_t srcStart,
00840           int32_t srcLength) const;
00841 
00842 
00843   /* Searching - bitwise only */
00844 
00853   inline int32_t indexOf(const UnicodeString& text) const;
00854 
00864   inline int32_t indexOf(const UnicodeString& text,
00865               int32_t start) const;
00866 
00878   inline int32_t indexOf(const UnicodeString& text,
00879               int32_t start,
00880               int32_t length) const;
00881 
00898   inline int32_t indexOf(const UnicodeString& srcText,
00899               int32_t srcStart,
00900               int32_t srcLength,
00901               int32_t start,
00902               int32_t length) const;
00903 
00915   inline int32_t indexOf(const UChar *srcChars,
00916               int32_t srcLength,
00917               int32_t start) const;
00918 
00931   inline int32_t indexOf(const UChar *srcChars,
00932               int32_t srcLength,
00933               int32_t start,
00934               int32_t length) const;
00935 
00952   int32_t indexOf(const UChar *srcChars,
00953               int32_t srcStart,
00954               int32_t srcLength,
00955               int32_t start,
00956               int32_t length) const;
00957 
00965   inline int32_t indexOf(UChar c) const;
00966 
00975   inline int32_t indexOf(UChar32 c) const;
00976 
00985   inline int32_t indexOf(UChar c,
00986               int32_t start) const;
00987 
00997   inline int32_t indexOf(UChar32 c,
00998               int32_t start) const;
00999 
01010   inline int32_t indexOf(UChar c,
01011               int32_t start,
01012               int32_t length) const;
01013 
01025   inline int32_t indexOf(UChar32 c,
01026               int32_t start,
01027               int32_t length) const;
01028 
01037   inline int32_t lastIndexOf(const UnicodeString& text) const;
01038 
01048   inline int32_t lastIndexOf(const UnicodeString& text,
01049               int32_t start) const;
01050 
01062   inline int32_t lastIndexOf(const UnicodeString& text,
01063               int32_t start,
01064               int32_t length) const;
01065 
01082   inline int32_t lastIndexOf(const UnicodeString& srcText,
01083               int32_t srcStart,
01084               int32_t srcLength,
01085               int32_t start,
01086               int32_t length) const;
01087 
01098   inline int32_t lastIndexOf(const UChar *srcChars,
01099               int32_t srcLength,
01100               int32_t start) const;
01101 
01114   inline int32_t lastIndexOf(const UChar *srcChars,
01115               int32_t srcLength,
01116               int32_t start,
01117               int32_t length) const;
01118 
01135   int32_t lastIndexOf(const UChar *srcChars,
01136               int32_t srcStart,
01137               int32_t srcLength,
01138               int32_t start,
01139               int32_t length) const;
01140 
01148   inline int32_t lastIndexOf(UChar c) const;
01149 
01158   inline int32_t lastIndexOf(UChar32 c) const;
01159 
01168   inline int32_t lastIndexOf(UChar c,
01169               int32_t start) const;
01170 
01180   inline int32_t lastIndexOf(UChar32 c,
01181               int32_t start) const;
01182 
01193   inline int32_t lastIndexOf(UChar c,
01194               int32_t start,
01195               int32_t length) const;
01196 
01208   inline int32_t lastIndexOf(UChar32 c,
01209               int32_t start,
01210               int32_t length) const;
01211 
01212 
01213   /* Character access */
01214 
01223   inline UChar charAt(int32_t offset) const;
01224 
01232   inline UChar operator[] (int32_t offset) const;
01233 
01245   inline UChar32 char32At(int32_t offset) const;
01246 
01262   inline int32_t getChar32Start(int32_t offset) const;
01263 
01280   inline int32_t getChar32Limit(int32_t offset) const;
01281 
01332   int32_t moveIndex32(int32_t index, int32_t delta) const;
01333 
01334   /* Substring extraction */
01335 
01351   inline void extract(int32_t start,
01352            int32_t length,
01353            UChar *dst,
01354            int32_t dstStart = 0) const;
01355 
01377   int32_t
01378   extract(UChar *dest, int32_t destCapacity,
01379           UErrorCode &errorCode) const;
01380 
01391   inline void extract(int32_t start,
01392            int32_t length,
01393            UnicodeString& target) const;
01394 
01406   inline void extractBetween(int32_t start,
01407               int32_t limit,
01408               UChar *dst,
01409               int32_t dstStart = 0) const;
01410 
01420   virtual void extractBetween(int32_t start,
01421               int32_t limit,
01422               UnicodeString& target) const;
01423 
01445   int32_t extract(int32_t start,
01446            int32_t startLength,
01447            char *target,
01448            int32_t targetCapacity,
01449            enum EInvariant inv) const;
01450 
01451 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
01452 
01472   int32_t extract(int32_t start,
01473            int32_t startLength,
01474            char *target,
01475            uint32_t targetLength) const;
01476 
01477 #endif
01478 
01479 #if !UCONFIG_NO_CONVERSION
01480 
01506   inline int32_t extract(int32_t start,
01507                  int32_t startLength,
01508                  char *target,
01509                  const char *codepage = 0) const;
01510 
01540   int32_t extract(int32_t start,
01541            int32_t startLength,
01542            char *target,
01543            uint32_t targetLength,
01544            const char *codepage) const;
01545 
01563   int32_t extract(char *dest, int32_t destCapacity,
01564                   UConverter *cnv,
01565                   UErrorCode &errorCode) const;
01566 
01567 #endif
01568 
01579   void toUTF8(ByteSink &sink) const;
01580 
01581 #if U_HAVE_STD_STRING
01582 
// Writes this string to `result` as UTF-8 and returns that same reference.
// StringClass must be a type that StringByteSink<StringClass> can be
// instantiated with; the sink is constructed from the address of `result`
// and then handed to toUTF8(ByteSink &).
// NOTE(review): whether existing content of `result` is kept or replaced is
// decided by StringByteSink -- confirm against unicode/bytestream.h.
01595   template<typename StringClass>
01596   StringClass &toUTF8String(StringClass &result) const {
01597     StringByteSink<StringClass> utf8Sink(&result);
01598     toUTF8(utf8Sink);
01599     return result;
01600   }
01601 
01602 #endif
01603 
01619   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
01620 
01621   /* Length operations */
01622 
01631   inline int32_t length(void) const;
01632 
01646   int32_t
01647   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
01648 
01672   UBool
01673   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
01674 
01680   inline UBool isEmpty(void) const;
01681 
01691   inline int32_t getCapacity(void) const;
01692 
01693   /* Other operations */
01694 
01700   inline int32_t hashCode(void) const;
01701 
01713   inline UBool isBogus(void) const;
01714 
01715 
01716   //========================================
01717   // Write operations
01718   //========================================
01719 
01720   /* Assignment operations */
01721 
01729   UnicodeString &operator=(const UnicodeString &srcText);
01730 
01751   UnicodeString &fastCopyFrom(const UnicodeString &src);
01752 
01760   inline UnicodeString& operator= (UChar ch);
01761 
01769   inline UnicodeString& operator= (UChar32 ch);
01770 
01782   inline UnicodeString& setTo(const UnicodeString& srcText,
01783                int32_t srcStart);
01784 
01798   inline UnicodeString& setTo(const UnicodeString& srcText,
01799                int32_t srcStart,
01800                int32_t srcLength);
01801 
01810   inline UnicodeString& setTo(const UnicodeString& srcText);
01811 
01820   inline UnicodeString& setTo(const UChar *srcChars,
01821                int32_t srcLength);
01822 
01831   UnicodeString& setTo(UChar srcChar);
01832 
01841   UnicodeString& setTo(UChar32 srcChar);
01842 
01863   UnicodeString &setTo(UBool isTerminated,
01864                        const UChar *text,
01865                        int32_t textLength);
01866 
01886   UnicodeString &setTo(UChar *buffer,
01887                        int32_t buffLength,
01888                        int32_t buffCapacity);
01889 
01930   void setToBogus();
01931 
01939   UnicodeString& setCharAt(int32_t offset,
01940                UChar ch);
01941 
01942 
01943   /* Append operations */
01944 
01952  inline  UnicodeString& operator+= (UChar ch);
01953 
01961  inline  UnicodeString& operator+= (UChar32 ch);
01962 
01971   inline UnicodeString& operator+= (const UnicodeString& srcText);
01972 
01987   inline UnicodeString& append(const UnicodeString& srcText,
01988             int32_t srcStart,
01989             int32_t srcLength);
01990 
01998   inline UnicodeString& append(const UnicodeString& srcText);
01999 
02013   inline UnicodeString& append(const UChar *srcChars,
02014             int32_t srcStart,
02015             int32_t srcLength);
02016 
02025   inline UnicodeString& append(const UChar *srcChars,
02026             int32_t srcLength);
02027 
02034   inline UnicodeString& append(UChar srcChar);
02035 
02042   inline UnicodeString& append(UChar32 srcChar);
02043 
02044 
02045   /* Insert operations */
02046 
02060   inline UnicodeString& insert(int32_t start,
02061             const UnicodeString& srcText,
02062             int32_t srcStart,
02063             int32_t srcLength);
02064 
02073   inline UnicodeString& insert(int32_t start,
02074             const UnicodeString& srcText);
02075 
02089   inline UnicodeString& insert(int32_t start,
02090             const UChar *srcChars,
02091             int32_t srcStart,
02092             int32_t srcLength);
02093 
02103   inline UnicodeString& insert(int32_t start,
02104             const UChar *srcChars,
02105             int32_t srcLength);
02106 
02115   inline UnicodeString& insert(int32_t start,
02116             UChar srcChar);
02117 
02126   inline UnicodeString& insert(int32_t start,
02127             UChar32 srcChar);
02128 
02129 
02130   /* Replace operations */
02131 
02149   UnicodeString& replace(int32_t start,
02150              int32_t length,
02151              const UnicodeString& srcText,
02152              int32_t srcStart,
02153              int32_t srcLength);
02154 
02167   UnicodeString& replace(int32_t start,
02168              int32_t length,
02169              const UnicodeString& srcText);
02170 
02188   UnicodeString& replace(int32_t start,
02189              int32_t length,
02190              const UChar *srcChars,
02191              int32_t srcStart,
02192              int32_t srcLength);
02193 
02206   inline UnicodeString& replace(int32_t start,
02207              int32_t length,
02208              const UChar *srcChars,
02209              int32_t srcLength);
02210 
02222   inline UnicodeString& replace(int32_t start,
02223              int32_t length,
02224              UChar srcChar);
02225 
02237   inline UnicodeString& replace(int32_t start,
02238              int32_t length,
02239              UChar32 srcChar);
02240 
02250   inline UnicodeString& replaceBetween(int32_t start,
02251                 int32_t limit,
02252                 const UnicodeString& srcText);
02253 
02268   inline UnicodeString& replaceBetween(int32_t start,
02269                 int32_t limit,
02270                 const UnicodeString& srcText,
02271                 int32_t srcStart,
02272                 int32_t srcLimit);
02273 
02284   virtual void handleReplaceBetween(int32_t start,
02285                                     int32_t limit,
02286                                     const UnicodeString& text);
02287 
02293   virtual UBool hasMetaData() const;
02294 
02310   virtual void copy(int32_t start, int32_t limit, int32_t dest);
02311 
02312   /* Search and replace operations */
02313 
02322   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
02323                 const UnicodeString& newText);
02324 
02336   inline UnicodeString& findAndReplace(int32_t start,
02337                 int32_t length,
02338                 const UnicodeString& oldText,
02339                 const UnicodeString& newText);
02340 
02358   UnicodeString& findAndReplace(int32_t start,
02359                 int32_t length,
02360                 const UnicodeString& oldText,
02361                 int32_t oldStart,
02362                 int32_t oldLength,
02363                 const UnicodeString& newText,
02364                 int32_t newStart,
02365                 int32_t newLength);
02366 
02367 
02368   /* Remove operations */
02369 
02375   inline UnicodeString& remove(void);
02376 
02385   inline UnicodeString& remove(int32_t start,
02386                                int32_t length = (int32_t)INT32_MAX);
02387 
02396   inline UnicodeString& removeBetween(int32_t start,
02397                                       int32_t limit = (int32_t)INT32_MAX);
02398 
02399 
02400   /* Length operations */
02401 
02413   UBool padLeading(int32_t targetLength,
02414                     UChar padChar = 0x0020);
02415 
02427   UBool padTrailing(int32_t targetLength,
02428                      UChar padChar = 0x0020);
02429 
02436   inline UBool truncate(int32_t targetLength);
02437 
02443   UnicodeString& trim(void);
02444 
02445 
02446   /* Miscellaneous operations */
02447 
02453   inline UnicodeString& reverse(void);
02454 
02463   inline UnicodeString& reverse(int32_t start,
02464              int32_t length);
02465 
02472   UnicodeString& toUpper(void);
02473 
02481   UnicodeString& toUpper(const Locale& locale);
02482 
02489   UnicodeString& toLower(void);
02490 
02498   UnicodeString& toLower(const Locale& locale);
02499 
02500 #if !UCONFIG_NO_BREAK_ITERATION
02501 
02528   UnicodeString &toTitle(BreakIterator *titleIter);
02529 
02557   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
02558 
02590   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
02591 
02592 #endif
02593 
02605   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
02606 
02607   //========================================
02608   // Access to the internal buffer
02609   //========================================
02610 
02654   UChar *getBuffer(int32_t minCapacity);
02655 
02676   void releaseBuffer(int32_t newLength=-1);
02677 
02708   inline const UChar *getBuffer() const;
02709 
02743   inline const UChar *getTerminatedBuffer();
02744 
02745   //========================================
02746   // Constructors
02747   //========================================
02748 
02752   UnicodeString();
02753 
02765   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
02766 
02772   UnicodeString(UChar ch);
02773 
02779   UnicodeString(UChar32 ch);
02780 
02787   UnicodeString(const UChar *text);
02788 
02796   UnicodeString(const UChar *text,
02797         int32_t textLength);
02798 
02818   UnicodeString(UBool isTerminated,
02819                 const UChar *text,
02820                 int32_t textLength);
02821 
02840   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
02841 
02842 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
02843 
02850   UnicodeString(const char *codepageData);
02851 
02858   UnicodeString(const char *codepageData, int32_t dataLength);
02859 
02860 #endif
02861 
02862 #if !UCONFIG_NO_CONVERSION
02863 
02881   UnicodeString(const char *codepageData, const char *codepage);
02882 
02900   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
02901 
02923   UnicodeString(
02924         const char *src, int32_t srcLength,
02925         UConverter *cnv,
02926         UErrorCode &errorCode);
02927 
02928 #endif
02929 
02954   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
02955 
02956 
02962   UnicodeString(const UnicodeString& that);
02963 
02970   UnicodeString(const UnicodeString& src, int32_t srcStart);
02971 
02979   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
02980 
02997   virtual Replaceable *clone() const;
02998 
03002   virtual ~UnicodeString();
03003 
03017   static UnicodeString fromUTF8(const StringPiece &utf8);
03018 
03030   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
03031 
03032   /* Miscellaneous operations */
03033 
03068   UnicodeString unescape() const;
03069 
03089   UChar32 unescapeAt(int32_t &offset) const;
03090 
03096   static UClassID U_EXPORT2 getStaticClassID();
03097 
03103   virtual UClassID getDynamicClassID() const;
03104 
03105   //========================================
03106   // Implementation methods
03107   //========================================
03108 
03109 protected:
03114   virtual int32_t getLength() const;
03115 
03121   virtual UChar getCharAt(int32_t offset) const;
03122 
03128   virtual UChar32 getChar32At(int32_t offset) const;
03129 
03130 private:
03131   // For char* constructors. Could be made public.
03132   UnicodeString &setToUTF8(const StringPiece &utf8);
03133   // For extract(char*).
03134   // We could make a toUTF8(target, capacity, errorCode) public but not
03135   // this version: New API will be cleaner if we make callers create substrings
03136   // rather than having start+length on every method,
03137   // and it should take a UErrorCode&.
03138   int32_t
03139   toUTF8(int32_t start, int32_t len,
03140          char *target, int32_t capacity) const;
03141 
03142 
03143   inline int8_t
03144   doCompare(int32_t start,
03145            int32_t length,
03146            const UnicodeString& srcText,
03147            int32_t srcStart,
03148            int32_t srcLength) const;
03149 
03150   int8_t doCompare(int32_t start,
03151            int32_t length,
03152            const UChar *srcChars,
03153            int32_t srcStart,
03154            int32_t srcLength) const;
03155 
03156   inline int8_t
03157   doCompareCodePointOrder(int32_t start,
03158                           int32_t length,
03159                           const UnicodeString& srcText,
03160                           int32_t srcStart,
03161                           int32_t srcLength) const;
03162 
03163   int8_t doCompareCodePointOrder(int32_t start,
03164                                  int32_t length,
03165                                  const UChar *srcChars,
03166                                  int32_t srcStart,
03167                                  int32_t srcLength) const;
03168 
03169   inline int8_t
03170   doCaseCompare(int32_t start,
03171                 int32_t length,
03172                 const UnicodeString &srcText,
03173                 int32_t srcStart,
03174                 int32_t srcLength,
03175                 uint32_t options) const;
03176 
03177   int8_t
03178   doCaseCompare(int32_t start,
03179                 int32_t length,
03180                 const UChar *srcChars,
03181                 int32_t srcStart,
03182                 int32_t srcLength,
03183                 uint32_t options) const;
03184 
03185   int32_t doIndexOf(UChar c,
03186             int32_t start,
03187             int32_t length) const;
03188 
03189   int32_t doIndexOf(UChar32 c,
03190                         int32_t start,
03191                         int32_t length) const;
03192 
03193   int32_t doLastIndexOf(UChar c,
03194                 int32_t start,
03195                 int32_t length) const;
03196 
03197   int32_t doLastIndexOf(UChar32 c,
03198                             int32_t start,
03199                             int32_t length) const;
03200 
03201   void doExtract(int32_t start,
03202          int32_t length,
03203          UChar *dst,
03204          int32_t dstStart) const;
03205 
03206   inline void doExtract(int32_t start,
03207          int32_t length,
03208          UnicodeString& target) const;
03209 
03210   inline UChar doCharAt(int32_t offset)  const;
03211 
03212   UnicodeString& doReplace(int32_t start,
03213                int32_t length,
03214                const UnicodeString& srcText,
03215                int32_t srcStart,
03216                int32_t srcLength);
03217 
03218   UnicodeString& doReplace(int32_t start,
03219                int32_t length,
03220                const UChar *srcChars,
03221                int32_t srcStart,
03222                int32_t srcLength);
03223 
03224   UnicodeString& doReverse(int32_t start,
03225                int32_t length);
03226 
03227   // calculate hash code
03228   int32_t doHashCode(void) const;
03229 
03230   // get pointer to start of array
03231   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
03232   inline UChar* getArrayStart(void);
03233   inline const UChar* getArrayStart(void) const;
03234 
03235   // A UnicodeString object (not necessarily its current buffer)
03236   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
03237   inline UBool isWritable() const;
03238 
03239   // Is the current buffer writable?
03240   inline UBool isBufferWritable() const;
03241 
03242   // None of the following does releaseArray().
03243   inline void setLength(int32_t len);        // sets only fShortLength and fLength
03244   inline void setToEmpty();                  // sets fFlags=kShortString
03245   inline void setToStackBuffer(int32_t len); // sets fFlags=kShortString
03246   inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
03247 
03248   // allocate the array; result may be fStackBuffer
03249   // sets refCount to 1 if appropriate
03250   // sets fArray, fCapacity, and fFlags
03251   // returns boolean for success or failure
03252   UBool allocate(int32_t capacity);
03253 
03254   // release the array if owned
03255   void releaseArray(void);
03256 
03257   // turn a bogus string into an empty one
03258   void unBogus();
03259 
03260   // implements assignment operator, copy constructor, and fastCopyFrom()
03261   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
03262 
03263   // Pin start and limit to acceptable values.
03264   inline void pinIndex(int32_t& start) const;
03265   inline void pinIndices(int32_t& start,
03266                          int32_t& length) const;
03267 
03268 #if !UCONFIG_NO_CONVERSION
03269 
03270   /* Internal extract() using UConverter. */
03271   int32_t doExtract(int32_t start, int32_t length,
03272                     char *dest, int32_t destCapacity,
03273                     UConverter *cnv,
03274                     UErrorCode &errorCode) const;
03275 
03276   /*
03277    * Real constructor for converting from codepage data.
03278    * It assumes that it is called with !fRefCounted.
03279    *
03280    * If <code>codepage==0</code>, then the default converter
03281    * is used for the platform encoding.
03282    * If <code>codepage</code> is an empty string (<code>""</code>),
03283    * then a simple conversion is performed on the codepage-invariant
03284    * subset ("invariant characters") of the platform encoding. See utypes.h.
03285    */
03286   void doCodepageCreate(const char *codepageData,
03287                         int32_t dataLength,
03288                         const char *codepage);
03289 
03290   /*
03291    * Worker function for creating a UnicodeString from
03292    * a codepage string using a UConverter.
03293    */
03294   void
03295   doCodepageCreate(const char *codepageData,
03296                    int32_t dataLength,
03297                    UConverter *converter,
03298                    UErrorCode &status);
03299 
03300 #endif
03301 
03302   /*
03303    * This function is called when write access to the array
03304    * is necessary (for example by getTerminatedBuffer(), which asks for length()+1).
03305    *
03306    * We need to make a copy of the array if
03307    * the buffer is read-only, or
03308    * the buffer is refCounted (shared), and refCount>1, or
03309    * the buffer is too small.
03310    *
03311    * Return FALSE if memory could not be allocated.
03312    */
03313   UBool cloneArrayIfNeeded(int32_t newCapacity = -1, // NOTE(review): -1 presumably means "keep the current capacity" - confirm
03314                             int32_t growCapacity = -1, // NOTE(review): presumably a larger capacity to try first - confirm
03315                             UBool doCopyArray = TRUE,
03316                             int32_t **pBufferToDelete = 0,
03317                             UBool forceClone = FALSE);
03318 
03319   // common function for case mappings; toWhichCase selects the mapping (NOTE(review): presumably lower/upper/title/fold - confirm), titleIter is a BreakIterator presumably used for title casing
03320   UnicodeString &
03321   caseMap(BreakIterator *titleIter,
03322           const char *locale,
03323           uint32_t options,
03324           int32_t toWhichCase);
03325 
03326   // ref counting for a shared (kRefCounted) buffer; isBufferWritable() treats refCount()==1 as "not shared"
03327   void addRef(void);
03328   int32_t removeRef(void); // NOTE(review): presumably returns the count after decrementing - confirm
03329   int32_t refCount(void) const;
03330 
03331   // constants
03332   enum {
03333     // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer):
03334     // 32-bit pointers: 4+1+1+13*2 = 32 bytes (vtable pointer + fShortLength + fFlags + 13 UChars)
03335     // 64-bit pointers: 8+1+1+15*2 = 40 bytes (vtable pointer + fShortLength + fFlags + 15 UChars)
03336     US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings, in UChars
03337     kInvalidUChar=0xffff, // returned for an out-of-range offset (see doCharAt(), char32At())
03338     kGrowSize=128, // grow size for this buffer
03339     kInvalidHashCode=0, // invalid hash code
03340     kEmptyHashCode=1, // hash code for empty string
03341
03342     // bit flag values for fFlags
03343     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
03344     kUsingStackBuffer=2,// the text lives in fUnion.fStackBuffer rather than fUnion.fFields.fArray
03345     kRefCounted=4,      // there is a refCount field before the characters in fArray
03346     kBufferIsReadonly=8,// do not write to this buffer
03347     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
03348                         // and releaseBuffer(newLength) must be called
03349
03350     // combined values for convenience
03351     kShortString=kUsingStackBuffer, // what setToEmpty() and setToStackBuffer() store in fFlags
03352     kLongString=kRefCounted,
03353     kReadonlyAlias=kBufferIsReadonly,
03354     kWritableAlias=0
03355   };
03356 
03357   friend class StringThreadTest; // class forward-declared at file scope near the top of this header
03358
03359   union StackBufferOrFields;        // forward declaration necessary before friend declaration
03360   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
03361 
03362   /*
03363    * The following are all the class fields that are stored
03364    * in each UnicodeString object.
03365    * Note that UnicodeString has virtual functions,
03366    * therefore there is an implicit vtable pointer
03367    * as the first real field.
03368    * The fields should be aligned such that no padding is
03369    * necessary, mostly by having larger types first.
03370    * On 32-bit machines, the size should be 32 bytes,
03371    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
03372    */
03373   // (implicit) *vtable;
03374   int8_t    fShortLength;   // 0..127: length  <0: real length is in fUnion.fFields.fLength (see length(), setLength())
03375   uint8_t   fFlags;         // bit flags: see constants above
03376   union StackBufferOrFields {
03377     // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
03378     // else fFields is used
03379     UChar     fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings
03380     struct {
03381       uint16_t  fPadding;   // align the following field at 8B (32b pointers) or 12B (64b)
03382       int32_t   fLength;    // number of characters in fArray if >127; else undefined
03383       UChar     *fArray;    // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))
03384       int32_t   fCapacity;  // capacity of fArray in UChars (what getCapacity() returns for a non-stack buffer)
03385     } fFields;
03386   } fUnion;
03387 };
03388 
03397 U_COMMON_API UnicodeString U_EXPORT2
03398 operator+ (const UnicodeString &s1, const UnicodeString &s2); // free function returning a new string by value; NOTE(review): presumably s1 followed by s2 - defined out of line
03399 
03400 //========================================
03401 // Inline members
03402 //========================================
03403 
03404 //========================================
03405 // Privates
03406 //========================================
03407 
03408 inline void
03409 UnicodeString::pinIndex(int32_t& start) const
03410 {
03411   // pin index
03412   if(start < 0) {
03413     start = 0;
03414   } else if(start > length()) {
03415     start = length();
03416   }
03417 }
03418 
03419 inline void
03420 UnicodeString::pinIndices(int32_t& start,
03421                           int32_t& _length) const
03422 {
03423   // pin indices
03424   int32_t len = length();
03425   if(start < 0) {
03426     start = 0;
03427   } else if(start > len) {
03428     start = len;
03429   }
03430   if(_length < 0) {
03431     _length = 0;
03432   } else if(_length > (len - start)) {
03433     _length = (len - start);
03434   }
03435 }
03436 
03437 inline UChar*
03438 UnicodeString::getArrayStart()
03439 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
03440 
03441 inline const UChar*
03442 UnicodeString::getArrayStart() const
03443 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
03444 
03445 //========================================
03446 // Read-only implementation methods
03447 //========================================
03448 inline int32_t
03449 UnicodeString::length() const
03450 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
03451 
03452 inline int32_t
03453 UnicodeString::getCapacity() const
03454 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
03455 
03456 inline int32_t
03457 UnicodeString::hashCode() const
03458 { return doHashCode(); }
03459 
03460 inline UBool
03461 UnicodeString::isBogus() const
03462 { return (UBool)(fFlags & kIsBogus); }
03463 
03464 inline UBool
03465 UnicodeString::isWritable() const
03466 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
03467 
03468 inline UBool
03469 UnicodeString::isBufferWritable() const
03470 {
03471   return (UBool)(
03472       !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
03473       (!(fFlags&kRefCounted) || refCount()==1));
03474 }
03475 
03476 inline const UChar *
03477 UnicodeString::getBuffer() const {
03478   if(fFlags&(kIsBogus|kOpenGetBuffer)) {
03479     return 0;
03480   } else if(fFlags&kUsingStackBuffer) {
03481     return fUnion.fStackBuffer;
03482   } else {
03483     return fUnion.fFields.fArray;
03484   }
03485 }
03486 
03487 //========================================
03488 // Read-only alias methods
03489 //========================================
03490 inline int8_t
03491 UnicodeString::doCompare(int32_t start,
03492               int32_t thisLength,
03493               const UnicodeString& srcText,
03494               int32_t srcStart,
03495               int32_t srcLength) const
03496 {
03497   if(srcText.isBogus()) {
03498     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
03499   } else {
03500     srcText.pinIndices(srcStart, srcLength);
03501     return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
03502   }
03503 }
03504 
03505 inline UBool
03506 UnicodeString::operator== (const UnicodeString& text) const
03507 {
03508   if(isBogus()) {
03509     return text.isBogus();
03510   } else {
03511     int32_t len = length(), textLength = text.length();
03512     return
03513       !text.isBogus() &&
03514       len == textLength &&
03515       doCompare(0, len, text, 0, textLength) == 0;
03516   }
03517 }
03518 
03519 inline UBool
03520 UnicodeString::operator!= (const UnicodeString& text) const
03521 { return (! operator==(text)); }
03522 
03523 inline UBool
03524 UnicodeString::operator> (const UnicodeString& text) const
03525 { return doCompare(0, length(), text, 0, text.length()) == 1; }
03526 
03527 inline UBool
03528 UnicodeString::operator< (const UnicodeString& text) const
03529 { return doCompare(0, length(), text, 0, text.length()) == -1; }
03530 
03531 inline UBool
03532 UnicodeString::operator>= (const UnicodeString& text) const
03533 { return doCompare(0, length(), text, 0, text.length()) != -1; }
03534 
03535 inline UBool
03536 UnicodeString::operator<= (const UnicodeString& text) const
03537 { return doCompare(0, length(), text, 0, text.length()) != 1; }
03538 
03539 inline int8_t
03540 UnicodeString::compare(const UnicodeString& text) const
03541 { return doCompare(0, length(), text, 0, text.length()); }
03542 
03543 inline int8_t
03544 UnicodeString::compare(int32_t start,
03545                int32_t _length,
03546                const UnicodeString& srcText) const
03547 { return doCompare(start, _length, srcText, 0, srcText.length()); }
03548 
03549 inline int8_t
03550 UnicodeString::compare(const UChar *srcChars,
03551                int32_t srcLength) const
03552 { return doCompare(0, length(), srcChars, 0, srcLength); }
03553 
03554 inline int8_t
03555 UnicodeString::compare(int32_t start,
03556                int32_t _length,
03557                const UnicodeString& srcText,
03558                int32_t srcStart,
03559                int32_t srcLength) const
03560 { return doCompare(start, _length, srcText, srcStart, srcLength); }
03561 
03562 inline int8_t
03563 UnicodeString::compare(int32_t start,
03564                int32_t _length,
03565                const UChar *srcChars) const
03566 { return doCompare(start, _length, srcChars, 0, _length); }
03567 
03568 inline int8_t
03569 UnicodeString::compare(int32_t start,
03570                int32_t _length,
03571                const UChar *srcChars,
03572                int32_t srcStart,
03573                int32_t srcLength) const
03574 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
03575 
03576 inline int8_t
03577 UnicodeString::compareBetween(int32_t start,
03578                   int32_t limit,
03579                   const UnicodeString& srcText,
03580                   int32_t srcStart,
03581                   int32_t srcLimit) const
03582 { return doCompare(start, limit - start,
03583            srcText, srcStart, srcLimit - srcStart); }
03584 
03585 inline int8_t
03586 UnicodeString::doCompareCodePointOrder(int32_t start,
03587                                        int32_t thisLength,
03588                                        const UnicodeString& srcText,
03589                                        int32_t srcStart,
03590                                        int32_t srcLength) const
03591 {
03592   if(srcText.isBogus()) {
03593     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
03594   } else {
03595     srcText.pinIndices(srcStart, srcLength);
03596     return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
03597   }
03598 }
03599 
03600 inline int8_t
03601 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
03602 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
03603 
03604 inline int8_t
03605 UnicodeString::compareCodePointOrder(int32_t start,
03606                                      int32_t _length,
03607                                      const UnicodeString& srcText) const
03608 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
03609 
03610 inline int8_t
03611 UnicodeString::compareCodePointOrder(const UChar *srcChars,
03612                                      int32_t srcLength) const
03613 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
03614 
03615 inline int8_t
03616 UnicodeString::compareCodePointOrder(int32_t start,
03617                                      int32_t _length,
03618                                      const UnicodeString& srcText,
03619                                      int32_t srcStart,
03620                                      int32_t srcLength) const
03621 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
03622 
03623 inline int8_t
03624 UnicodeString::compareCodePointOrder(int32_t start,
03625                                      int32_t _length,
03626                                      const UChar *srcChars) const
03627 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
03628 
03629 inline int8_t
03630 UnicodeString::compareCodePointOrder(int32_t start,
03631                                      int32_t _length,
03632                                      const UChar *srcChars,
03633                                      int32_t srcStart,
03634                                      int32_t srcLength) const
03635 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
03636 
03637 inline int8_t
03638 UnicodeString::compareCodePointOrderBetween(int32_t start,
03639                                             int32_t limit,
03640                                             const UnicodeString& srcText,
03641                                             int32_t srcStart,
03642                                             int32_t srcLimit) const
03643 { return doCompareCodePointOrder(start, limit - start,
03644            srcText, srcStart, srcLimit - srcStart); }
03645 
03646 inline int8_t
03647 UnicodeString::doCaseCompare(int32_t start,
03648                              int32_t thisLength,
03649                              const UnicodeString &srcText,
03650                              int32_t srcStart,
03651                              int32_t srcLength,
03652                              uint32_t options) const
03653 {
03654   if(srcText.isBogus()) {
03655     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
03656   } else {
03657     srcText.pinIndices(srcStart, srcLength);
03658     return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
03659   }
03660 }
03661 
03662 inline int8_t
03663 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
03664   return doCaseCompare(0, length(), text, 0, text.length(), options);
03665 }
03666 
03667 inline int8_t
03668 UnicodeString::caseCompare(int32_t start,
03669                            int32_t _length,
03670                            const UnicodeString &srcText,
03671                            uint32_t options) const {
03672   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
03673 }
03674 
03675 inline int8_t
03676 UnicodeString::caseCompare(const UChar *srcChars,
03677                            int32_t srcLength,
03678                            uint32_t options) const {
03679   return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
03680 }
03681 
03682 inline int8_t
03683 UnicodeString::caseCompare(int32_t start,
03684                            int32_t _length,
03685                            const UnicodeString &srcText,
03686                            int32_t srcStart,
03687                            int32_t srcLength,
03688                            uint32_t options) const {
03689   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
03690 }
03691 
03692 inline int8_t
03693 UnicodeString::caseCompare(int32_t start,
03694                            int32_t _length,
03695                            const UChar *srcChars,
03696                            uint32_t options) const {
03697   return doCaseCompare(start, _length, srcChars, 0, _length, options);
03698 }
03699 
03700 inline int8_t
03701 UnicodeString::caseCompare(int32_t start,
03702                            int32_t _length,
03703                            const UChar *srcChars,
03704                            int32_t srcStart,
03705                            int32_t srcLength,
03706                            uint32_t options) const {
03707   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
03708 }
03709 
03710 inline int8_t
03711 UnicodeString::caseCompareBetween(int32_t start,
03712                                   int32_t limit,
03713                                   const UnicodeString &srcText,
03714                                   int32_t srcStart,
03715                                   int32_t srcLimit,
03716                                   uint32_t options) const {
03717   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
03718 }
03719 
03720 inline int32_t
03721 UnicodeString::indexOf(const UnicodeString& srcText,
03722                int32_t srcStart,
03723                int32_t srcLength,
03724                int32_t start,
03725                int32_t _length) const
03726 {
03727   if(!srcText.isBogus()) {
03728     srcText.pinIndices(srcStart, srcLength);
03729     if(srcLength > 0) {
03730       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
03731     }
03732   }
03733   return -1;
03734 }
03735 
03736 inline int32_t
03737 UnicodeString::indexOf(const UnicodeString& text) const
03738 { return indexOf(text, 0, text.length(), 0, length()); }
03739 
03740 inline int32_t
03741 UnicodeString::indexOf(const UnicodeString& text,
03742                int32_t start) const {
03743   pinIndex(start);
03744   return indexOf(text, 0, text.length(), start, length() - start);
03745 }
03746 
03747 inline int32_t
03748 UnicodeString::indexOf(const UnicodeString& text,
03749                int32_t start,
03750                int32_t _length) const
03751 { return indexOf(text, 0, text.length(), start, _length); }
03752 
03753 inline int32_t
03754 UnicodeString::indexOf(const UChar *srcChars,
03755                int32_t srcLength,
03756                int32_t start) const {
03757   pinIndex(start);
03758   return indexOf(srcChars, 0, srcLength, start, length() - start);
03759 }
03760 
03761 inline int32_t
03762 UnicodeString::indexOf(const UChar *srcChars,
03763                int32_t srcLength,
03764                int32_t start,
03765                int32_t _length) const
03766 { return indexOf(srcChars, 0, srcLength, start, _length); }
03767 
03768 inline int32_t
03769 UnicodeString::indexOf(UChar c,
03770                int32_t start,
03771                int32_t _length) const
03772 { return doIndexOf(c, start, _length); }
03773 
03774 inline int32_t
03775 UnicodeString::indexOf(UChar32 c,
03776                int32_t start,
03777                int32_t _length) const
03778 { return doIndexOf(c, start, _length); }
03779 
03780 inline int32_t
03781 UnicodeString::indexOf(UChar c) const
03782 { return doIndexOf(c, 0, length()); }
03783 
03784 inline int32_t
03785 UnicodeString::indexOf(UChar32 c) const
03786 { return indexOf(c, 0, length()); }
03787 
03788 inline int32_t
03789 UnicodeString::indexOf(UChar c,
03790                int32_t start) const {
03791   pinIndex(start);
03792   return doIndexOf(c, start, length() - start);
03793 }
03794 
03795 inline int32_t
03796 UnicodeString::indexOf(UChar32 c,
03797                int32_t start) const {
03798   pinIndex(start);
03799   return indexOf(c, start, length() - start);
03800 }
03801 
03802 inline int32_t
03803 UnicodeString::lastIndexOf(const UChar *srcChars,
03804                int32_t srcLength,
03805                int32_t start,
03806                int32_t _length) const
03807 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
03808 
03809 inline int32_t
03810 UnicodeString::lastIndexOf(const UChar *srcChars,
03811                int32_t srcLength,
03812                int32_t start) const {
03813   pinIndex(start);
03814   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
03815 }
03816 
03817 inline int32_t
03818 UnicodeString::lastIndexOf(const UnicodeString& srcText,
03819                int32_t srcStart,
03820                int32_t srcLength,
03821                int32_t start,
03822                int32_t _length) const
03823 {
03824   if(!srcText.isBogus()) {
03825     srcText.pinIndices(srcStart, srcLength);
03826     if(srcLength > 0) {
03827       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
03828     }
03829   }
03830   return -1;
03831 }
03832 
03833 inline int32_t
03834 UnicodeString::lastIndexOf(const UnicodeString& text,
03835                int32_t start,
03836                int32_t _length) const
03837 { return lastIndexOf(text, 0, text.length(), start, _length); }
03838 
03839 inline int32_t
03840 UnicodeString::lastIndexOf(const UnicodeString& text,
03841                int32_t start) const {
03842   pinIndex(start);
03843   return lastIndexOf(text, 0, text.length(), start, length() - start);
03844 }
03845 
03846 inline int32_t
03847 UnicodeString::lastIndexOf(const UnicodeString& text) const
03848 { return lastIndexOf(text, 0, text.length(), 0, length()); }
03849 
03850 inline int32_t
03851 UnicodeString::lastIndexOf(UChar c,
03852                int32_t start,
03853                int32_t _length) const
03854 { return doLastIndexOf(c, start, _length); }
03855 
03856 inline int32_t
03857 UnicodeString::lastIndexOf(UChar32 c,
03858                int32_t start,
03859                int32_t _length) const {
03860   return doLastIndexOf(c, start, _length);
03861 }
03862 
03863 inline int32_t
03864 UnicodeString::lastIndexOf(UChar c) const
03865 { return doLastIndexOf(c, 0, length()); }
03866 
03867 inline int32_t
03868 UnicodeString::lastIndexOf(UChar32 c) const {
03869   return lastIndexOf(c, 0, length());
03870 }
03871 
03872 inline int32_t
03873 UnicodeString::lastIndexOf(UChar c,
03874                int32_t start) const {
03875   pinIndex(start);
03876   return doLastIndexOf(c, start, length() - start);
03877 }
03878 
03879 inline int32_t
03880 UnicodeString::lastIndexOf(UChar32 c,
03881                int32_t start) const {
03882   pinIndex(start);
03883   return lastIndexOf(c, start, length() - start);
03884 }
03885 
03886 inline UBool
03887 UnicodeString::startsWith(const UnicodeString& text) const
03888 { return compare(0, text.length(), text, 0, text.length()) == 0; }
03889 
03890 inline UBool
03891 UnicodeString::startsWith(const UnicodeString& srcText,
03892               int32_t srcStart,
03893               int32_t srcLength) const
03894 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
03895 
03896 inline UBool
03897 UnicodeString::startsWith(const UChar *srcChars,
03898               int32_t srcLength) const
03899 { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }
03900 
03901 inline UBool
03902 UnicodeString::startsWith(const UChar *srcChars,
03903               int32_t srcStart,
03904               int32_t srcLength) const
03905 { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}
03906 
03907 inline UBool
03908 UnicodeString::endsWith(const UnicodeString& text) const
03909 { return doCompare(length() - text.length(), text.length(),
03910            text, 0, text.length()) == 0; }
03911 
03912 inline UBool
03913 UnicodeString::endsWith(const UnicodeString& srcText,
03914             int32_t srcStart,
03915             int32_t srcLength) const {
03916   srcText.pinIndices(srcStart, srcLength);
03917   return doCompare(length() - srcLength, srcLength,
03918                    srcText, srcStart, srcLength) == 0;
03919 }
03920 
03921 inline UBool
03922 UnicodeString::endsWith(const UChar *srcChars,
03923             int32_t srcLength) const {
03924   if(srcLength < 0) {
03925     srcLength = u_strlen(srcChars);
03926   }
03927   return doCompare(length() - srcLength, srcLength,
03928                    srcChars, 0, srcLength) == 0;
03929 }
03930 
03931 inline UBool
03932 UnicodeString::endsWith(const UChar *srcChars,
03933             int32_t srcStart,
03934             int32_t srcLength) const {
03935   if(srcLength < 0) {
03936     srcLength = u_strlen(srcChars + srcStart);
03937   }
03938   return doCompare(length() - srcLength, srcLength,
03939                    srcChars, srcStart, srcLength) == 0;
03940 }
03941 
03942 //========================================
03943 // replace
03944 //========================================
03945 inline UnicodeString&
03946 UnicodeString::replace(int32_t start,
03947                int32_t _length,
03948                const UnicodeString& srcText)
03949 { return doReplace(start, _length, srcText, 0, srcText.length()); }
03950 
03951 inline UnicodeString&
03952 UnicodeString::replace(int32_t start,
03953                int32_t _length,
03954                const UnicodeString& srcText,
03955                int32_t srcStart,
03956                int32_t srcLength)
03957 { return doReplace(start, _length, srcText, srcStart, srcLength); }
03958 
03959 inline UnicodeString&
03960 UnicodeString::replace(int32_t start,
03961                int32_t _length,
03962                const UChar *srcChars,
03963                int32_t srcLength)
03964 { return doReplace(start, _length, srcChars, 0, srcLength); }
03965 
03966 inline UnicodeString&
03967 UnicodeString::replace(int32_t start,
03968                int32_t _length,
03969                const UChar *srcChars,
03970                int32_t srcStart,
03971                int32_t srcLength)
03972 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
03973 
03974 inline UnicodeString&
03975 UnicodeString::replace(int32_t start,
03976                int32_t _length,
03977                UChar srcChar)
03978 { return doReplace(start, _length, &srcChar, 0, 1); }
03979 
03980 inline UnicodeString&
03981 UnicodeString::replace(int32_t start,
03982                int32_t _length,
03983                UChar32 srcChar) {
03984   UChar buffer[U16_MAX_LENGTH];
03985   int32_t count = 0;
03986   UBool isError = FALSE;
03987   U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
03988   return doReplace(start, _length, buffer, 0, count);
03989 }
03990 
03991 inline UnicodeString&
03992 UnicodeString::replaceBetween(int32_t start,
03993                   int32_t limit,
03994                   const UnicodeString& srcText)
03995 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
03996 
03997 inline UnicodeString&
03998 UnicodeString::replaceBetween(int32_t start,
03999                   int32_t limit,
04000                   const UnicodeString& srcText,
04001                   int32_t srcStart,
04002                   int32_t srcLimit)
04003 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
04004 
04005 inline UnicodeString&
04006 UnicodeString::findAndReplace(const UnicodeString& oldText,
04007                   const UnicodeString& newText)
04008 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
04009             newText, 0, newText.length()); }
04010 
04011 inline UnicodeString&
04012 UnicodeString::findAndReplace(int32_t start,
04013                   int32_t _length,
04014                   const UnicodeString& oldText,
04015                   const UnicodeString& newText)
04016 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
04017             newText, 0, newText.length()); }
04018 
04019 // ============================
04020 // extract
04021 // ============================
04022 inline void
04023 UnicodeString::doExtract(int32_t start,
04024              int32_t _length,
04025              UnicodeString& target) const
04026 { target.replace(0, target.length(), *this, start, _length); }
04027 
04028 inline void
04029 UnicodeString::extract(int32_t start,
04030                int32_t _length,
04031                UChar *target,
04032                int32_t targetStart) const
04033 { doExtract(start, _length, target, targetStart); }
04034 
04035 inline void
04036 UnicodeString::extract(int32_t start,
04037                int32_t _length,
04038                UnicodeString& target) const
04039 { doExtract(start, _length, target); }
04040 
04041 #if !UCONFIG_NO_CONVERSION
04042 
04043 inline int32_t
04044 UnicodeString::extract(int32_t start,
04045                int32_t _length,
04046                char *dst,
04047                const char *codepage) const
04048 
04049 {
04050   // This dstSize value will be checked explicitly
04051   return extract(start, _length, dst, dst!=0 ? (((size_t)dst >= ((size_t)-1) - UINT32_MAX) ? (((char*)UINT32_MAX) - dst) : UINT32_MAX) : 0, codepage);
04052 }
04053 
04054 #endif
04055 
04056 inline void
04057 UnicodeString::extractBetween(int32_t start,
04058                   int32_t limit,
04059                   UChar *dst,
04060                   int32_t dstStart) const {
04061   pinIndex(start);
04062   pinIndex(limit);
04063   doExtract(start, limit - start, dst, dstStart);
04064 }
04065 
04066 inline UChar
04067 UnicodeString::doCharAt(int32_t offset) const
04068 {
04069   if((uint32_t)offset < (uint32_t)length()) {
04070     return getArrayStart()[offset];
04071   } else {
04072     return kInvalidUChar;
04073   }
04074 }
04075 
04076 inline UChar
04077 UnicodeString::charAt(int32_t offset) const
04078 { return doCharAt(offset); }
04079 
04080 inline UChar
04081 UnicodeString::operator[] (int32_t offset) const
04082 { return doCharAt(offset); }
04083 
04084 inline UChar32
04085 UnicodeString::char32At(int32_t offset) const
04086 {
04087   int32_t len = length();
04088   if((uint32_t)offset < (uint32_t)len) {
04089     const UChar *array = getArrayStart();
04090     UChar32 c;
04091     U16_GET(array, 0, offset, len, c);
04092     return c;
04093   } else {
04094     return kInvalidUChar;
04095   }
04096 }
04097 
04098 inline int32_t
04099 UnicodeString::getChar32Start(int32_t offset) const {
04100   if((uint32_t)offset < (uint32_t)length()) {
04101     const UChar *array = getArrayStart();
04102     U16_SET_CP_START(array, 0, offset);
04103     return offset;
04104   } else {
04105     return 0;
04106   }
04107 }
04108 
04109 inline int32_t
04110 UnicodeString::getChar32Limit(int32_t offset) const {
04111   int32_t len = length();
04112   if((uint32_t)offset < (uint32_t)len) {
04113     const UChar *array = getArrayStart();
04114     U16_SET_CP_LIMIT(array, 0, offset, len);
04115     return offset;
04116   } else {
04117     return len;
04118   }
04119 }
04120 
04121 inline UBool
04122 UnicodeString::isEmpty() const {
04123   return fShortLength == 0;
04124 }
04125 
04126 //========================================
04127 // Write implementation methods
04128 //========================================
04129 inline void
04130 UnicodeString::setLength(int32_t len) {
04131   if(len <= 127) {
04132     fShortLength = (int8_t)len;
04133   } else {
04134     fShortLength = (int8_t)-1;
04135     fUnion.fFields.fLength = len;
04136   }
04137 }
04138 
04139 inline void
04140 UnicodeString::setToEmpty() {
04141   fShortLength = 0;
04142   fFlags = kShortString;
04143 }
04144 
04145 inline void
04146 UnicodeString::setToStackBuffer(int32_t len) {
04147   fShortLength = (int8_t)len;
04148   fFlags = kShortString;
04149 }
04150 
04151 inline void
04152 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
04153   setLength(len);
04154   fUnion.fFields.fArray = array;
04155   fUnion.fFields.fCapacity = capacity;
04156 }
04157 
04158 inline const UChar *
04159 UnicodeString::getTerminatedBuffer() {
04160   if(!isWritable()) {
04161     return 0;
04162   } else {
04163     UChar *array = getArrayStart();
04164     int32_t len = length();
04165 #ifndef U_VALGRIND
04166     if(len < getCapacity() && array[len] == 0) {
04167       return array;
04168     }
04169 #endif
04170     if(cloneArrayIfNeeded(len+1)) {
04171       array = getArrayStart();
04172       array[len] = 0;
04173       return array;
04174     } else {
04175       return 0;
04176     }
04177   }
04178 }
04179 
04180 inline UnicodeString&
04181 UnicodeString::operator= (UChar ch)
04182 { return doReplace(0, length(), &ch, 0, 1); }
04183 
04184 inline UnicodeString&
04185 UnicodeString::operator= (UChar32 ch)
04186 { return replace(0, length(), ch); }
04187 
04188 inline UnicodeString&
04189 UnicodeString::setTo(const UnicodeString& srcText,
04190              int32_t srcStart,
04191              int32_t srcLength)
04192 {
04193   unBogus();
04194   return doReplace(0, length(), srcText, srcStart, srcLength);
04195 }
04196 
04197 inline UnicodeString&
04198 UnicodeString::setTo(const UnicodeString& srcText,
04199              int32_t srcStart)
04200 {
04201   unBogus();
04202   srcText.pinIndex(srcStart);
04203   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
04204 }
04205 
04206 inline UnicodeString&
04207 UnicodeString::setTo(const UnicodeString& srcText)
04208 {
04209   unBogus();
04210   return doReplace(0, length(), srcText, 0, srcText.length());
04211 }
04212 
04213 inline UnicodeString&
04214 UnicodeString::setTo(const UChar *srcChars,
04215              int32_t srcLength)
04216 {
04217   unBogus();
04218   return doReplace(0, length(), srcChars, 0, srcLength);
04219 }
04220 
04221 inline UnicodeString&
04222 UnicodeString::setTo(UChar srcChar)
04223 {
04224   unBogus();
04225   return doReplace(0, length(), &srcChar, 0, 1);
04226 }
04227 
04228 inline UnicodeString&
04229 UnicodeString::setTo(UChar32 srcChar)
04230 {
04231   unBogus();
04232   return replace(0, length(), srcChar);
04233 }
04234 
04235 inline UnicodeString&
04236 UnicodeString::append(const UnicodeString& srcText,
04237               int32_t srcStart,
04238               int32_t srcLength)
04239 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
04240 
04241 inline UnicodeString&
04242 UnicodeString::append(const UnicodeString& srcText)
04243 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
04244 
04245 inline UnicodeString&
04246 UnicodeString::append(const UChar *srcChars,
04247               int32_t srcStart,
04248               int32_t srcLength)
04249 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
04250 
04251 inline UnicodeString&
04252 UnicodeString::append(const UChar *srcChars,
04253               int32_t srcLength)
04254 { return doReplace(length(), 0, srcChars, 0, srcLength); }
04255 
04256 inline UnicodeString&
04257 UnicodeString::append(UChar srcChar)
04258 { return doReplace(length(), 0, &srcChar, 0, 1); }
04259 
04260 inline UnicodeString&
04261 UnicodeString::append(UChar32 srcChar) {
04262   UChar buffer[U16_MAX_LENGTH];
04263   int32_t _length = 0;
04264   UBool isError = FALSE;
04265   U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
04266   return doReplace(length(), 0, buffer, 0, _length);
04267 }
04268 
04269 inline UnicodeString&
04270 UnicodeString::operator+= (UChar ch)
04271 { return doReplace(length(), 0, &ch, 0, 1); }
04272 
04273 inline UnicodeString&
04274 UnicodeString::operator+= (UChar32 ch) {
04275   return append(ch);
04276 }
04277 
04278 inline UnicodeString&
04279 UnicodeString::operator+= (const UnicodeString& srcText)
04280 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
04281 
04282 inline UnicodeString&
04283 UnicodeString::insert(int32_t start,
04284               const UnicodeString& srcText,
04285               int32_t srcStart,
04286               int32_t srcLength)
04287 { return doReplace(start, 0, srcText, srcStart, srcLength); }
04288 
04289 inline UnicodeString&
04290 UnicodeString::insert(int32_t start,
04291               const UnicodeString& srcText)
04292 { return doReplace(start, 0, srcText, 0, srcText.length()); }
04293 
04294 inline UnicodeString&
04295 UnicodeString::insert(int32_t start,
04296               const UChar *srcChars,
04297               int32_t srcStart,
04298               int32_t srcLength)
04299 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
04300 
04301 inline UnicodeString&
04302 UnicodeString::insert(int32_t start,
04303               const UChar *srcChars,
04304               int32_t srcLength)
04305 { return doReplace(start, 0, srcChars, 0, srcLength); }
04306 
04307 inline UnicodeString&
04308 UnicodeString::insert(int32_t start,
04309               UChar srcChar)
04310 { return doReplace(start, 0, &srcChar, 0, 1); }
04311 
04312 inline UnicodeString&
04313 UnicodeString::insert(int32_t start,
04314               UChar32 srcChar)
04315 { return replace(start, 0, srcChar); }
04316 
04317 
04318 inline UnicodeString&
04319 UnicodeString::remove()
04320 {
04321   // remove() of a bogus string makes the string empty and non-bogus
04322   if(isBogus()) {
04323     unBogus();
04324   } else {
04325     setLength(0);
04326   }
04327   return *this;
04328 }
04329 
04330 inline UnicodeString&
04331 UnicodeString::remove(int32_t start,
04332              int32_t _length)
04333 {
04334     if(start <= 0 && _length == INT32_MAX) {
04335         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
04336         return remove();
04337     }
04338     return doReplace(start, _length, NULL, 0, 0);
04339 }
04340 
04341 inline UnicodeString&
04342 UnicodeString::removeBetween(int32_t start,
04343                 int32_t limit)
04344 { return doReplace(start, limit - start, NULL, 0, 0); }
04345 
04346 inline UBool
04347 UnicodeString::truncate(int32_t targetLength)
04348 {
04349   if(isBogus() && targetLength == 0) {
04350     // truncate(0) of a bogus string makes the string empty and non-bogus
04351     unBogus();
04352     return FALSE;
04353   } else if((uint32_t)targetLength < (uint32_t)length()) {
04354     setLength(targetLength);
04355     return TRUE;
04356   } else {
04357     return FALSE;
04358   }
04359 }
04360 
04361 inline UnicodeString&
04362 UnicodeString::reverse()
04363 { return doReverse(0, length()); }
04364 
04365 inline UnicodeString&
04366 UnicodeString::reverse(int32_t start,
04367                int32_t _length)
04368 { return doReverse(start, _length); }
04369 
04370 U_NAMESPACE_END
04371 
04372 #endif

Generated on 18 Sep 2013 for ICU 4.2.1 by  doxygen 1.4.7