uniset.h

Go to the documentation of this file.
00001 /*
00002 ***************************************************************************
00003 * Copyright (C) 1999-2009, International Business Machines Corporation
00004 * and others. All Rights Reserved.
00005 ***************************************************************************
00006 *   Date        Name        Description
00007 *   10/20/99    alan        Creation.
00008 ***************************************************************************
00009 */
00010 
00011 #ifndef UNICODESET_H
00012 #define UNICODESET_H
00013 
00014 #include "unicode/unifilt.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uset.h"
00017 
00023 U_NAMESPACE_BEGIN
00024 
00025 class BMPSet;                 // forward declaration: used below only as a pointer member (bmpSet)
00026 class ParsePosition;          // forward declaration: used below only as a reference parameter
00027 class SymbolTable;            // forward declaration: used below only as a const pointer parameter
00028 class UnicodeSetStringSpan;   // forward declaration: used below only as a pointer member (stringSpan)
00029 class UVector;                // forward declaration: used below only as a pointer member (strings)
00030 class RuleCharacterIterator;  // forward declaration: used below only as a reference parameter
00031 
00272 class U_COMMON_API UnicodeSet : public UnicodeFilter {
          // A UnicodeFilter subclass whose query and mutation methods take code
          // points (UChar32), code point ranges (start, end) and strings
          // (UnicodeString). This class body holds declarations only; notes on
          // behavior that is not backed by an inline definition in this header are
          // worded as assumptions to be confirmed against the implementation.
00273 
          // Data members. No access specifier precedes them, so they are private
          // (the default access for `class`).
00274     int32_t len; // length of list used; 0 <= len <= capacity
00275     int32_t capacity; // capacity of list
00276     UChar32* list; // MUST be terminated with HIGH
00277     BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
00278     UChar32* buffer; // internal buffer, may be NULL
00279     int32_t bufferCapacity; // capacity of buffer
00280     int32_t patLen; // presumably the length of `pat` -- TODO confirm
00281 
          // presumably the pattern text handled by setPattern()/releasePattern()
          // declared near the end of the class -- TODO confirm
00291     UChar *pat;
00292     UVector* strings; // maintained in sorted order
00293     UnicodeSetStringSpan *stringSpan; // non-NULL makes isFrozen() return true
00294 
00295 private:
00296     enum { // constants
00297         kIsBogus = 1       // This set is bogus (i.e. not valid)
00298     };
00299     uint8_t fFlags;         // Bit flag (see constants above)
00300 public:
          // Defined inline after the class: reports the kIsBogus bit of fFlags.
00310     inline UBool isBogus(void) const;
00311     
          // presumably puts the set into the state reported by isBogus() --
          // TODO confirm in the implementation
00328     void setToBogus();
00329 
00330 public:
00331 
00332     enum {
          // Named bounds. 0x10ffff is the highest Unicode code point; presumably
          // these are the smallest/largest values a set may hold -- TODO confirm.
00337         MIN_VALUE = 0,
00338 
00343         MAX_VALUE = 0x10ffff
00344     };
00345 
00346     //----------------------------------------------------------------
00347     // Constructors &c
00348     //----------------------------------------------------------------
00349 
00350 public:
00351 
          // Default constructor.
00356     UnicodeSet();
00357 
          // Builds a set from two code points; presumably the inclusive range
          // start..end -- TODO confirm.
00366     UnicodeSet(UChar32 start, UChar32 end);
00367 
          // Pattern-based constructors. Each takes the pattern text and a
          // non-const UErrorCode reference (presumably where failures are
          // reported -- TODO confirm). The longer overloads add option bits and a
          // SymbolTable pointer; the last one also takes a ParsePosition by
          // non-const reference.
00376     UnicodeSet(const UnicodeString& pattern,
00377                UErrorCode& status);
00378 
00391     UnicodeSet(const UnicodeString& pattern,
00392                uint32_t options,
00393                const SymbolTable* symbols,
00394                UErrorCode& status);
00395 
00409     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00410                uint32_t options,
00411                const SymbolTable* symbols,
00412                UErrorCode& status);
00413 
          // Copy constructor.
00418     UnicodeSet(const UnicodeSet& o);
00419 
          // Virtual destructor.
00424     virtual ~UnicodeSet();
00425 
          // Copy assignment.
00431     UnicodeSet& operator=(const UnicodeSet& o);
00432 
          // Virtual equality comparison against another UnicodeSet.
00444     virtual UBool operator==(const UnicodeSet& o) const;
00445 
          // Defined inline after the class as the negation of operator==.
00451     UBool operator!=(const UnicodeSet& o) const;
00452 
          // Returns a UnicodeFunctor pointer; presumably a newly allocated copy of
          // this set -- TODO confirm ownership in the implementation.
00462     virtual UnicodeFunctor* clone() const;
00463 
00471     virtual int32_t hashCode(void) const;
00472 
          // USet <-> UnicodeSet conversions. All four are defined inline after the
          // class as a plain reinterpret_cast of the pointer: no object is created
          // or copied.
00481     inline static UnicodeSet *fromUSet(USet *uset);
00482 
00491     inline static const UnicodeSet *fromUSet(const USet *uset);
00492     
00500     inline USet *toUSet();
00501 
00502 
00510     inline const USet * toUSet() const;
00511 
00512 
00513     //----------------------------------------------------------------
00514     // Freezable API
00515     //----------------------------------------------------------------
00516 
          // Defined inline after the class: true when bmpSet or stringSpan is
          // non-NULL.
00525     inline UBool isFrozen() const;
00526 
          // presumably makes the set immutable so that isFrozen() becomes true;
          // the return type is UnicodeFunctor* -- TODO confirm what is returned
00540     UnicodeFunctor *freeze();
00541 
          // presumably returns a modifiable (non-frozen) copy -- TODO confirm
00550     UnicodeFunctor *cloneAsThawed() const;
00551 
00552     //----------------------------------------------------------------
00553     // Public API
00554     //----------------------------------------------------------------
00555 
          // presumably replaces the contents with the range start..end -- TODO
          // confirm
00566     UnicodeSet& set(UChar32 start, UChar32 end);
00567 
          // Static check on `pattern` at index `pos`; needs no set instance.
00573     static UBool resemblesPattern(const UnicodeString& pattern,
00574                                   int32_t pos);
00575 
          // applyPattern overloads take the same parameter lists as the
          // pattern-based constructors above and return a UnicodeSet reference.
00588     UnicodeSet& applyPattern(const UnicodeString& pattern,
00589                              UErrorCode& status);
00590 
00607     UnicodeSet& applyPattern(const UnicodeString& pattern,
00608                              uint32_t options,
00609                              const SymbolTable* symbols,
00610                              UErrorCode& status);
00611 
00643     UnicodeSet& applyPattern(const UnicodeString& pattern,
00644                              ParsePosition& pos,
00645                              uint32_t options,
00646                              const SymbolTable* symbols,
00647                              UErrorCode& status);
00648 
          // Takes the output string by reference and returns a UnicodeString
          // reference; escapeUnprintable defaults to FALSE.
00662     virtual UnicodeString& toPattern(UnicodeString& result,
00663                              UBool escapeUnprintable = FALSE) const;
00664 
          // Property-based modifiers: one takes a UProperty plus an integer value,
          // the other takes property and value as strings. Both take a non-const
          // UErrorCode reference.
00687     UnicodeSet& applyIntPropertyValue(UProperty prop,
00688                                       int32_t value,
00689                                       UErrorCode& ec);
00690 
00720     UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00721                                    const UnicodeString& value,
00722                                    UErrorCode& ec);
00723 
00732     virtual int32_t size(void) const;
00733 
00740     virtual UBool isEmpty(void) const;
00741 
          // Membership queries for a single code point, a code point range, and a
          // string.
00749     virtual UBool contains(UChar32 c) const;
00750 
00759     virtual UBool contains(UChar32 start, UChar32 end) const;
00760 
00768     UBool contains(const UnicodeString& s) const;
00769 
00777     virtual UBool containsAll(const UnicodeSet& c) const;
00778 
00786     UBool containsAll(const UnicodeString& s) const;
00787 
00796     UBool containsNone(UChar32 start, UChar32 end) const;
00797 
00805     UBool containsNone(const UnicodeSet& c) const;
00806 
00814     UBool containsNone(const UnicodeString& s) const;
00815 
          // The three containsSome overloads are defined inline after the class,
          // each as the negation of the containsNone overload with the same
          // parameters.
00824     inline UBool containsSome(UChar32 start, UChar32 end) const;
00825 
00833     inline UBool containsSome(const UnicodeSet& s) const;
00834 
00842     inline UBool containsSome(const UnicodeString& s) const;
00843 
          // Span functions over UChar (span/spanBack) and char (spanUTF8/
          // spanBackUTF8) buffers; each takes a length and a USetSpanCondition and
          // returns an int32_t.
00862     int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00863 
00881     int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00882 
00901     int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00902 
00920     int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00921 
          // Note: not const, and `offset` is passed by non-const reference.
00926     virtual UMatchDegree matches(const Replaceable& text,
00927                          int32_t& offset,
00928                          int32_t limit,
00929                          UBool incremental);
00930 
00931 private:
          // Private static helper taking a Replaceable, a start/limit pair and a
          // string; presumably used by matches() -- TODO confirm.
00953     static int32_t matchRest(const Replaceable& text,
00954                              int32_t start, int32_t limit,
00955                              const UnicodeString& s);
00956 
          // Private lookup for code point c; presumably an index into `list` --
          // TODO confirm.
00966     int32_t findCodePoint(UChar32 c) const;
00967 
00968 public:
00969 
00977     virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
00978 
          // indexOf maps a code point to an int32_t and charAt maps an int32_t
          // index to a code point; presumably inverse operations -- TODO confirm.
00987     int32_t indexOf(UChar32 c) const;
00988 
00998     UChar32 charAt(int32_t index) const;
00999 
          // add overloads for a code point range, a single code point, and a
          // string; each returns a UnicodeSet reference.
01014     virtual UnicodeSet& add(UChar32 start, UChar32 end);
01015 
01023     UnicodeSet& add(UChar32 c);
01024 
01036     UnicodeSet& add(const UnicodeString& s);
01037 
01038  private:
          // Private helpers that take a string.
01044     static int32_t getSingleCP(const UnicodeString& s);
01045 
01046     void _add(const UnicodeString& s);
01047 
01048  public:
          // String-argument counterparts of the set-argument addAll/retainAll/
          // complementAll/removeAll declared further down.
01057     UnicodeSet& addAll(const UnicodeString& s);
01058 
01067     UnicodeSet& retainAll(const UnicodeString& s);
01068 
01077     UnicodeSet& complementAll(const UnicodeString& s);
01078 
01087     UnicodeSet& removeAll(const UnicodeString& s);
01088 
          // Static factories returning a UnicodeSet pointer; presumably the caller
          // owns the result -- TODO confirm.
01097     static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
01098 
01099 
01107     static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
01108 
          // retain / remove / complement: overloads for a code point range, a
          // single code point and (remove, complement) a string. complement(void)
          // takes no argument.
01122     virtual UnicodeSet& retain(UChar32 start, UChar32 end);
01123 
01124 
01130     UnicodeSet& retain(UChar32 c);
01131 
01145     virtual UnicodeSet& remove(UChar32 start, UChar32 end);
01146 
01154     UnicodeSet& remove(UChar32 c);
01155 
01165     UnicodeSet& remove(const UnicodeString& s);
01166 
01174     virtual UnicodeSet& complement(void);
01175 
01190     virtual UnicodeSet& complement(UChar32 start, UChar32 end);
01191 
01199     UnicodeSet& complement(UChar32 c);
01200 
01211     UnicodeSet& complement(const UnicodeString& s);
01212 
          // Set-argument operations; each returns a UnicodeSet reference.
01225     virtual UnicodeSet& addAll(const UnicodeSet& c);
01226 
01238     virtual UnicodeSet& retainAll(const UnicodeSet& c);
01239 
01251     virtual UnicodeSet& removeAll(const UnicodeSet& c);
01252 
01263     virtual UnicodeSet& complementAll(const UnicodeSet& c);
01264 
01271     virtual UnicodeSet& clear(void);
01272 
          // `attribute` is a plain int32_t; its accepted values are not visible
          // in this header.
01298     UnicodeSet& closeOver(int32_t attribute);
01299 
01306     virtual UnicodeSet &removeAllStrings();
01307 
          // Range accessors: a count plus start/end lookups by int32_t index.
01315     virtual int32_t getRangeCount(void) const;
01316 
01324     virtual UChar32 getRangeStart(int32_t index) const;
01325 
01333     virtual UChar32 getRangeEnd(int32_t index) const;
01334 
          // Writes into a caller-supplied uint16_t buffer of destCapacity units
          // and returns an int32_t; presumably the length written or required --
          // TODO confirm.
01383     int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01384 
01391     virtual UnicodeSet& compact();
01392 
          // Class-ID accessors: one static, one virtual per-object.
01404     static UClassID U_EXPORT2 getStaticClassID(void);
01405 
01414     virtual UClassID getDynamicClassID(void) const;
01415 
01416 private:
01417 
01418     // Private API for the USet API
01419 
          // USetAccess is a friend, so it can reach the two private accessors
          // below.
01420     friend class USetAccess;
01421 
01422     int32_t getStringCount() const;
01423 
01424     const UnicodeString* getString(int32_t index) const;
01425 
01426     //----------------------------------------------------------------
01427     // RuleBasedTransliterator support
01428     //----------------------------------------------------------------
01429 
01430 private:
01431 
01437     virtual UBool matchesIndexValue(uint8_t v) const;
01438 
01439 private:
01440 
01441     //----------------------------------------------------------------
01442     // Implementation: Clone as thawed (see ICU4J Freezable)
01443     //----------------------------------------------------------------
01444 
          // Private constructor; the unnamed UBool parameter only distinguishes
          // it from the public copy constructor.
01445     UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
01446 
01447     //----------------------------------------------------------------
01448     // Implementation: Pattern parsing
01449     //----------------------------------------------------------------
01450 
          // Private overload that reads from a RuleCharacterIterator and takes
          // `rebuiltPat` by non-const reference.
01451     void applyPattern(RuleCharacterIterator& chars,
01452                       const SymbolTable* symbols,
01453                       UnicodeString& rebuiltPat,
01454                       uint32_t options,
01455                       UErrorCode& ec);
01456 
01457     //----------------------------------------------------------------
01458     // Implementation: Utility methods
01459     //----------------------------------------------------------------
01460 
          // Capacity helpers; presumably they grow `list` and `buffer`
          // respectively -- TODO confirm.
01461     void ensureCapacity(int32_t newLen, UErrorCode& ec);
01462 
01463     void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
01464 
01465     void swapBuffers(void);
01466 
01467     UBool allocateStrings(UErrorCode &status);
01468 
          // Private pattern-generation helpers.
01469     UnicodeString& _toPattern(UnicodeString& result,
01470                               UBool escapeUnprintable) const;
01471 
01472     UnicodeString& _generatePattern(UnicodeString& result,
01473                                     UBool escapeUnprintable) const;
01474 
01475     static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01476 
01477     static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01478 
01479     //----------------------------------------------------------------
01480     // Implementation: Fundamental operators
01481     //----------------------------------------------------------------
01482 
          // Each takes a raw UChar32 array with its length and an int8_t
          // polarity; the meaning of `polarity` is not visible in this header.
01483     void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01484 
01485     void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01486 
01487     void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01488 
          // Private property-pattern helpers, each with one overload for a
          // string and one for a RuleCharacterIterator.
01494     static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01495                                           int32_t pos);
01496 
01497     static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01498                                           int32_t iterOpts);
01499 
01538     UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01539                                      ParsePosition& ppos,
01540                                      UErrorCode &ec);
01541 
01542     void applyPropertyPattern(RuleCharacterIterator& chars,
01543                               UnicodeString& rebuiltPat,
01544                               UErrorCode& ec);
01545 
01546     static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
01547 
          // Callback type: a function pointer that receives a code point and an
          // opaque context pointer and returns a UBool.
01552     typedef UBool (*Filter)(UChar32 codePoint, void* context);
01553 
          // Takes a Filter callback together with the context pointer to hand to
          // it, plus an int32_t `src` and an error code.
01563     void applyFilter(Filter filter,
01564                      void* context,
01565                      int32_t src,
01566                      UErrorCode &status);
01567 
          // presumably store / discard the pattern held in pat and patLen -- TODO
          // confirm
01571     void setPattern(const UnicodeString& newPat);
01575     void releasePattern();
01576 
          // UnicodeSetIterator is a friend and may access the private members.
01577     friend class UnicodeSetIterator;
01578 };
01579 
01580 
01581 
01582 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
          // Inequality is the negation of the virtual equality operator.
01583     return static_cast<UBool>(!this->operator==(o));
01584 }
01585 
01586 inline UBool UnicodeSet::isFrozen() const {
          // Frozen unless both helper pointers (bmpSet and stringSpan) are NULL.
01587     return static_cast<UBool>(!(bmpSet==NULL && stringSpan==NULL));
01588 }
01589 
01590 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
          // Negation of containsNone() for the same start/end arguments.
01591     return static_cast<UBool>(!this->containsNone(start, end));
01592 }
01593 
01594 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
          // Negation of containsNone() for the same set argument.
01595     return static_cast<UBool>(!this->containsNone(s));
01596 }
01597 
01598 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
          // Negation of containsNone() for the same string argument.
01599     return static_cast<UBool>(!this->containsNone(s));
01600 }
01601 
01602 inline UBool UnicodeSet::isBogus() const {
          // Isolate the kIsBogus bit of the flag byte.
01603     return static_cast<UBool>(kIsBogus & fFlags);
01604 }
01605 
01606 inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
          // Reinterprets the USet pointer as a UnicodeSet pointer; the pointer
          // value is passed through unchanged and nothing is copied.
01607     return reinterpret_cast<UnicodeSet *>(uset);
01608 }
01609 
01610 inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
          // Const overload: reinterprets the const USet pointer as a const
          // UnicodeSet pointer; nothing is copied.
01611     return reinterpret_cast<const UnicodeSet *>(uset);
01612 }
01613 
01614 inline USet *UnicodeSet::toUSet() {
          // Reinterprets `this` as a USet pointer; the result refers to this
          // same object.
01615     return reinterpret_cast<USet *>(this);
01616 }
01617 
01618 inline const USet *UnicodeSet::toUSet() const {
          // Const overload: reinterprets `this` as a const USet pointer; the
          // result refers to this same object.
01619     return reinterpret_cast<const USet *>(this);
01620 }
01621 
01622 U_NAMESPACE_END
01623 
01624 #endif

Generated on 18 Sep 2013 for ICU 4.2.1 by doxygen 1.4.7