Archive / std.check.src.charset.hpp

ru en cn

с начала прошло: 9104 д. 19:03
страница обновлена: 04.12.2024 19:03

std.check.src.charset.hpp: src/charset.hpp

/*!
  \file charset.hpp
  \brief Header file which provides CharSet class and related enumerations.
*/
#ifndef CHECK_CHARSET_HPP
#define CHECK_CHARSET_HPP

#include <bitset>
#include <cctype>
#include <climits>
#include <cstddef>
#include <cstdio>
#include <string>
#include "typeinfo.hpp"

/*
  CHECK_UNSIGNIFY_CHAR(SYMBOL) converts a char into a non-negative value that
  can be used as a bit position.

  When the compiler is known to treat plain char as unsigned, the symbol is
  used as is. Otherwise the symbol is widened to int and shifted by CHAR_MIN,
  so that CHAR_MIN maps to 0 and CHAR_MAX maps to CHAR_MAX - CHAR_MIN. The
  widening to int happens before the subtraction, therefore the result cannot
  wrap around inside a char.
*/
#undef CHECK_UNSIGNIFY_CHAR
#if (defined(CHECK_GCC) && defined(__CHAR_UNSIGNED__)) || \
  (defined(CHECK_MSC) && defined(_CHAR_UNSIGNED))
#  define CHECK_UNSIGNIFY_CHAR(SYMBOL) (SYMBOL)
#else
#  define CHECK_UNSIGNIFY_CHAR(SYMBOL) (static_cast<int>(SYMBOL) - CHAR_MIN)
#endif


namespace Check
{


/*!
  \ingroup text
  \relates CharSet

  \brief Selects the way CharSet interprets a string when a character set is
  built from it.

  \see CharSet::CharSet()
*/
enum CharSetSyntax
{
  //! Every symbol of the string becomes a member of the character set.
  PlainText = 0,
  //! The string is a scanf() scan-set written without its square brackets.
  ScanSet = 1
};

/*!
  \ingroup text
  \relates CharSet

  \brief Selects which members of a character set CharSet::toString() puts
  into its result.

  \see CharSet::toString()
*/
enum CharCategory
{
  //! All symbols of the character set are returned.
  AnyChar = 0,
  //! Only the printable symbols of the character set are returned.
  PrintChar = 1
};

/*!
  \ingroup text

  \brief The %CharSet class provides an ASCII character set.

  This class stores a number of \c char 's (usually 8-bit), where each \c char
  corresponds to one ASCII character, and provides fast lookup of a symbol.

  Behind the scenes, in order to be compact, the %CharSet class uses a
  \e bit \e set with one bit per possible \c char value. A symbol is turned
  into its bit position with the CHECK_UNSIGNIFY_CHAR macro.
*/
class CharSet
{
private:
  // The nested proxy is declared before the friend declaration on purpose:
  // without a prior declaration "friend class CharRef" would name a class at
  // namespace scope instead of CharSet::CharRef.
  class CharRef;
  friend class CharRef;

  /*
    Proxy object returned by the non-const operator []. It remembers the
    owning set and the bit position of one symbol; reading it or assigning to
    it reads or changes the membership of that symbol.
  */
  class CharRef
  {
  private:
    CharSet &obj;
    // Bit position inside obj.chars, i.e. the symbol after
    // CHECK_UNSIGNIFY_CHAR has been applied. It is an index, not a character,
    // so it must be used with obj.chars directly: passing it to contains()
    // would apply the conversion a second time and address another bit.
    const unsigned char chr;

  public:
    CharRef(CharSet &charSet, const char symbol)
      : obj(charSet), chr(CHECK_UNSIGNIFY_CHAR(symbol))
    { /* Do Nothing */ }
    // Reads the membership of the referenced symbol.
    operator bool() const { return obj.chars.test(chr); }
    // Copies the membership state (not the binding) from another proxy.
    CharRef &operator = (const CharRef &ref)
    { obj.chars.set(chr, ref.obj.chars.test(ref.chr)); return *this; }
    // Inserts (true) or removes (false) the referenced symbol.
    CharRef &operator = (const bool value)
    { obj.chars.set(chr, value); return *this; }
  };


  // One bit per possible char value; the bit position of a symbol is
  // CHECK_UNSIGNIFY_CHAR(symbol), which lies in the range 0..UCHAR_MAX.
  std::bitset<UCHAR_MAX + 1> chars;

public:
  /*!
    \var NotFound
    Value which indicates unsuccessful search of symbols of a character set in
    a string.

    \see indexIn(), lastIndexIn(), invIndexIn(), invLastIndexIn()
  */
  enum { NotFound = -1 };

  /*!
    Constructs an empty character set.

    \see clear()
  */
  CharSet() { /* Do Nothing */ }

  /*!
    Constructs a copy of \a other.

    This operation occurs in constant time, because CharSet stores only ASCII
    characters and uses bit sets.

    \see operator = ()
  */
  CharSet(const CharSet &other) : chars(other.chars) { /* Do Nothing */ }

  //! Constructs a character set containing the character \a symbol only.
  explicit CharSet(const char symbol) { insert(symbol); }

  /*!
    Constructs a character set initialized with elements of \a line. Either
    copies symbols of \a line into a new set, or builds it according to the
    scan-set rules, depending on \a syntax.

    \throws InternalError

    \see CharSetSyntax
  */
  explicit CharSet(const char *line, const CharSetSyntax syntax = PlainText);

  /*!
    Constructs a character set initialized with symbols in a range from
    \a fromChar to \a toChar, including the bounds themselves.
  */
  CharSet(const char fromChar, const char toChar);

  /*!
    Removes all elements from the character set.

    \see remove()
  */
  CharSet &clear() { chars.reset(); return *this; }

  /*!
    Returns true if the character set contains no elements;
    otherwise returns false.

    \see size()
  */
  bool isEmpty() const { return chars.none(); }

  /*!
    Returns true if the character set contains every symbol that can be
    stored in a \c char; otherwise returns false.

    \see size()
  */
  bool isFull() const { return chars.count() == chars.size(); }

  //! Returns the number of symbols in the character set.
  size_t size() const { return chars.count(); }

  /*!
    Inserts a new character \a symbol. If \a symbol already exists in the
    character set, nothing happens.

    \see operator << (), remove(), and contains()
  */
  CharSet &insert(const char symbol)
  { chars.set(CHECK_UNSIGNIFY_CHAR(symbol)); return *this; }

  /*!
    Removes any occurrence of character \a symbol from the set.

    \see insert() and contains()
  */
  CharSet &remove(const char symbol)
  { chars.reset(CHECK_UNSIGNIFY_CHAR(symbol)); return *this; }

  /*!
    Inverts the state of \a symbol in the character set. If \a symbol exists,
    it will be removed, otherwise it will be inserted.

    \see insert() and remove()
  */
  CharSet &inverse(const char symbol)
  { chars.flip(CHECK_UNSIGNIFY_CHAR(symbol)); return *this; }

  /*!
    Returns true if the character set contains \a symbol; otherwise returns
    false.

    \see insert() and remove()
  */
  bool contains(const char symbol) const
  { return chars.test(CHECK_UNSIGNIFY_CHAR(symbol)); }

  /*!
    Inverts the state of every possible character in the character set. All
    existing symbols will be removed and all missing ones will be inserted.
  */
  CharSet &inverse() { chars.flip(); return *this; }

  /*!
    Inserts all the items in the \a other set into this set.

    \see operator |= (), intersect(), subtract(), and difference()
  */
  CharSet &unite(const CharSet &other) { chars |= other.chars; return *this; }

  /*!
    Removes any items in this set that don't exist in the \a other set.

    \see operator &= (), unite(), subtract(), and difference()
  */
  CharSet &intersect(const CharSet &other)
  { chars &= other.chars; return *this; }

  /*!
    Removes all the items in the \a other set from this set.

    \see operator -= (), unite(), intersect(), and difference()
  */
  CharSet &subtract(const CharSet &other)
  { chars &= ~other.chars; return *this; }

  /*!
    Makes the symmetric difference of this set with \a other.

    \see operator ^= (), unite(), intersect(), and subtract()
  */
  CharSet &difference(const CharSet &other)
  { chars ^= other.chars; return *this; }

  /*!
    Returns a \c std::string object with the data contained in this CharSet. The
    order of elements is lexicographical.

    Elements of the character set could be restricted to a specified
    \a category.

    \see CharCategory
  */
  std::string toString(const CharCategory category = AnyChar) const;

  /*!
    This is an overloaded member function, provided for convenience.

    Same as \e contains(symbol).
  */
  bool operator [] (const char symbol) const { return contains(symbol); }

  /*!
    Returns state of the character \a symbol in the set as a modifiable
    reference.

    The return value is of type CharRef, a helper class for CharSet. When you
    get an object of type CharRef, you can use it as if it were a \c bool \c &.
    If you assign to it, the assignment will apply to state of the character in
    the CharSet from which you got the reference.
  */
  CharRef operator [] (const char symbol) { return CharRef(*this, symbol); }

  /*!
    Inserts a new character \a symbol and returns a reference to this set.
    If \a symbol already exists in the set, the set is left unchanged.

    \see insert()
  */
  CharSet &operator << (const char symbol) { return insert(symbol); }

  /*!
    Removes \a symbol from the character set.
    If \a symbol does not exist in the set, the set is left unchanged.

    \see remove()
  */
  CharSet &operator >> (const char symbol) { return remove(symbol); }

  /*!
    Returns true if \a other is equal to this set; otherwise returns false.

    Two sets are considered equal if they contain the same elements.

    \see operator != ()
  */
  bool operator == (const CharSet &other) const { return chars == other.chars; }

  /*!
    Returns true if \a other is not equal to this set; otherwise returns false.

    Two sets are considered equal if they contain the same elements.

    \see operator == ()
  */
  bool operator != (const CharSet &other) const { return chars != other.chars; }

  /*!
    Returns an inverted copy of this set; this set itself is left unchanged.
    Syntactic sugar for inverse() applied to a copy.

    \see operator &= (), operator |= (), and operator -= ()
  */
  CharSet operator ~ () const { return CharSet(*this).inverse(); }

  /*!
    Returns the union of this character set and \a other.

    \see unite(), operator |= (), operator & (), and operator - ()
  */
  CharSet operator + (const CharSet &other) const
  { return CharSet(*this) += other; }

  /*!
    Returns the set difference of this character set with \a other.

    \see subtract(), operator -= (), operator | (), and operator & ()
  */
  CharSet operator - (const CharSet &other) const
  { return CharSet(*this) -= other; }

  /*!
    Returns the intersection of this set and \a other.

    \see intersect(), operator &= (), operator | (), and operator - ()
  */
  CharSet operator * (const CharSet &other) const
  { return CharSet(*this) *= other; }

  /*!
    Returns the intersection of this set and \a other.

    \see intersect(), operator &= (), operator | (), and operator - ()
  */
  CharSet operator & (const CharSet &other) const
  { return CharSet(*this) &= other; }

  /*!
    Returns the union of this character set and \a other.

    \see unite(), operator |= (), operator & (), and operator - ()
  */
  CharSet operator | (const CharSet &other) const
  { return CharSet(*this) |= other; }

  /*!
    Returns the symmetric difference of this character set and \a other.

    \see operator ^= (), operator & (), operator | (), and operator - ()
  */
  CharSet operator ^ (const CharSet &other) const
  { return CharSet(*this) ^= other; }

  //! Assigns \a other to this character set and returns a reference to this set.
  CharSet &operator = (const CharSet &other)
  { chars = other.chars; return *this; }

  /*!
    Syntactic sugar for \e unite(other).

    \see operator | (), operator &= (), and operator -= ()
  */
  CharSet &operator += (const CharSet &other) { return unite(other); }

  /*!
    Syntactic sugar for \e subtract(other).

    \see operator - (), operator |= (), and operator &= ()
  */
  CharSet &operator -= (const CharSet &other) { return subtract(other); }

  /*!
    Syntactic sugar for \e intersect(other).

    \see operator & (), operator |= (), and operator -= ()
  */
  CharSet &operator *= (const CharSet &other) { return intersect(other); }

  /*!
    Syntactic sugar for \e intersect(other).

    \see operator & (), operator |= (), and operator -= ()
  */
  CharSet &operator &= (const CharSet &other) { return intersect(other); }

  /*!
    Syntactic sugar for \e unite(other).

    \see operator | (), operator &= (), and operator -= ()
  */
  CharSet &operator |= (const CharSet &other) { return unite(other); }

  /*!
    Syntactic sugar for \e difference(other).

    \see operator ^ (), operator |= (), and operator &= ()
  */
  CharSet &operator ^= (const CharSet &other) { return difference(other); }

  /*!
    Returns the index position of the first occurrence of any character from
    the character set in the string \a line, searching forward in the first
    \a length symbols of \a line.
    Returns NotFound if characters could not be found.

    \see invIndexIn() and lastIndexIn()
  */
  int indexIn(const char *line, const size_t length = MaxSizeValue) const;

  /*!
    Returns the index position of the last occurrence of any character from
    the character set in the string \a line, searching backward in the first
    \a length symbols of \a line.
    Returns NotFound if characters could not be found.

    \see invLastIndexIn() and indexIn()
  */
  int lastIndexIn(const char *line, const size_t length = MaxSizeValue) const;

  /*!
    This is an overloaded member function, provided for convenience.

    Returns the index position of the first occurrence of any character from
    the character set in the string \a line, searching forward in the first
    \a length symbols of \a line from index position \a offset.
    Returns NotFound if characters could not be found.

    \see invIndexIn() and lastIndexIn()
  */
  int indexIn(
    const std::string &line, const size_t offset = 0,
    const size_t length = MaxSizeValue) const;

  /*!
    This is an overloaded member function, provided for convenience.

    Returns the index position of the last occurrence of any character from
    the character set in the string \a line, searching backward in the first
    \a length symbols of \a line from index position \a offset.
    Returns NotFound if characters could not be found.

    \see invLastIndexIn() and indexIn()
  */
  int lastIndexIn(
    const std::string &line, const size_t offset = 0,
    const size_t length = MaxSizeValue) const;

  /*!
    This is an overloaded member function, provided for convenience.

    Exactly like indexIn(), but looks for any character that does not belong
    to the character set. Implemented as indexIn() on an inverted copy.
  */
  int invIndexIn(const char *line, const size_t length = MaxSizeValue) const
  { return (~*this).indexIn(line, length); }

  /*!
    This is an overloaded member function, provided for convenience.

    Exactly like lastIndexIn(), but looks for any character that does not
    belong to the character set. Implemented as lastIndexIn() on an inverted
    copy.
  */
  int invLastIndexIn(const char *line, const size_t length = MaxSizeValue) const
  { return (~*this).lastIndexIn(line, length); }

  /*!
    This is an overloaded member function, provided for convenience.

    Exactly like indexIn(), but looks for any character that does not belong
    to the character set. Implemented as indexIn() on an inverted copy.
  */
  int invIndexIn(
    const std::string &line, const size_t offset = 0,
    const size_t length = MaxSizeValue) const
  { return (~*this).indexIn(line, offset, length); }

  /*!
    This is an overloaded member function, provided for convenience.

    Exactly like lastIndexIn(), but looks for any character that does not
    belong to the character set. Implemented as lastIndexIn() on an inverted
    copy.
  */
  int invLastIndexIn(
    const std::string &line, const size_t offset = 0,
    const size_t length = MaxSizeValue) const
  { return (~*this).lastIndexIn(line, offset, length); }

  /*!
    Tests whether the first \a length symbols of \a line all belong to the
    character set, i.e. whether invIndexIn() reports NotFound.

    \see matchAnyIn()
  */
  bool matchAllIn(const char *line, const size_t length = MaxSizeValue) const
  { return invIndexIn(line, length) == NotFound; }

  /*!
    Tests whether there is a symbol belonging to the character set among the
    first \a length symbols of \a line, i.e. whether indexIn() reports
    something other than NotFound.

    \see matchAllIn()
  */
  bool matchAnyIn(const char *line, const size_t length = MaxSizeValue) const
  { return indexIn(line, length) != NotFound; }

  /*!
    This is an overloaded member function, provided for convenience.

    Tests whether the first \a length symbols of \a line from origin
    \a offset all belong to the character set.

    \see matchAnyIn()
  */
  bool matchAllIn(
    const std::string &line, const size_t offset = 0,
    const size_t length = MaxSizeValue) const
  { return invIndexIn(line, offset, length) == NotFound; }

  /*!
    This is an overloaded member function, provided for convenience.

    Tests whether there is a character belonging to the character set among
    the first \a length symbols of \a line from origin \a offset.

    \see matchAllIn()
  */
  bool matchAnyIn(
    const std::string &line, const size_t offset = 0,
    const size_t length = MaxSizeValue) const
  { return indexIn(line, offset, length) != NotFound; }
};


// Predefined character sets. They are only declared in this header; their
// definitions (and therefore their exact contents) live elsewhere.
extern const CharSet NoneChars; //!< Empty CharSet. \see CharSet
extern const CharSet AnyChars; //!< Full CharSet. \see CharSet
extern const CharSet NewLineChars; //!< End of line symbols. \see CharSet
extern const CharSet BlankChars; //!< Space and tab symbols. \see CharSet
extern const CharSet SpaceChars; //!< All whitespace symbols. \see CharSet
extern const CharSet LetterChars; //!< Latin letters. \see CharSet
extern const CharSet DigitChars; //!< Decimal digits. \see CharSet


/*!
  Tells whether \a symbol is an end-of-line character: the result is true
  only when \a symbol is a valid ASCII code and NewLineChars contains it.

  \see NewLineChars
*/
inline bool isNewLine(const int symbol)
{
  if (!isascii(symbol))
    return false;
  return NewLineChars.contains(symbol);
}

/*!
  Tells whether \a symbol terminates a line: the result is true when
  \a symbol is \e EOF or when isNewLine() accepts it, and false otherwise.

  \see NewLineChars
*/
inline bool isEndOfLine(const int symbol)
{
  if (symbol == EOF)
    return true;
  return isNewLine(symbol);
}

/*!
  Tells whether \a symbol is a blank character: the result is true only when
  \a symbol is a valid ASCII code and BlankChars contains it.

  \see BlankChars
*/
inline bool isBlank(const int symbol)
{
  if (!isascii(symbol))
    return false;
  return BlankChars.contains(symbol);
}

/*!
  Tells whether \a symbol is a whitespace character: the result is true only
  when \a symbol is a valid ASCII code and SpaceChars contains it.

  \see SpaceChars
*/
inline bool isSpace(const int symbol)
{
  if (!isascii(symbol))
    return false;
  return SpaceChars.contains(symbol);
}

/*!
  Tells whether \a symbol is a letter: the result is true only when
  \a symbol is a valid ASCII code and LetterChars contains it.

  \see LetterChars
*/
inline bool isLetter(const int symbol)
{
  if (!isascii(symbol))
    return false;
  return LetterChars.contains(symbol);
}

/*!
  Tells whether \a symbol is a digit: the result is true only when
  \a symbol is a valid ASCII code and DigitChars contains it.

  \see DigitChars
*/
inline bool isDigit(const int symbol)
{
  if (!isascii(symbol))
    return false;
  return DigitChars.contains(symbol);
}


}


#endif /* CHECK_CHARSET_HPP */
Дальневосточный федеральный университет