t-suw****@users*****
t-suw****@users*****
2007年 9月 12日 (水) 22:45:16 JST
/* -*- C++ -*-
 *
 * utf8util.h - UTF-8 utilities
 *
 * Copyright (c) 2007 Tomotaka SUWA, All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the authors nor the names of its contributors
 *    may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// Origin: AquaSKK/src/utility/utf8util.h, revision 1.1.2.1
// (Wed Sep 12 22:45:16 2007), recovered from a `diff -u` against /dev/null.

#ifndef INC__utf8util__
#define INC__utf8util__

#include <string>

// UTF-8 iterator.
//
// Wraps a byte iterator and moves it one UTF-8 encoded character at a time.
// The wrapper has no knowledge of the underlying range: stepping forward
// inspects every byte it lands on (including the position it stops at) and
// stepping backward keeps going until it meets a lead byte, so the caller
// must guarantee that such a byte exists in the direction of travel.
template <typename Iterator>
class utf8iterator {
    Iterator curr_;

    // Length in bytes announced by the byte under the cursor, or 0 when that
    // byte cannot start a sequence (continuation bytes 0x80-0xbf, 0xfe, 0xff).
    // The historical 5- and 6-byte forms (0xf8-0xfd) are still recognised.
    unsigned size() const {
        const unsigned char byte = static_cast<unsigned char>(*curr_);

        if(byte < 0x80) return 1;
        if(byte < 0xc0) return 0;
        if(byte < 0xe0) return 2;
        if(byte < 0xf0) return 3;
        if(byte < 0xf8) return 4;
        if(byte < 0xfc) return 5;
        if(byte < 0xfe) return 6;
        return 0;
    }

    bool leadbyte() const { return size() != 0; }

    // Advance by `count` characters: each step moves to the next lead byte.
    utf8iterator& next(int count = 1) {
        for(int i = 0; i < count; ++ i) {
            do { ++ curr_; } while(!leadbyte());
        }
        return *this;
    }

    // Retreat by `count` characters: each step moves to the previous lead byte.
    utf8iterator& prev(int count = 1) {
        for(int i = 0; i < count; ++ i) {
            do { -- curr_; } while(!leadbyte());
        }
        return *this;
    }

public:
    // Value-initialise the wrapped iterator; `curr_(0)` is not valid for
    // every iterator type.
    utf8iterator() : curr_() {}
    utf8iterator(Iterator iter) : curr_(iter) {}
    utf8iterator& operator=(Iterator iter) {
        curr_ = iter;
        return *this;
    }

    // The wrapped byte iterator.
    Iterator iterator() const { return curr_; }
    // Byte length of the character under the cursor (0 on a non-lead byte).
    unsigned charsize() const { return size(); }

    // The bytes of the current character, taken as `charsize()` bytes
    // starting at the cursor.
    std::string operator*() const {
        return std::string(curr_, curr_ + size());
    }

    utf8iterator& operator++() { return next(); }
    // Postfix forms step by a whole character, exactly like the prefix
    // forms; stepping the raw iterator by a single byte would leave the
    // cursor in the middle of a multi-byte sequence.
    utf8iterator operator++(int) {
        utf8iterator before(*this);
        next();
        return before;
    }

    utf8iterator& operator--() { return prev(); }
    utf8iterator operator--(int) {
        utf8iterator before(*this);
        prev();
        return before;
    }

    // Move by `count` characters; a negative count moves the other way.
    // Counting toward zero avoids negating `count`.
    utf8iterator& operator+=(int count) {
        for(; 0 < count; -- count) { next(); }
        for(; count < 0; ++ count) { prev(); }
        return *this;
    }
    utf8iterator& operator-=(int count) {
        for(; 0 < count; -- count) { prev(); }
        for(; count < 0; ++ count) { next(); }
        return *this;
    }

    friend bool operator==(const utf8iterator& lhs, const utf8iterator& rhs) { return lhs.curr_ == rhs.curr_; }
    friend bool operator!=(const utf8iterator& lhs, const utf8iterator& rhs) { return lhs.curr_ != rhs.curr_; }
    friend bool operator<(const utf8iterator& lhs, const utf8iterator& rhs) { return lhs.curr_ < rhs.curr_; }
};

// Number of characters in [beg, end).
//
// `end` must be reachable from `beg` by moving forward. The scan compares
// against `end` before looking at a byte, so the end position itself is
// never dereferenced.
template <typename Iterator>
int operator-(utf8iterator<Iterator> end, utf8iterator<Iterator> beg) {
    const Iterator last = end.iterator();
    Iterator cur = beg.iterator();
    int size = 0;

    while(cur != last) {
        ++ size;
        // skip the remaining bytes of this character
        for(++ cur; cur != last && utf8iterator<Iterator>(cur).charsize() == 0; ++ cur) {}
    }
    return size;
}

template <typename Iterator>
utf8iterator<Iterator> operator+(utf8iterator<Iterator> beg, int count) { return beg += count; }
template <typename Iterator>
utf8iterator<Iterator> operator-(utf8iterator<Iterator> beg, int count) { return beg -= count; }

// Primitive procedures
namespace utf8 {
    typedef utf8iterator<std::string::iterator> iterator;
    typedef utf8iterator<std::string::const_iterator> const_iterator;

    namespace detail {
        // Move `pos` back to the nearest preceding lead byte without ever
        // stepping before `first`. Returns false (leaving `pos` at `first`)
        // when no lead byte was found.
        inline bool retreat(std::string::iterator first, std::string::iterator& pos) {
            while(pos != first) {
                -- pos;
                if(iterator(pos).charsize() != 0) {
                    return true;
                }
            }
            return false;
        }
    }

    // Length of a UTF-8 string, in characters.
    // (inline: this header may be included from several translation units)
    inline unsigned length(const std::string& str) {
        return const_iterator(str.end()) - const_iterator(str.begin());
    }

    // Insert a string at the given position.
    //
    // `offset` is counted in characters from the end of `target`; zero or a
    // positive value appends. An offset that reaches past the start inserts
    // at the start.
    //
    // Example:
    //   std::string str = "文字列";
    //   utf8::push(str, "a", -3); // "a文字列";
    //   utf8::push(str, "b");     // "a文字列b";
    //
    inline void push(std::string& target, const std::string& str, int offset = 0) {
        if(0 <= offset || target.empty()) {
            target += str;
            return;
        }

        std::string::iterator pos = target.end();
        for(; offset < 0; ++ offset) {
            if(!detail::retreat(target.begin(), pos)) {
                break;          // clamped to the start of the string
            }
        }
        target.insert(static_cast<std::string::size_type>(pos - target.begin()), str);
    }

    // Step back one character from the given position and delete it.
    //
    // `offset` is counted in characters from the end of `target`; positive
    // values are treated as zero. Nothing is deleted when the requested
    // character lies before the start of the string (this includes an empty
    // `target`).
    //
    // Example:
    //   std::string str = "文字列";
    //   utf8::pop(str);     // "文字";
    //   utf8::pop(str, -1); // "字";
    //
    inline void pop(std::string& target, int offset = 0) {
        std::string::iterator pos = target.end();

        // one step for the character to delete ...
        if(!detail::retreat(target.begin(), pos)) {
            return;
        }
        // ... plus one step per unit of negative offset
        for(; offset < 0; ++ offset) {
            if(!detail::retreat(target.begin(), pos)) {
                return;
            }
        }
        target.erase(static_cast<std::string::size_type>(pos - target.begin()),
                     iterator(pos).charsize());
    }
}

#endif