popcount

32/64bit用の popcount をいくつか作って、PowerMac G5 1.8GHzで速度を測定してみました。

使用したアルゴリズムは以下の4つ。 (_32は32bit用、_64は64bit用、_32x2は64bit用だけど_32のコードを使う)

  • Sequential Shifting (SS)
    static
    int
    _sequential_shifting_32(uint32_t number) {
      int counter = 0;
      while(number != 0) {
        counter += number & 1;
        number >>= 1;
      }
      return counter;
    }
    static
    int
    _sequential_shifting_32x2(uint64_t number) {
      return
        _sequential_shifting_32(number & 0xffffffff) +
        _sequential_shifting_32(number >> 32);
    }
    
    static
    int
    _sequential_shifting_64(uint64_t number) {
      int counter = 0;
      while(number != 0) {
        counter += number & 1;
        number >>= 1;
      }
      return counter;
    }
    
  • Arithmetic Logic Counting (AL)
    static
    int
    _arithmetic_logic_counting_32(uint32_t number) {
      int counter = 0;
      while(number != 0) {
        counter += 1;
        number &= number - 1;
      }
      return counter;
    }
    
    static
    int
    _arithmetic_logic_counting_32x2(uint64_t number) {
      return
        _arithmetic_logic_counting_32(number & 0xffffffff) +
        _arithmetic_logic_counting_32(number >> 32);
    }
    
    static
    int
    _arithmetic_logic_counting_64(uint64_t number) {
      int counter = 0;
      while(number != 0) {
        counter += 1;
        number &= number - 1;
      }
      return counter;
    }
    
  • Emulated Popcount (Em. Popcount)
    static
    int
    _emulated_popcount_32(uint32_t number) {
      /* type 4 */
      number = (number & 0x55555555) + ((number >>  1) & 0x55555555);
      number = (number & 0x33333333) + ((number >>  2) & 0x33333333);
      number = (number + (number >>  4)) & 0x0f0f0f0f;
      number =  number + (number >>  8);
      number =  number + (number >> 16);
      return (int)(number & 0x3f);
    }
    
    static
    int
    _emulated_popcount_32x2(uint64_t number) {
      return
        _emulated_popcount_32(number & 0xffffffff) +
        _emulated_popcount_32(number >> 32);
    }
    
    static
    int
    _emulated_popcount_64(uint64_t number) {
      /* type 4 */
      number = (number & 0x5555555555555555ULL) +
               ((number >>  1) & 0x5555555555555555ULL);
      number = (number & 0x3333333333333333ULL) +
               ((number >>  2) & 0x3333333333333333ULL);
      number = (number + (number >>  4)) & 0x0f0f0f0f0f0f0f0fULL;
      number =  number + (number >>  8);
      number =  number + (number >> 16);
      number =  number + (number >> 32);
      return (int)(number & 0x7f);
    }
    
  • Lookup Table (Lookup)
    static
    const int
    _popcount_table[256] = {
      0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
    };
    
    static
    int
    _lookup_table_32(uint32_t number) {
      int counter = 0;
      counter += _popcount_table[ number        & 0xff];
      counter += _popcount_table[(number >>  8) & 0xff];
      counter += _popcount_table[(number >> 16) & 0xff];
      counter += _popcount_table[(number >> 24) & 0xff];
      return counter;
    }
    
    static
    int
    _lookup_table_32x2(uint64_t number) {
      return
        _lookup_table_32(number & 0xffffffff) +
        _lookup_table_32(number >> 32);
    }
    
    static
    int
    _lookup_table_64(uint64_t number) {
      int counter = 0;
      counter += _popcount_table[ number        & 0xff];
      counter += _popcount_table[(number >>  8) & 0xff];
      counter += _popcount_table[(number >> 16) & 0xff];
      counter += _popcount_table[(number >> 24) & 0xff];
      counter += _popcount_table[(number >> 32) & 0xff];
      counter += _popcount_table[(number >> 40) & 0xff];
      counter += _popcount_table[(number >> 48) & 0xff];
      counter += _popcount_table[(number >> 56) & 0xff];
      return counter;
    }
    

測定方法とかはソースコードを見てくだされ。popcount_test.c

gcc (4.1.2) のコンパイルオプション '-O3 -m32' での結果

コンパイルオプション '-O3 -m64' での結果

まあ見ての通り、Lookup が速いという結果になりました。ちなみに、Lookup で最も良かったのは、 -m64 でコンパイルした 64bit用のものです。

| | コメント (0) | トラックバック (0)