| From | Sent On | Attachments |
|---|---|---|
| Nobuyoshi Nakada | Mar 11, 2008 1:32 am | |
| Yukihiro Matsumoto | Mar 11, 2008 9:00 am | |
| NARUSE, Yui | Mar 11, 2008 11:34 am | |
| U.Nakamura | Mar 11, 2008 5:34 pm | |
| NARUSE, Yui | Mar 12, 2008 3:49 am | .patch |
| Nobuyoshi Nakada | Mar 12, 2008 7:04 am | |
| Yukihiro Matsumoto | Mar 12, 2008 8:26 am | |
| pegacorn | Mar 13, 2008 6:44 am | |
| NARUSE, Yui | Mar 13, 2008 10:08 am |
| Subject: | [ruby-dev:34027] Re: MurmurHash problem | |
|---|---|---|
| From: | Nobuyoshi Nakada (no...@ruby-lang.org) | |
| Date: | Mar 12, 2008 7:04:15 am | |
| List: | org.ruby-lang.ruby-dev | |
なかだです。
At Wed, 12 Mar 2008 19:50:18 +0900, NARUSE, Yui wrote in [ruby-dev:34026]:
というわけで、int32_t, uint32_t, intptr_t, uintptr_t を定義しつつ、 hash() をアライメント考慮させるパッチです。
inttypes.hをチェックしてないような気がします。
あとBig endianになるとやや遅くなりそうなので、一応作っておいたの も出してみます。
Index: configure.in
===================================================================
--- configure.in (revision 15758)
+++ configure.in (working copy)
@@ -583,5 +583,5 @@ AC_CHECK_HEADERS(stdlib.h string.h unist
syscall.h pwd.h grp.h a.out.h utime.h memory.h direct.h sys/resource.h \
sys/mkdev.h sys/utime.h xti.h netinet/in_systm.h float.h ieeefp.h pthread.h
\
- ucontext.h intrinsics.h langinfo.h locale.h)
+ ucontext.h intrinsics.h langinfo.h locale.h stdint.h)
dnl Check additional types. @@ -621,4 +621,16 @@ AC_CHECK_TYPES(struct timespec) AC_CHECK_TYPE(fd_mask, [AC_DEFINE(HAVE_RB_FD_INIT, 1)])
+test ${rb_cv_type_uint32_t+set} && ac_cv_type_uint32_t=yes +AC_CHECK_TYPE(uint32_t) +if test ${ac_cv_type_uint32_t} != yes; then + AC_CACHE_CHECK([unsigned 32bit int], + rb_cv_type_uint32_t, + [for type in short int long; do + AC_COMPILE_IFELSE(AC_LANG_BOOL_COMPILE_TRY([], [sizeof($type) == 4]), + [rb_cv_type_uint32_t=$type; break], []) + done]) + AC_DEFINE(uint32_t, $rb_cv_type_uint32_t) +fi + AC_CACHE_CHECK(for stack end address, rb_cv_stack_end_address, [rb_cv_stack_end_address=no Index: string.c =================================================================== --- string.c (revision 15758) +++ string.c (working copy) @@ -26,4 +26,8 @@ #endif
+#if HAVE_STDINT_H +#include <stdint.h> +#endif + VALUE rb_cString; VALUE rb_cSymbol; @@ -1686,4 +1690,11 @@ rb_str_concat(VALUE str1, VALUE str2) }
+#if defined __i386__ || defined _M_IX86 || defined __ia64 +#define UNALIGNED_WORD_ACCESS 1 +#endif +#ifndef UNALIGNED_WORD_ACCESS +#define UNALIGNED_WORD_ACCESS 0 +#endif + /* MurmurHash described in http://murmurhash.googlepages.com/ */ unsigned int @@ -1695,14 +1706,93 @@ hash(const unsigned char * data, int len h += 0xdeadbeef;
- while(len >= 4) { - h += *(unsigned int *)data; - h *= m; - h ^= h >> r; + if (len >= 4) { +#if !UNALIGNED_WORD_ACCESS + int align = (VALUE)data & 3; + if (align) { + uint32_t t = 0, d = 0; + int sl, sr, pack; + + switch (align) { +#ifdef WORDS_BIGENDIAN + case 1: t |= data[2]; + case 2: t |= data[1] << 8; + case 3: t |= data[0] << 16; +#else + case 1: t |= data[2] << 16; + case 2: t |= data[1] << 8; + case 3: t |= data[0]; +#endif + } + + t <<= (8 * align); + + data += 4-align; + len -= 4-align; + + sl = 8 * (4-align); + sr = 8 * align; + + while (len >= 4) { + d = *(uint32_t *)data; +#ifdef WORDS_BIGENDIAN + t = (t << sr) | (d >> sl); +#else + t = (t >> sr) | (d << sl); +#endif + h += t; + h *= m; + h ^= h >> r; + t = d; + + data += 4; + len -= 4; + } + + pack = len < align ? len : align; + d = 0; + switch (pack) { +#ifdef WORDS_BIGENDIAN + case 3: d |= data[2]; + case 2: d |= data[1] << 8; + case 1: d |= data[0] << 16; + case 0: + h += (t << sr) | (d >> sl); +#else + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + case 0: + h += (t >> sr) | (d << sl); +#endif + h *= m; + h ^= h >> r; + }
- data += 4; - len -= 4; + data += pack; + len -= pack; + } + else +#endif + { + do { + h += *(uint32_t *)data; + h *= m; + h ^= h >> r; + + data += 4; + len -= 4; + } while (len >= 4); + } }
switch(len) { +#ifdef WORDS_BIGENDIAN + case 3: + h += data[2]; + case 2: + h += data[1] << 8; + case 1: + h += data[0] << 16; +#else case 3: h += data[2] << 16; @@ -1711,4 +1801,5 @@ hash(const unsigned char * data, int len case 1: h += data[0]; +#endif h *= m; h ^= h >> r;
-- --- 僕の前にBugはない。 --- 僕の後ろにBugはできる。 中田 伸悦






.patch