我只是好奇,因为我想它将对性能产生影响。是否考虑完整字符串?如果是,则在长字符串上会变慢。如果仅考虑字符串的一部分,则会导致性能下降(例如,如果仅考虑字符串的开头,则当HashSet主要包含相同的字符串时,它将导致性能下降。
遇到类似问题时,请确保获取参考源源代码。除了从反编译器中可以看到的内容之外,还有很多其他功能。选择与您的首选.NET目标相匹配的目标,该方法在版本之间已发生了很大变化。我将在此处重现它的.NET 4.5版本,该版本是从Source.NET 4.5 \ 4.6.0.0 \ net \ clr \ src \ BCL \ System \ String.cs \ 604718 \ String.cs检索的
public override int GetHashCode() { #if FEATURE_RANDOMIZED_STRING_HASHING if(HashHelpers.s_UseRandomizedStringHashing) { return InternalMarvin32HashString(this, this.Length, 0); } #endif // FEATURE_RANDOMIZED_STRING_HASHING unsafe { fixed (char *src = this) { Contract.Assert(src[this.Length] == '\0', "src[this.Length] == '\\0'"); Contract.Assert( ((int)src)%4 == 0, "Managed string should start at 4 bytes boundary"); #if WIN32 int hash1 = (5381<<16) + 5381; #else int hash1 = 5381; #endif int hash2 = hash1; #if WIN32 // 32 bit machines. int* pint = (int *)src; int len = this.Length; while (len > 2) { hash1 = ((hash1 << 5) + hash1 + (hash1 >> 27)) ^ pint[0]; hash2 = ((hash2 << 5) + hash2 + (hash2 >> 27)) ^ pint[1]; pint += 2; len -= 4; } if (len > 0) { hash1 = ((hash1 << 5) + hash1 + (hash1 >> 27)) ^ pint[0]; } #else int c; char *s = src; while ((c = s[0]) != 0) { hash1 = ((hash1 << 5) + hash1) ^ c; c = s[1]; if (c == 0) break; hash2 = ((hash2 << 5) + hash2) ^ c; s += 2; } #endif #if DEBUG // We want to ensure we can change our hash function daily. // This is perfectly fine as long as you don't persist the // value from GetHashCode to disk or count on String A // hashing before string B. Those are bugs in your code. hash1 ^= ThisAssembly.DailyBuildNumber; #endif return hash1 + (hash2 * 1566083941); } } }
这可能超出了您的讨价还价,我将对代码进行一些注释: