| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261 |
- package internal
- import (
- "fmt"
- "strings"
- "testing"
- )
- // XXHash64sum is a test alias for FastHash to benchmark against SHA256
- func XXHash64sum(text string) string {
- return FastHash(text)
- }
// Sample inputs shared by the benchmarks and tests in this file. Each slice is
// described by the original author as mirroring a real usage pattern in the
// codebase.

// policyInputs holds typical policy checker inputs: User-Agent header lines,
// request paths/patterns and CIDR ranges.
var policyInputs = []string{
	"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
	"User-Agent: bot/1.0",
	"User-Agent: GoogleBot/2.1",
	"/robots.txt",
	"/api/.*",
	"10.0.0.0/8",
	"192.168.1.0/24",
	"172.16.0.0/12",
}

// challengeInputs holds comma-separated key=value challenge strings; the
// original comment attributes their format to the challengeFor function.
var challengeInputs = []string{
	"Accept-Language=en-US,X-Real-IP=192.168.1.100,User-Agent=Mozilla/5.0,WeekTime=2025-06-16T00:00:00Z,Fingerprint=abc123,Difficulty=5",
	"Accept-Language=fr-FR,X-Real-IP=10.0.0.50,User-Agent=Chrome/91.0,WeekTime=2025-06-16T00:00:00Z,Fingerprint=def456,Difficulty=3",
	"Accept-Language=es-ES,X-Real-IP=172.16.1.1,User-Agent=Safari/14.0,WeekTime=2025-06-16T00:00:00Z,Fingerprint=ghi789,Difficulty=7",
}

// botRuleInputs holds bot rule patterns of the form "<name>::<kind>:<value>".
var botRuleInputs = []string{
	"GoogleBot::path:/robots.txt",
	"BingBot::useragent:Mozilla/5.0 (compatible; bingbot/2.0)",
	"FacebookBot::headers:Accept-Language,User-Agent",
	"TwitterBot::cidr:192.168.1.0/24",
}

// celInputs holds CEL expressions of the kind found in policy rules.
var celInputs = []string{
	`request.headers["User-Agent"].contains("bot")`,
	`request.path.startsWith("/api/") && request.method == "POST"`,
	`request.remoteAddress in ["192.168.1.0/24", "10.0.0.0/8"]`,
	`request.userAgent.matches(".*[Bb]ot.*") || request.userAgent.matches(".*[Cc]rawler.*")`,
}

// asnInputs holds newline-separated Thoth ASN checker inputs: an "ASNChecker"
// header line followed by one "AS <number>" line per ASN.
var asnInputs = []string{
	"ASNChecker\nAS 15169\nAS 8075\nAS 32934",
	"ASNChecker\nAS 13335\nAS 16509\nAS 14061",
	"ASNChecker\nAS 36351\nAS 20940\nAS 8100",
}
- func BenchmarkSHA256_PolicyInputs(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- input := policyInputs[i%len(policyInputs)]
- _ = SHA256sum(input)
- }
- }
- func BenchmarkXXHash_PolicyInputs(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- input := policyInputs[i%len(policyInputs)]
- _ = XXHash64sum(input)
- }
- }
- func BenchmarkSHA256_ChallengeInputs(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- input := challengeInputs[i%len(challengeInputs)]
- _ = SHA256sum(input)
- }
- }
- func BenchmarkXXHash_ChallengeInputs(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- input := challengeInputs[i%len(challengeInputs)]
- _ = XXHash64sum(input)
- }
- }
- func BenchmarkSHA256_BotRuleInputs(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- input := botRuleInputs[i%len(botRuleInputs)]
- _ = SHA256sum(input)
- }
- }
- func BenchmarkXXHash_BotRuleInputs(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- input := botRuleInputs[i%len(botRuleInputs)]
- _ = XXHash64sum(input)
- }
- }
- func BenchmarkSHA256_CELInputs(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- input := celInputs[i%len(celInputs)]
- _ = SHA256sum(input)
- }
- }
- func BenchmarkXXHash_CELInputs(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- input := celInputs[i%len(celInputs)]
- _ = XXHash64sum(input)
- }
- }
- func BenchmarkSHA256_ASNInputs(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- input := asnInputs[i%len(asnInputs)]
- _ = SHA256sum(input)
- }
- }
- func BenchmarkXXHash_ASNInputs(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- input := asnInputs[i%len(asnInputs)]
- _ = XXHash64sum(input)
- }
- }
- // Benchmark the policy list hashing used in checker.go
- func BenchmarkSHA256_PolicyList(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- var sb strings.Builder
- for _, input := range policyInputs {
- fmt.Fprintln(&sb, SHA256sum(input))
- }
- _ = SHA256sum(sb.String())
- }
- }
- func BenchmarkXXHash_PolicyList(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- var sb strings.Builder
- for _, input := range policyInputs {
- fmt.Fprintln(&sb, XXHash64sum(input))
- }
- _ = XXHash64sum(sb.String())
- }
- }
- // Tests that xxhash doesn't have collisions in realistic scenarios
- func TestHashCollisions(t *testing.T) {
- allInputs := append(append(append(append(policyInputs, challengeInputs...), botRuleInputs...), celInputs...), asnInputs...)
- // Start with realistic inputs from actual usage
- xxhashHashes := make(map[string]string)
- for _, input := range allInputs {
- hash := XXHash64sum(input)
- if existing, exists := xxhashHashes[hash]; exists {
- t.Errorf("XXHash collision detected: %q and %q both hash to %s", input, existing, hash)
- }
- xxhashHashes[hash] = input
- }
- t.Logf("Basic test: %d realistic inputs, no collisions", len(allInputs))
- // Test similar strings that might cause hash collisions
- prefixes := []string{"User-Agent: ", "X-Real-IP: ", "Accept-Language: ", "Host: "}
- suffixes := []string{"bot", "crawler", "spider", "scraper", "Mozilla", "Chrome", "Safari", "Firefox"}
- variations := []string{"", "/1.0", "/2.0", " (compatible)", " (Windows)", " (Linux)", " (Mac)"}
- stressCount := 0
- for _, prefix := range prefixes {
- for _, suffix := range suffixes {
- for _, variation := range variations {
- for i := 0; i < 100; i++ {
- input := fmt.Sprintf("%s%s%s-%d", prefix, suffix, variation, i)
- hash := XXHash64sum(input)
- if existing, exists := xxhashHashes[hash]; exists {
- t.Errorf("XXHash collision in stress test: %q and %q both hash to %s", input, existing, hash)
- }
- xxhashHashes[hash] = input
- stressCount++
- }
- }
- }
- }
- t.Logf("Stress test 1: %d similar string variations, no collisions", stressCount)
- // Test sequential patterns that might be problematic
- patterns := []string{
- "192.168.1.%d",
- "10.0.0.%d",
- "172.16.%d.1",
- "challenge-%d",
- "bot-rule-%d",
- "policy-%016x",
- "session-%016x",
- }
- seqCount := 0
- for _, pattern := range patterns {
- for i := 0; i < 10000; i++ {
- input := fmt.Sprintf(pattern, i)
- hash := XXHash64sum(input)
- if existing, exists := xxhashHashes[hash]; exists {
- t.Errorf("XXHash collision in sequential test: %q and %q both hash to %s", input, existing, hash)
- }
- xxhashHashes[hash] = input
- seqCount++
- }
- }
- t.Logf("Stress test 2: %d sequential patterns, no collisions", seqCount)
- totalInputs := len(allInputs) + stressCount + seqCount
- t.Logf("TOTAL: Tested %d inputs across realistic scenarios - NO COLLISIONS", totalInputs)
- }
- // Verify xxhash output works as cache keys
- func TestXXHashFormat(t *testing.T) {
- testCases := []string{
- "short",
- "",
- "very long string with lots of content that might be used in policy checking and other internal hashing scenarios",
- "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
- }
- for _, input := range testCases {
- hash := XXHash64sum(input)
- // Check it's valid hex
- if len(hash) == 0 {
- t.Errorf("Empty hash for input %q", input)
- }
- // xxhash is 64-bit so max 16 hex chars
- if len(hash) > 16 {
- t.Errorf("Hash too long for input %q: %s (length %d)", input, hash, len(hash))
- }
- // Make sure it's all hex characters
- for _, char := range hash {
- if !((char >= '0' && char <= '9') || (char >= 'a' && char <= 'f')) {
- t.Errorf("Non-hex character %c in hash %s for input %q", char, hash, input)
- }
- }
- t.Logf("Input: %q -> Hash: %s", input, hash)
- }
- }
|