# acts-like-browser.yaml
  # Assert behaviour that only genuine browsers display. This ensures that
  # modern Chrome or Firefox versions will get through without a challenge.
  #
  # These rules are known to have been bypassed by some of the worst automated
  # scrapers. Use at your own risk.
  # Adjusts the request weight by -10 when the User-Agent names Firefox, Chrome
  # or Safari and the request carries the header set listed under `all:`.
  # NOTE(review): presumably every entry under `all:` must hold for the rule to
  # match — confirm against the policy engine.
  - name: realistic-browser-catchall
    expression:
      all:
        - '"User-Agent" in headers'
        # Folded scalar: the three lines join with single spaces into one
        # expression string.
        - >-
          ( userAgent.contains("Firefox") )
          || ( userAgent.contains("Chrome") )
          || ( userAgent.contains("Safari") )
        - '"Accept" in headers'
        - '"Sec-Fetch-Dest" in headers'
        - '"Sec-Fetch-Mode" in headers'
        - '"Sec-Fetch-Site" in headers'
        # Presence is checked before the value is indexed on the next entry.
        - '"Accept-Encoding" in headers'
        - >-
          ( headers["Accept-Encoding"].contains("zstd")
          || headers["Accept-Encoding"].contains("br") )
        - '"Accept-Language" in headers'
    action: WEIGH
    weight:
      adjust: -10
  # Browsers usually, though not always, send Upgrade-Insecure-Requests, so its
  # presence only adjusts the weight by -2.
  - name: upgrade-insecure-requests
    expression: >-
      "Upgrade-Insecure-Requests" in headers
    action: WEIGH
    weight:
      adjust: -2
  # Chrome should behave like Chrome: when the User-Agent mentions Chrome, the
  # Sec-Ch-Ua, Sec-Ch-Ua-Mobile and Sec-Ch-Ua-Platform headers are checked too,
  # and a match adjusts the weight by -5.
  - name: chrome-is-proper
    expression:
      all:
        # Quoted explicitly: the expression embeds double quotes, and a quoted
        # scalar keeps it visibly a string for readers and linters.
        - 'userAgent.contains("Chrome")'
        # Presence is checked before the header value is indexed below.
        - '"Sec-Ch-Ua" in headers'
        - 'headers["Sec-Ch-Ua"].contains("Chromium")'
        - '"Sec-Ch-Ua-Mobile" in headers'
        - '"Sec-Ch-Ua-Platform" in headers'
    action: WEIGH
    weight:
      adjust: -5
  # A request with no Accept header has its weight adjusted by +5.
  - name: should-have-accept
    expression: >-
      !("Accept" in headers)
    action: WEIGH
    weight:
      adjust: 5
  # Generic catch-all: a User-Agent matching the regex Mozilla|Opera has its
  # weight adjusted by +10.
  - name: generic-browser
    user_agent_regex: 'Mozilla|Opera'
    action: WEIGH
    weight:
      adjust: 10