# Assert behaviour that only genuine browsers display. This ensures that
# modern Chrome or Firefox versions will get through without a challenge.
#
# These rules have been known to be bypassed by some of the worst automated
# scrapers. Use at your own risk.
#
# NOTE(review): every rule below uses the WEIGH action. Negative `adjust`
# values appear to make a request look more legitimate and positive values
# more suspicious -- confirm against the policy engine documentation.

# Matches only when every listed condition holds: a browser-like User-Agent
# plus the Accept, Sec-Fetch-*, Accept-Encoding and Accept-Language headers.
- name: realistic-browser-catchall
  expression:
    all:
      - '"User-Agent" in headers'
      - '( userAgent.contains("Firefox") ) || ( userAgent.contains("Chrome") ) || ( userAgent.contains("Safari") )'
      - '"Accept" in headers'
      - '"Sec-Fetch-Dest" in headers'
      - '"Sec-Fetch-Mode" in headers'
      - '"Sec-Fetch-Site" in headers'
      - '"Accept-Encoding" in headers'
      # The presence check above comes first so this lookup never runs
      # against a missing header (assumes conditions are evaluated in
      # order -- TODO confirm).
      - '( headers["Accept-Encoding"].contains("zstd") || headers["Accept-Encoding"].contains("br") )'
      - '"Accept-Language" in headers'
  action: WEIGH
  weight:
    adjust: -10

# The Upgrade-Insecure-Requests header is typically sent by browsers, but not
# always, so it only earns a small adjustment.
- name: upgrade-insecure-requests
  expression: '"Upgrade-Insecure-Requests" in headers'
  action: WEIGH
  weight:
    adjust: -2

# Chrome should behave like Chrome: a User-Agent claiming Chrome must also
# carry the Sec-Ch-Ua client-hint headers, and Sec-Ch-Ua must mention
# "Chromium".
- name: chrome-is-proper
  expression:
    all:
      - 'userAgent.contains("Chrome")'
      - '"Sec-Ch-Ua" in headers'
      - 'headers["Sec-Ch-Ua"].contains("Chromium")'
      - '"Sec-Ch-Ua-Mobile" in headers'
      - '"Sec-Ch-Ua-Platform" in headers'
  action: WEIGH
  weight:
    adjust: -5

# Requests that carry no Accept header at all gain weight.
- name: should-have-accept
  expression: '!("Accept" in headers)'
  action: WEIGH
  weight:
    adjust: 5

# Generic catchall rule: any User-Agent matching "Mozilla" or "Opera" gains
# weight. If real browsers also match this pattern, the +10 here offsets the
# -10 from realistic-browser-catchall (assumes weights are summed across
# matching rules -- TODO confirm).
- name: generic-browser
  user_agent_regex: >-
    Mozilla|Opera
  action: WEIGH
  weight:
    adjust: 10