## botPolicies.yaml
## Anubis has the ability to let you import snippets of configuration into the main
## configuration file. This allows you to break up your config into smaller parts
## that get logically assembled into one big file.
##
## Of note, a bot rule can either have inline bot configuration or import a
## bot config snippet. You cannot do both in a single bot rule.
##
## Import paths can either be prefixed with (data) to import from the common/shared
## rules in the data folder in the Anubis source tree, or point to absolute/relative
## paths in your filesystem. If you don't have access to the Anubis source tree, check
## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
  12. bots:
  13. # You can import the entire default config with this macro:
  14. # - import: (data)/meta/default-config.yaml
  15. # Pathological bots to deny
  16. - # This correlates to data/bots/_deny-pathological.yaml in the source tree
  17. # https://github.com/TecharoHQ/anubis/blob/main/data/bots/_deny-pathological.yaml
  18. import: (data)/bots/_deny-pathological.yaml
  19. - import: (data)/bots/aggressive-brazilian-scrapers.yaml
  20. # Aggressively block AI/LLM related bots/agents by default
  21. - import: (data)/meta/ai-block-aggressive.yaml
  22. # Consider replacing the aggressive AI policy with more selective policies:
  23. # - import: (data)/meta/ai-block-moderate.yaml
  24. # - import: (data)/meta/ai-block-permissive.yaml
  25. # Search engine crawlers to allow, defaults to:
  26. # - Google (so they don't try to bypass Anubis)
  27. # - Apple
  28. # - Bing
  29. # - DuckDuckGo
  30. # - Qwant
  31. # - The Internet Archive
  32. # - Kagi
  33. # - Marginalia
  34. # - Mojeek
  35. - import: (data)/crawlers/_allow-good.yaml
  36. # Challenge Firefox AI previews
  37. - import: (data)/clients/x-firefox-ai.yaml
  38. # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
  39. - import: (data)/common/keep-internet-working.yaml
  40. # # Punish any bot with "bot" in the user-agent string
  41. # # This is known to have a high false-positive rate, use at your own risk
  42. # - name: generic-bot-catchall
  43. # user_agent_regex: (?i:bot|crawler)
  44. # action: CHALLENGE
  45. # challenge:
  46. # difficulty: 16 # impossible
  47. # algorithm: slow # intentionally waste CPU cycles and time
  48. # Requires a subscription to Thoth to use, see
  49. # https://anubis.techaro.lol/docs/admin/thoth#geoip-based-filtering
  50. - name: countries-with-aggressive-scrapers
  51. action: WEIGH
  52. geoip:
  53. countries:
  54. - BR
  55. - CN
  56. weight:
  57. adjust: 10
  58. # Requires a subscription to Thoth to use, see
  59. # https://anubis.techaro.lol/docs/admin/thoth#asn-based-filtering
  60. - name: aggressive-asns-without-functional-abuse-contact
  61. action: WEIGH
  62. asns:
  63. match:
  64. - 13335 # Cloudflare
  65. - 136907 # Huawei Cloud
  66. - 45102 # Alibaba Cloud
  67. weight:
  68. adjust: 10
  69. # ## System load based checks.
  70. # # If the system is under high load, add weight.
  71. # - name: high-load-average
  72. # action: WEIGH
  73. # expression: load_1m >= 10.0 # make sure to end the load comparison in a .0
  74. # weight:
  75. # adjust: 20
  76. ## If your backend service is running on the same operating system as Anubis,
  77. ## you can uncomment this rule to make the challenge easier when the system is
  78. ## under low load.
  79. ##
  80. ## If it is not, remove weight.
  81. # - name: low-load-average
  82. # action: WEIGH
  83. # expression: load_15m <= 4.0 # make sure to end the load comparison in a .0
  84. # weight:
  85. # adjust: -10
  86. # Generic catchall rule
  87. - name: generic-browser
  88. user_agent_regex: >-
  89. Mozilla|Opera
  90. action: WEIGH
  91. weight:
  92. adjust: 10
  93. dnsbl: false

# #
# impressum:
#   # Displayed at the bottom of every page rendered by Anubis.
#   footer: >-
#     This website is hosted by Zombocom. If you have any complaints or notes
#     about the service, please contact
#     <a href="mailto:contact@domainhere.example">contact@domainhere.example</a>
#     and we will assist you as soon as possible.
#   # The imprint page that will be linked to at the footer of every Anubis page.
#   page:
#     # The HTML <title> of the page
#     title: Imprint and Privacy Policy
#     # The HTML contents of the page. The exact contents of this page can
#     # and will vary by locale. Please consult with a lawyer if you are not
#     # sure what to put here
#     body: >-
#       <p>Last updated: June 2025</p>
#       <h2>Information that is gathered from visitors</h2>
#       <p>In common with other websites, log files are stored on the web server saving details such as the visitor's IP address, browser type, referring page and time of visit.</p>
#       <p>Cookies may be used to remember visitor preferences when interacting with the website.</p>
#       <p>Where registration is required, the visitor's email and a username will be stored on the server.</p>
#       <!-- ... -->
  116. # Open Graph passthrough configuration, see here for more information:
  117. # https://anubis.techaro.lol/docs/admin/configuration/open-graph/
  118. openGraph:
  119. # Enables Open Graph passthrough
  120. enabled: false
  121. # Enables the use of the HTTP host in the cache key, this enables
  122. # caching metadata for multiple http hosts at once.
  123. considerHost: false
  124. # How long cached OpenGraph metadata should last in memory
  125. ttl: 24h
  126. # # If set, return these opengraph values instead of looking them up with
  127. # # the target service.
  128. # #
  129. # # Correlates to properties in https://ogp.me/
  130. # override:
  131. # # og:title is required, it is the title of the website
  132. # "og:title": "Techaro Anubis"
  133. # "og:description": >-
  134. # Anubis is a Web AI Firewall Utility that helps you fight the bots
  135. # away so that you can maintain uptime at work!
  136. # "description": >-
  137. # Anubis is a Web AI Firewall Utility that helps you fight the bots
  138. # away so that you can maintain uptime at work!
  139. # By default, send HTTP 200 back to clients that either get issued a challenge
  140. # or a denial. This seems weird, but this is load-bearing due to the fact that
  141. # the most aggressive scraper bots seem to really, really, want an HTTP 200 and
  142. # will stop sending requests once they get it.
  143. status_codes:
  144. CHALLENGE: 200
  145. DENY: 200
  146. # Anubis can store temporary data in one of a few backends. See the storage
  147. # backends section of the docs for more information:
  148. #
  149. # https://anubis.techaro.lol/docs/admin/policies#storage-backends
  150. store:
  151. backend: memory
  152. parameters: {}
  153. # The weight thresholds for when to trigger individual challenges. Any
  154. # CHALLENGE will take precedence over this.
  155. #
  156. # A threshold has four configuration options:
  157. #
  158. # - name: the name that is reported down the stack and used for metrics
  159. # - expression: A CEL expression with the request weight in the variable
  160. # weight
  161. # - action: the Anubis action to apply, similar to in a bot policy
  162. # - challenge: which challenge to send to the user, similar to in a bot policy
  163. #
  164. # See https://anubis.techaro.lol/docs/admin/configuration/thresholds for more
  165. # information.
  166. thresholds:
  167. # By default Anubis ships with the following thresholds:
  168. - name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather
  169. expression: weight <= 0 # a feather weighs zero units
  170. action: ALLOW # Allow the traffic through
  171. # For clients that had some weight reduced through custom rules, give them a
  172. # lightweight challenge.
  173. - name: mild-suspicion
  174. expression:
  175. all:
  176. - weight > 0
  177. - weight < 10
  178. action: CHALLENGE
  179. challenge:
  180. # https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
  181. algorithm: metarefresh
  182. difficulty: 1
  183. # For clients that are browser-like but have either gained points from custom rules or
  184. # report as a standard browser.
  185. - name: moderate-suspicion
  186. expression:
  187. all:
  188. - weight >= 10
  189. - weight < 20
  190. action: CHALLENGE
  191. challenge:
  192. # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
  193. algorithm: fast
  194. difficulty: 2 # two leading zeros, very fast for most clients
  195. - name: mild-proof-of-work
  196. expression:
  197. all:
  198. - weight >= 20
  199. - weight < 30
  200. action: CHALLENGE
  201. challenge:
  202. # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
  203. algorithm: fast
  204. difficulty: 4
  205. # For clients that are browser like and have gained many points from custom rules
  206. - name: extreme-suspicion
  207. expression: weight >= 30
  208. action: CHALLENGE
  209. challenge:
  210. # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
  211. algorithm: fast
  212. difficulty: 6