botPolicies.yaml 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
## Anubis has the ability to let you import snippets of configuration into the main
## configuration file. This allows you to break up your config into smaller parts
## that get logically assembled into one big file.
##
## Of note, a bot rule can either have inline bot configuration or import a
## bot config snippet. You cannot do both in a single bot rule.
##
## Import paths are either prefixed with (data), which imports from the common/shared
## rules in the data folder of the Anubis source tree, or they point to absolute/relative
## paths on your filesystem. If you don't have access to the Anubis source tree, check
## /usr/share/docs/anubis/data or the tarball you extracted Anubis from.
  12. bots:
  13. - import: (data)/crawlers/commoncrawl.yaml
  14. # Pathological bots to deny
  15. - # This correlates to data/bots/deny-pathological.yaml in the source tree
  16. # https://github.com/TecharoHQ/anubis/blob/main/data/bots/deny-pathological.yaml
  17. import: (data)/bots/_deny-pathological.yaml
  18. - import: (data)/bots/aggressive-brazilian-scrapers.yaml
  19. # Aggressively block AI/LLM related bots/agents by default
  20. - import: (data)/meta/ai-block-aggressive.yaml
  21. # Consider replacing the aggressive AI policy with more selective policies:
  22. # - import: (data)/meta/ai-block-moderate.yaml
  23. # - import: (data)/meta/ai-block-permissive.yaml
  24. # Search engine crawlers to allow, defaults to:
  25. # - Google (so they don't try to bypass Anubis)
  26. # - Apple
  27. # - Bing
  28. # - DuckDuckGo
  29. # - Qwant
  30. # - The Internet Archive
  31. # - Kagi
  32. # - Marginalia
  33. # - Mojeek
  34. - import: (data)/crawlers/_allow-good.yaml
  35. # Challenge Firefox AI previews
  36. - import: (data)/clients/x-firefox-ai.yaml
  37. # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
  38. - import: (data)/common/keep-internet-working.yaml
  39. # # Punish any bot with "bot" in the user-agent string
  40. # # This is known to have a high false-positive rate, use at your own risk
  41. # - name: generic-bot-catchall
  42. # user_agent_regex: (?i:bot|crawler)
  43. # action: CHALLENGE
  44. # challenge:
  45. # difficulty: 16 # impossible
  46. # algorithm: slow # intentionally waste CPU cycles and time
  47. - name: rss-feed-blog
  48. action: ALLOW
  49. expression:
  50. any:
  51. - path.startsWith("/blog/atom.")
  52. - path.startsWith("/blog/rss.")
  53. # Generic catchall rule
  54. - name: base-weight
  55. expression: "true"
  56. action: WEIGH
  57. weight:
  58. adjust: 10
  59. - name: http2-client-protocol
  60. expression:
  61. all:
  62. - '"X-Http-Protocol" in headers'
  63. - headers["X-Http-Protocol"] == "HTTP/2.0"
  64. action: WEIGH
  65. weight:
  66. adjust: -5
  67. # The weight thresholds for when to trigger individual challenges. Any
  68. # CHALLENGE will take precedence over this.
  69. #
  70. # A threshold has four configuration options:
  71. #
  72. # - name: the name that is reported down the stack and used for metrics
  73. # - expression: A CEL expression with the request weight in the variable
  74. # weight
  75. # - action: the Anubis action to apply, similar to in a bot policy
  76. # - challenge: which challenge to send to the user, similar to in a bot policy
  77. #
  78. # See https://anubis.techaro.lol/docs/admin/configuration/thresholds for more
  79. # information.
  80. thresholds:
  81. # By default Anubis ships with the following thresholds:
  82. - name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather
  83. expression: weight <= 0 # a feather weighs zero units
  84. action: ALLOW # Allow the traffic through
  85. # For clients that had some weight reduced through custom rules, give them a
  86. # lightweight challenge.
  87. - name: mild-suspicion
  88. expression:
  89. all:
  90. - weight > 0
  91. - weight < 10
  92. action: CHALLENGE
  93. challenge:
  94. # https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
  95. algorithm: metarefresh
  96. difficulty: 1
  97. # For clients that are browser-like but have either gained points from custom rules or
  98. # report as a standard browser.
  99. - name: moderate-suspicion
  100. expression:
  101. all:
  102. - weight >= 10
  103. - weight < 20
  104. action: CHALLENGE
  105. challenge:
  106. # https://anubis.techaro.lol/docs/admin/configuration/challenges/preact
  107. #
  108. # This challenge proves the client can run a webapp written with Preact.
  109. # The preact webapp simply loads, calculates the SHA-256 checksum of the
  110. # challenge data, and forwards that to the client.
  111. algorithm: preact
  112. difficulty: 1
  113. - name: mild-proof-of-work
  114. expression:
  115. all:
  116. - weight >= 20
  117. - weight < 30
  118. action: CHALLENGE
  119. challenge:
  120. # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
  121. algorithm: fast
  122. difficulty: 2 # two leading zeros, very fast for most clients
  123. # For clients that are browser like and have gained many points from custom rules
  124. - name: extreme-suspicion
  125. expression: weight >= 30
  126. action: CHALLENGE
  127. challenge:
  128. # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
  129. algorithm: fast
  130. difficulty: 4
  131. dnsbl: false
  132. impressum:
  133. footer: |
  134. This website is hosted by Techaro. If you have any complaints or notes about the service, please contact <a href="mailto:support@techaro.lol">support@techaro.lol</a> and we will assist you as soon as possible.
  135. page:
  136. title: Privacy Policy
  137. body: |
  138. <p>Last updated: June 2025</p>
  139. <h2>Information that is gathered from visitors</h2>
  140. <p>In common with other websites, log files are stored on the web server saving details such as the visitor's IP address, browser type, referring page and time of visit.</p>
  141. <p>Cookies may be used to remember visitor preferences when interacting with the website.</p>
  142. <p>Where registration is required, the visitor's email and a username will be stored on the server.</p>
  143. <h2>How the Information is used</h2>
  144. <p>The information is used to enhance the visitor's experience when using the website to display personalised content and possibly advertising.</p>
  145. <p>E-mail addresses will not be sold, rented or leased to 3rd parties.</p>
  146. <p>E-mail may be sent to inform you of news of our services or offers by us or our affiliates.</p>
  147. <h2>Visitor Options</h2>
  148. <p>If you have subscribed to one of our services, you may unsubscribe by following the instructions which are included in e-mail that you receive.</p>
  149. <p>You may be able to block cookies via your browser settings but this may prevent you from access to certain features of the website.</p>
  150. <h2>Cookies</h2>
  151. <p>Cookies are small digital signature files that are stored by your web browser that allow your preferences to be recorded when visiting the website. Also they may be used to track your return visits to the website.</p>
  152. <p>3rd party advertising companies may also use cookies for tracking purposes.</p>
  153. <h2>Techaro Anubis</h2>
  154. <p>This website uses a service called <a href="https://anubis.techaro.lol">Anubis</a> to filter malicious traffic. Anubis requires the use of browser cookies to ensure that web clients are running conformant software. Anubis also may report the following data to Techaro to improve service quality:</p>
  155. <ul>
  156. <li>IP address (for purposes of matching against geo-location and BGP autonomous systems numbers), which is stored in-memory and not persisted to disk.</li>
  157. <li>Unique browser fingerprints (such as HTTP request fingerprints and encryption system fingerprints), which may be stored on Techaro's side for a period of up to one month.</li>
  158. <li>HTTP request metadata that may include things such as the User-Agent header and other identifiers.</li>
  159. </ul>
  160. <p>This data is processed and stored for the legitimate interest of combatting abusive web clients. This data is encrypted at rest as much as possible and is only decrypted in memory for the purposes of fulfilling requests.</p>
  161. # By default, send HTTP 200 back to clients that either get issued a challenge
  162. # or a denial. This seems weird, but this is load-bearing due to the fact that
  163. # the most aggressive scraper bots seem to really, really, want an HTTP 200 and
  164. # will stop sending requests once they get it.
  165. status_codes:
  166. CHALLENGE: 200
  167. DENY: 200
  168. store:
  169. backend: bbolt
  170. parameters:
  171. path: /xe/data/anubis/data.bdb