Home Events! Entrance Everyone Wiki Search Login Register

Welcome, Guest. Please login or register. - Thinking of joining the forum??
December 03, 2024 - @764.57 (what is this?)
Forum activity rating: Two Stars Posts: 24/1k.beats Unread Topics | Unread Replies | My Stuff | Random Topic | Recent Posts    Start New Topic
News: :4u: ~~~~~~~~~~~  :4u:

+  MelonLand Forum
|-+  World Wild Web
| |-+  ♺ ∙ Web Crafting Materials
| | |-+  reject.js: a bit of JavaScript to reject bad referrers and bots


« previous next »
Pages: [1] Print
Author Topic: reject.js: a bit of JavaScript to reject bad referrers and bots  (Read 781 times)
starbreaker
Sr. Member ⚓︎
****


What good is Heaven if we dare not storm it?

⛺︎ My Room
SpaceHey: Friend Me!

View Profile WWW

First 1000 Members!G4 Club Member!Joined 2023!
« on: October 17, 2023 @873.53 »

I recently wrote a bit of JavaScript that checks referrer and user agent and redirects visitors to an arbitrary destination. It can also, at the operator's discretion, display a message in a pop-up after redirecting.

It isn't a substitute for using robots.txt or rejecting bad referrers in .htaccess or httpd.conf, but if you're on Neocities you probably don't have access to the web server's configuration. Likewise, if a site doesn't provide a referrer header or the visitor's browser doesn't implement JavaScript, this won't work.

If your user has a JS-capable browser but isn't using modern JS, this script might not work either.

It's probably little more than a bit of trolling that might catch some people who aren't as tech-savvy as they think they are.

Nevertheless, you're welcome to it if you want it.

Code
/*
    DDOS-as-a-service rejection, with assistance from 💕 Devastatia 💕
    no license; do what thou wilt shall be the whole of the law
*/

// Run the referrer/user-agent check when the window's "load" event fires.
window.addEventListener("load", testReferrerAndUserAgent);

/**
 * Load-event handler: logs the visitor's referrer and user agent, then sends
 * the visitor to the reject destination when either one is on a block list.
 *
 * The destination, the pop-up text, and whether the pop-up is shown are
 * configured by the constants at the top of the function body.
 */
function testReferrerAndUserAgent() {
    const redirectTarget = "about:blank";
    const popupText = "You will find scant welcome on MY website.";
    const popupEnabled = true;

    // Fragments looked for in document.referrer.
    const blockedReferrers = [
        "facebook.com",
        "twitter.com",
        "news.ycombinator.com",
        "threads.net",
        "[auto redacted].org",
        "reddit.com"
    ];

    // Fragments looked for in navigator.userAgent.
    const blockedUserAgents = [
        "360spider",
        "80legs",
        "80legs.com",
        "abonti",
        "aboundex",
        "acoonbot",
        "acunetix",
        "adbeat_bot",
        "addthis.com",
        "adidxbot",
        "admantx",
        "ahrefs",
        "ahrefsbot",
        "aibot",
        "aiohttp",
        "alexibot",
        "alligator",
        "allsubmitter",
        "angloinfo",
        "antelope",
        "apexoo",
        "asterias",
        "attach",
        "backdoorbot",
        "backstreet",
        "backweb",
        "badass",
        "baid",
        "baiduspider",
        "bandit",
        "batchftp",
        "bbbike",
        "beetlebot",
        "bigfoot",
        "billigerbot",
        "binlar",
        "bitlybot",
        "black.hole",
        "blackwidow",
        "blow",
        "blowfish",
        "blp_bbot",
        "boardreader",
        "bolt 0",
        "bot for jce",
        "bot mailto:craftbot@yahoo.com",
        "botalot",
        "buddy",
        "builtbottough",
        "bullseye",
        "bunnyslippers",
        "bytespider",
        "casper",
        "cazoodlebot",
        "ccbot",
        "cegbfeieh",
        "checkprivacy",
        "cheesebot",
        "cherrypicker",
        "chinaclaw",
        "chromeframe",
        "clerkbot",
        "cliqzbot",
        "clshttp",
        "cogentbot",
        "cognitiveseo",
        "collector",
        "commoncrawler",
        "comodo",
        "copier",
        "copyrightcheck",
        "cosmos",
        "cpp-httplib",
        "cpython",
        "crawler4j",
        "crawlera",
        "crazywebcrawler",
        "crescent",
        "cshttp",
        "curious",
        "curl",
        "custo",
        "cws_proxy",
        "dataforseobot",
        "dataprovider.com",
        "default browser 0",
        "demon",
        "devil",
        "diavol",
        "digext",
        "digincore",
        "diibot",
        "disco",
        "discobot",
        "dittospyder",
        "docomo",
        "dotbot",
        "download demon",
        "download.demon",
        "download.devil",
        "download.wonder",
        "dragonfly",
        "drip",
        "dts.agent",
        "easouspider",
        "easydl",
        "ebingbong",
        "ecatch",
        "ecxi",
        "eirgrabber",
        "elmer",
        "emailcollector",
        "emailsiphon",
        "emailwolf",
        "erocrawler",
        "exabot",
        "exaleadcloudview",
        "expertsearch",
        "expertsearchspider",
        "express",
        "express webpictures",
        "extract",
        "extractor",
        "extractorpro",
        "eyenetie",
        "ezooms",
        "f2s",
        "facebookexternalhit",
        "fastseek",
        "feedfinder",
        "feedlybot",
        "fhscan",
        "finbot",
        "flamingo_searchengine",
        "flappybot",
        "flashget",
        "flicky",
        "flipboard",
        "flunky",
        "foobot",
        "frontpage",
        "g00g1e",
        "galaxybot",
        "genieo",
        "getright",
        "getweb!",
        "gigablastopensource",
        "go!zilla",
        "go-ahead-got-it",
        "go-http-client",
        "got",
        "gotit",
        "gozaikbot",
        "grab",
        "grabber",
        "grabnet",
        "grafula",
        "grapeshotcrawler",
        "gt::www",
        "gtb5",
        "guzzle",
        "harvest",
        "headlesschrome",
        "headmasterseo",
        "heritrix",
        "hloader",
        "hmview",
        "homepagebot",
        "htmlparser",
        "http::lite",
        "httrack",
        "hubspot",
        "humanlinks",
        "icarus6",
        "id-search",
        "idbot",
        "ilsebot",
        "image stripper",
        "image sucker",
        "image.stripper",
        "image.sucker",
        "imagefetch",
        "indigonet",
        "indy library",
        "infonavirobot",
        "infotekies",
        "integromedb",
        "intelliseek",
        "interget",
        "internet ninja",
        "internetseer.com",
        "ioncrawl",
        "iria",
        "irlbot",
        "isc systems irc search 2.1",
        "jakarta",
        "java",
        "jennybot",
        "jetcar",
        "jikespider",
        "jobdiggerspider",
        "joc",
        "joc web spider",
        "jooblebot",
        "justview",
        "jyxobot",
        "kanagawa",
        "kenjin.spider",
        "keyword.density",
        "kingspider",
        "kmccrew",
        "larbin",
        "leechftp",
        "leechget",
        "lexibot",
        "lftp",
        "libcurl",
        "libweb",
        "libwww",
        "libwww-perl",
        "likse",
        "lingewoud",
        "linkchecker",
        "linkdexbot",
        "linkextractorpro",
        "linkscan",
        "linkscrawler",
        "linksmanager.com_bot",
        "linkwalker",
        "linqiarssbot",
        "livelapbot",
        "lnspiderguy",
        "ltx71",
        "lubbersbot",
        "lwp-trivial",
        "mag-net",
        "magnet",
        "mail.ru_bot",
        "majestic12",
        "markwatch",
        "mass downloader",
        "mass.downloader",
        "masscan",
        "mastodon",
        "mata.hari",
        "maverick",
        "maxthon$",
        "mediatoolkitbot",
        "megaindex",
        "memo",
        "mfc_tear_sample",
        "microsoft url control",
        "microsoft.url",
        "midown tool",
        "miixpc",
        "miner",
        "missigua locator",
        "misskey",
        "mister pix",
        "mj12bot",
        "mozilla.*indy",
        "mozilla.*newt",
        "msfrontpage",
        "msiecrawler",
        "msnbot",
        "nameprotect",
        "navroad",
        "nearsite",
        "net vampire",
        "netants",
        "netcraft",
        "netestate",
        "netmechanic",
        "netspider",
        "netzip",
        "nextgensearchbot",
        "nicerspro",
        "niki-bot",
        "nimblecrawler",
        "nimbostratus-bot",
        "ninja",
        "nmap",
        "npbot",
        "nutch",
        "octopus",
        "offline explorer",
        "offline navigator",
        "offline.explorer",
        "offline.navigator",
        "okhttp",
        "openfind",
        "openindexspider",
        "openlinkprofiler",
        "openwebspider",
        "orangebot",
        "outfoxbot",
        "owlin",
        "pagegrabber",
        "pagesinventory",
        "panopta",
        "panscient.com",
        "papa foto",
        "pavuk",
        "pcbrowser",
        "pecl::http",
        "peoplepal",
        "photon",
        "phpcrawl",
        "pixray",
        "planetwork",
        "pleasecrawl",
        "pnamain.exe",
        "pockey",
        "podcastpartybot",
        "prijsbest",
        "probethenet",
        "propowerbot",
        "prowebwalker",
        "proximic",
        "psbot",
        "pump",
        "purebot",
        "pycurl",
        "python",
        "python-requests",
        "queryn.metasearch",
        "queryseekerspider",
        "r6_commentreader",
        "r6_feedfetcher",
        "realdownload",
        "reaper",
        "recorder",
        "reget",
        "repomonkey",
        "riddler",
        "ripper",
        "rippers 0",
        "rma",
        "rogerbot",
        "rssingbot",
        "ruby",
        "rv:1.9.1",
        "ryzecrawler",
        "safesearch",
        "sbider",
        "scanbot",
        "scrapy",
        "screaming",
        "seamonkey$",
        "search.goo.ne.jp",
        "search_robot",
        "searchmetricsbot",
        "semalt",
        "semrush",
        "semrushbot",
        "sentibot",
        "seokicks",
        "seokicks-robot",
        "seoscanners",
        "seznambot",
        "showyoubot",
        "sightupbot",
        "siphon",
        "sistrix",
        "sitecheck.internetseer.com",
        "siteexplorer.info",
        "siteimprove",
        "sitesnagger",
        "sitesucker",
        "skygrid",
        "slackbot",
        "slurp",
        "slysearch",
        "smartdownload",
        "snake",
        "snapbot",
        "snoopy",
        "sogou",
        "sosospider",
        "spacebison",
        "spankbot",
        "spanner",
        "spaumbot",
        "spbot",
        "spinn4r",
        "sqworm",
        "steeler",
        "stripper",
        "sucker",
        "summalybot",
        "superbot",
        "superfeedr",
        "superhttp",
        "surdotlybot",
        "surfbot",
        "suzuran",
        "szukacz",
        "t3versions",
        "takeout",
        "teleport",
        "teleport pro",
        "telesoft",
        "the.intraformant",
        "thenomad",
        "tighttwatbot",
        "tineye",
        "tineye-bot",
        "titan",
        "toata dragostea mea pentru diavola",
        "toplistbot",
        "trendiction.de",
        "trendictionbot",
        "trovitbot",
        "true_robot",
        "turingos",
        "turnit",
        "turnitinbot",
        "twenga",
        "twengabot",
        "twitterbot",
        "uri::fetch",
        "urllib",
        "urly.warning",
        "vacuum",
        "vagabondo",
        "vci",
        "vidiblescraper",
        "vikspider",
        "voideye",
        "voilabot",
        "wallpapershd",
        "wbsearchbot",
        "web image collector",
        "web sucker",
        "web.image.collector",
        "webalta",
        "webauto",
        "webbandit",
        "webcollage",
        "webcopier",
        "webenhancer",
        "webfetch",
        "webfuck",
        "webgo is",
        "webleacher",
        "webmasterworldforumbot",
        "webpix",
        "webreaper",
        "websauger",
        "webshag",
        "website extractor",
        "website quester",
        "website.extractor",
        "webster",
        "webstripper",
        "websucker",
        "webwhacker",
        "webzip",
        "wells search ii",
        "wep search",
        "wesee",
        "wget",
        "whack",
        "whacker",
        "widow",
        "winhttrack",
        "wininet",
        "wisenutbot",
        "woobot",
        "woopingbot",
        "worldwebheritage.org",
        "wotbox",
        "wpscan",
        "www-collector-e",
        "www-mechanize",
        "wwwoffle",
        "xaldon",
        "xaldon webspider",
        "xenu",
        "xovibot",
        "yacybot",
        "yandex",
        "yisouspider",
        "zade",
        "zermelo",
        "zeus",
        "zh-cn",
        "zmeu",
        "zumbot",
        "zyborg"
    ];

    console.log(`visitor's referrer: ${document.referrer}`);
    console.log(`visitor's user agent: ${window.navigator.userAgent}`);

    // The user-agent list is only consulted when the referrer check passes.
    const isRejected =
        shouldRejectReferrer(blockedReferrers) || shouldRejectUserAgent(blockedUserAgents);
    if (!isRejected) {
        return;
    }

    // The navigation is requested first; the pop-up, when enabled, follows it.
    window.location.href = redirectTarget;
    if (popupEnabled) {
        window.alert(popupText);
    }
}

/**
 * Reports whether any entry of the given list is flagged by testReferrer().
 *
 * @param {string[]} referrers - Referrer fragments to check one by one.
 * @returns {boolean} true once an entry is flagged; false if none is
 *     (including for an empty list).
 */
function shouldRejectReferrer(referrers) {
    // "Not every entry is clean" is the same as "some entry is flagged".
    return !referrers.every((candidate) => !testReferrer(candidate));
}

/**
 * Reports whether any entry of the given list is flagged by testUserAgent().
 *
 * @param {string[]} userAgents - User-agent fragments to check one by one.
 * @returns {boolean} true once an entry is flagged; false if none is
 *     (including for an empty list).
 */
function shouldRejectUserAgent(userAgents) {
    // "Not every entry is clean" is the same as "some entry is flagged".
    return !userAgents.every((candidate) => !testUserAgent(candidate));
}

/**
 * Decides whether the page's referrer comes from the given blocked domain.
 *
 * The blocked domain is compared with the referrer's host name on a domain
 * boundary, so "reddit.com" matches "reddit.com" and "www.reddit.com" but not
 * "notreddit.com", and not a URL that merely mentions the domain in its path
 * or query string. The comparison is case-insensitive.
 *
 * @param {string} referrer - Blocked domain name, e.g. "reddit.com".
 * @returns {boolean} true when document.referrer belongs to that domain.
 */
function testReferrer(referrer) {
    const blockedDomain = String(referrer || "").toLowerCase();
    const visitorReferrer = String(document.referrer || "");

    // An empty pattern would match every visitor; an empty referrer matches nothing.
    if (blockedDomain === "" || visitorReferrer === "") {
        return false;
    }

    let host;
    try {
        host = new URL(visitorReferrer).hostname.toLowerCase();
    } catch (error) {
        // Not a parseable absolute URL: fall back to a plain substring check
        // rather than letting the visitor through unchecked.
        return visitorReferrer.toLowerCase().includes(blockedDomain);
    }

    return host === blockedDomain || host.endsWith(`.${blockedDomain}`);
}

/**
 * Decides whether the visitor's user agent contains the given fragment.
 *
 * The comparison is case-insensitive: the block list is written in lowercase,
 * while real user-agent strings are mixed case ("HeadlessChrome", "Wget/1.21").
 *
 * @param {string} userAgent - User-agent fragment to look for, e.g. "wget".
 * @returns {boolean} true when window.navigator.userAgent contains the fragment.
 */
function testUserAgent(userAgent) {
    const pattern = String(userAgent || "").toLowerCase();

    // An empty pattern is a substring of everything; never treat it as a match.
    if (pattern === "") {
        return false;
    }

    const visitorAgent = String(window.navigator.userAgent || "").toLowerCase();

    return visitorAgent.includes(pattern);
}

As long as I continue to use this, you should be able to find a current version on my website. You should probably use it instead of copying from the code block above, because of the forum's auto-redaction.
Logged

larvapuppy
Full Member ⚓︎
***


⛺︎ My Room

View Profile WWW

First 1000 Members!Joined 2023!
« Reply #1 on: October 18, 2023 @18.29 »

This is amazing! Thank you so much! I was wondering if there were ways to keep bots from reading my website content.   :cheerR:

Newbie here so thanks for your patience: if I'm understanding correctly, there are other precautions we could take to try and turn away bots, social media referrals, etc? Is there any literature you can recommend where I could learn more about this (robotstxt and the others you mentioned)?
Logged

the end of an era, one starts anew
starbreaker
Sr. Member ⚓︎
****


What good is Heaven if we dare not storm it?

⛺︎ My Room
SpaceHey: Friend Me!

View Profile WWW

First 1000 Members!G4 Club Member!Joined 2023!
« Reply #2 on: October 18, 2023 @647.06 »

This is amazing! Thank you so much! I was wondering if there were ways to keep bots from reading my website content.   :cheerR:

As I mentioned, this isn't foolproof.

Newbie here so thanks for your patience: if I'm understanding correctly, there are other precautions we could take to try and turn away bots, social media referrals, etc? Is there any literature you can recommend where I could learn more about this (robotstxt and the others you mentioned)?

tl;dr: if you're determined to block bots and scrapers, you probably want to rent a virtual private server and learn how to configure Apache or NGINX.

There are other precautions you can take on the server side, if you have access to the server. You should be able to create or upload a robots.txt file even on NeoCities, but the worst bots ignore directives in robots.txt and may need server configuration rules in .htaccess to combat -- and depending on your host you might not be able to upload a .htaccess file, or anti-bot directives in that file might not work due to the provider's server-wide configuration.
Logged

Pages: [1] Print 
« previous next »
 

Vaguely similar topics! (3)

Please share your 88x31 badge making tips!

Started by MelooonBoard ✁ ∙ Web Crafting

Replies: 2
Views: 937
Last post December 07, 2022 @113.51
by LittleGr33nIMP
Neocities Badge

Started by RolBoard ➶ ∙ Art Gallery

Replies: 1
Views: 820
Last post April 20, 2023 @610.15
by SilkSkull
Nail Biters: how do you stop your habit?

Started by NippoBoard ☺︎ ∙ General Interests

Replies: 7
Views: 731
Last post April 27, 2023 @793.45
by Memory

Melonking.Net © Always and ever was! SMF 2.0.19 | SMF © 2021, Simple Machines | Terms and Policies Forum Guide | Rules | RSS | WAP | Mobile


MelonLand Badges and Other Melon Sites!

MelonLand Project! Visit the MelonLand Forum! Support the Forum
Visit Melonking.Net! Visit the Gif Gallery! Pixel Sea TamaNOTchi