Initial commit

This commit is contained in:
robocopAlpha 2021-07-13 22:41:19 +03:00
commit 439b5340fc
12 changed files with 508 additions and 0 deletions

1
.gitignore vendored Executable file
View file

@ -0,0 +1 @@
Mirrors

21
LICENSE Executable file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2020 RobocopAlpha
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

BIN
assets/pulling.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

13
block_FB.sh Executable file
View file

@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Facebook & Whatsapp
#
# Post-processes Mirrors/facebook_block.txt into ${WRK_DIR}/facebook_block.txt:
#   * every entry that does not contain "whatsapp" is kept;
#   * an entry containing "whatsapp" is kept only if it also contains "facebook";
#   * the result is sorted and de-duplicated.
# The script is sourced by pull.lists.sh, so it changes the caller's working
# directory and must not call a bare `exit`.
WRK_DIR=/home/dietpi/pihole_lists

# Stop early when the mirror directory is missing; `return` works when the
# file is sourced, `exit` is the fallback when it is executed directly.
cd "${WRK_DIR}/Mirrors" || {
    echo "block_FB.sh: cannot enter ${WRK_DIR}/Mirrors" >&2
    return 1 2>/dev/null || exit 1
}

# rg flags: -F fixed string, -I no file name, -N no line numbers, -v invert.
rg -FINv "whatsapp" "${WRK_DIR}/Mirrors/facebook_block.txt" >| "${WRK_DIR}/Mirrors/tmp.txt"
rg -FIN "whatsapp" "${WRK_DIR}/Mirrors/facebook_block.txt" | rg -FIN "facebook" >> "${WRK_DIR}/Mirrors/tmp.txt"

# The raw list is removed from Mirrors so that it is not merged into the
# general blocklist when pull.lists.sh concatenates Mirrors/*.
rm -f "${WRK_DIR}/Mirrors/facebook_block.txt"

# `sort -u` already removes duplicates; no extra `uniq` pass is needed.
sort -u "${WRK_DIR}/Mirrors/tmp.txt" > "${WRK_DIR}/facebook_block.txt"
rm -f "${WRK_DIR}/Mirrors/tmp.txt"
echo -e " \n $(wc -l "${WRK_DIR}/facebook_block.txt") domains\n"

44
blocked Executable file
View file

@ -0,0 +1,44 @@
# This is the list of domains to block due to telemetry or ads
# Author: robocopAlpha (robocopalpha@pm.ch)
#------------------------
# Telemetry
wzrkt.com
eu1-spiky.wzrkt.com
# graph.facebook.com
# was breaking facebook's Android app
count.typora.io
ltlxvxjjmvhn.me
open.oneplus.net
prod-w.nexus.live.com.akadns.net
prod.nexusrules.live.com.akadns.net
nexus.officeapps.live.com
nexusrules.officeapps.live.com
# gateway.fe.apple-dns.net
# keyvalueservice.fe.apple-dns.net
update.electronjs.org.herokudns.com
analytics.utu.fi
sanoma.fi
www.googleadservices.com
mtalk.google.com
# connectivitycheck.gstatic.com
# pipe.cloudapp.aria.akadns.net
# Stay Away
mackeeper.com
pocket.com
softonic.com
en.softonic.com
# Cookie tracking
sn.sanoma.fi
# Comment out if not required
api-global.netflix.com
ichnaea.netflix.com
uiboot.netflix.com
appboot.us-west-2.prodaa.netflix.com
track.adform.net
web.telemetric.dk

40
comparison/compareBlocklists.R Executable file
View file

@ -0,0 +1,40 @@
# Compare every mirrored blocklist (one *.txt file per list in Mirrors/)
# against reference lists and print, per file, how many entries it has, how
# many it shares with the reference and how many are unique to it.
library(doParallel)

# Print one "name : LEN : common : unique" row per mirrored list.
#
# lists:     list whose elements are one-element named lists
#            (file name -> character vector of lines)
# reference: character vector of reference entries
print_overlap <- function(lists, reference) {
  for (entry in lists) {
    domains <- entry[[1]]
    print(paste(
      names(entry),
      length(domains),
      length(intersect(domains, reference)),
      length(setdiff(domains, reference)),
      sep = " : "
    ))
  }
  invisible(NULL)
}

# NOTE: the working directory is hard-coded to the author's checkout; the
# relative paths below ("../dbl.oisd.txt", ...) depend on it.
setwd("~/OneDrive - O365 Turun yliopisto/Git/Gitlab.DC/pihole_lists/comparison/Mirrors/")

# Anchor the pattern: a bare "txt" is a regular expression that would also
# match names that merely contain "txt".
file.contents <- list.files(path = ".", pattern = "\\.txt$")

# Read all mirrored lists in parallel on a fork cluster of 4 workers.
myCluster <- makeCluster(4, type = "FORK", useXDR = FALSE)
print(myCluster)
registerDoParallel(myCluster)
results <- foreach(f = file.contents, .inorder = FALSE) %dopar% {
  var <- list(readLines(f))
  names(var) <- f
  var
}
stopCluster(myCluster)
rm(file.contents)

# Comparison 1: each mirrored list against dbl.oisd.txt.
dbl <- readLines("../dbl.oisd.txt")
print(paste("name", "LEN", "common", "unique", sep = ":"))
print_overlap(results, dbl)

# All mirrored lists pooled together against dbl.oisd.txt:
# total lines : shared : only in mirrors : only in dbl.oisd.txt
tmp <- unlist(results)
print(paste(
  length(tmp),
  length(intersect(tmp, dbl)),
  length(setdiff(tmp, dbl)),
  length(setdiff(dbl, tmp)),
  sep = " : "
))

# Comparison 2: each mirrored list against StevenBlack + malwaredomains,
# with empty lines removed from the reference.
var2 <- readLines("../StevenBlack.txt")
var2 <- c(var2, readLines("../malwaredomains.domains.txt"))
var2 <- var2[var2 != ""]
print_overlap(results, var2)

BIN
comparison/def vs full.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 109 KiB

103
dump.whitelist Executable file
View file

@ -0,0 +1,103 @@
# Whitelisted domains
#
# Author: robocopAlpha (robocopalpha@pm.ch)
#------------------------
www.jbc.org
stat.ethz.ch
figshare.com
c.paypal.com
linkinghub.elsevier.com
# comes from Dplay
# discovery.demdex.net
# dpm.demdex.net
# 5e529.v.fwmrm.net
# altmetric
api.altmetric.com
www.altmetric.com
#---------------
# nordea.fi
# account.mojang.com
# bioscience.lonza.com
# can.msubmit.net
# cgifederal.secure.force.com
# danskebank.fi
# editorialmanager.com
# enterfinland.fi
# eproofing.springer.com
# genie.cbioportal.org
# jbconline.msubmit.net
# link.webropolsurveys.com
# mail.utu.fi
# intranet.utu.fi
# utu.fi
# nettiopsu.utu.fi
# sso.utu.fi
# mc.manuscriptcentral.com
# member.ikeafamily.fi
# migri.fi
# onedrive.live.com
# onlinesbi.com
# op.fi
# rekry.saima.fi
# server.pro
# startpage.com
# twitter.com
# www.cbioportal.org
# www.editorialmanager.com
# www.eurofins.fi
# # www.evisa.gov.tr
# www.finnair.com
# www.google.com
# www.ikea.com
# www.irctc.co.in
# www.spicejet.com
# www.tripadvisor.com
# www5.cmrhousing.com
# Apple
# appleid.apple.com
# captive.apple.com
# gsp1.apple.com
# www.apple.com
# www.appleiphonecell.com
# windows uses it to verify connectivity to Internet
# www.msftncsi.com
# Microsoft Office
# officeclient.microsoft.com
# Windows
# settings-win.data.microsoft.com
# v10.vortex-win.data.microsoft.com
# Microsoft domains
# outlook.office365.com
# products.office.com
# c.s-microsoft.com
# i.s-microsoft.com
# login.live.com
# login.microsoftonline.com
# Windows store
# dl.delivery.mp.microsoft.com
# geo-prod.do.dsp.mp.microsoft.com
# displaycatalog.mp.microsoft.com
# Google maps
# clients4.google.com
# clients2.google.com
# Google play
# android.clients.google.com
# Youtube
# s.youtube.com
# video-stats.l.google.com
# Gmail
# googleapis.l.google.com

84
get_blocklists.sh Executable file
View file

@ -0,0 +1,84 @@
#!/usr/bin/env bash
# Downloads all upstream blocklists into ${WRK_DIR}, one cleaned file per list.
# Going to the right path
WRK_DIR=/home/dietpi/pihole_lists/Mirrors
# -p: do not fail when the directory is left over from an earlier run.
mkdir -p "${WRK_DIR}"
# Abort instead of writing the downloaded lists into whatever directory the
# script happened to be started from.
cd "${WRK_DIR}" || { echo "Cannot enter ${WRK_DIR}" >&2; exit 1; }
# fetch_hosts URL FNAME
#
# Downloads a hosts-format list ("0.0.0.0 domain" / "127.0.0.1 domain") from
# URL and writes the cleaned entries to FNAME (relative to the current
# directory), overwriting it.
#   $1  URL of the list
#   $2  output file name
# Cleaning steps, in order:
#   1. drop lines that start with '#' and lines that contain "localhost";
#   2. remove inline comments ('#' to end of line);
#   3. remove the leading "0.0.0.0 " / "127.0.0.1 " address;
#   4. remove trailing whitespace (done last so that blanks left in front of
#      a removed inline comment are stripped as well).
fetch_hosts(){
    local url=$1
    local fname=$2
    # --fail: on an HTTP error curl produces no output, so an error page is
    # never stored as if it were a list of domains.
    curl -fsSL "${url}" \
        | rg -INv "^[#]|localhost" \
        | sd "\\#.*" "" \
        | sd "^0\\.0\\.0\\.0 |^127\\.0\\.0\\.1 " "" \
        | sd '\s+$' '' \
        >| "${fname}"
}
# fetch_blocklist URL FNAME
#
# Downloads a plain domain list (one domain per line) from URL and writes the
# cleaned entries to FNAME (relative to the current directory), overwriting it.
#   $1  URL of the list
#   $2  output file name
# Cleaning steps, in order:
#   1. drop lines that start with '#' and lines that contain "localhost";
#   2. remove inline comments ('#' to end of line);
#   3. remove trailing whitespace (done last so that blanks left in front of
#      a removed inline comment are stripped as well).
fetch_blocklist(){
    local url=$1
    local fname=$2
    # --fail: on an HTTP error curl produces no output, so an error page is
    # never stored as if it were a list of domains.
    curl -fsSL "${url}" \
        | rg -INv "^[#]|localhost" \
        | sd "\\#.*" "" \
        | sd '\s+$' '' \
        >| "${fname}"
}
# Main part: download every list when the network is reachable, otherwise
# report the problem and exit with status 1 (pull.lists.sh checks the status).
if ping -c 1 -W 2 www.google.fi &> /dev/null ; then
    echo " Fetching lists. Please wait..."

    # Acquiring FB blocklist
    fetch_hosts 'https://www.github.developerdan.com/hosts/lists/facebook-extended.txt' "facebook_block.txt"

    # Blocklists
    # has hosts format
    fetch_hosts 'https://adaway.org/hosts.txt' "adaway.txt"
    fetch_hosts 'https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext' "yoyo.txt"
    fetch_hosts 'https://raw.githubusercontent.com/FadeMind/hosts.extras/master/UncheckyAds/hosts' "UncheckyAds.txt"
    fetch_hosts 'https://raw.githubusercontent.com/bigdargon/hostsVN/master/hosts' "hostsVN.txt"
    fetch_hosts 'https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.2o7Net/hosts' "FadeMind.2o7Net.txt"
    fetch_hosts 'https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt' "WindowsSpyBlocker.txt"
    fetch_hosts 'https://hostfiles.frogeye.fr/firstparty-trackers-hosts.txt' "frogeye.trackers.txt"
    fetch_hosts 'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/Alternate%20versions%20Anti-Malware%20List/AntiMalwareHosts.txt' "Dandilion.Malware.txt"
    fetch_hosts 'https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.Risk/hosts' "Ad.Risk.hosts.txt"
    fetch_hosts 'https://urlhaus.abuse.ch/downloads/hostfile/' "urlhaus.abuse.txt"

    # Domains format
    # (The UncheckyAds list is hosts-format and is fetched once, above; a
    # second fetch_blocklist call for the same URL used to overwrite
    # UncheckyAds.txt with entries that still carried the address prefix.)
    fetch_blocklist 'https://v.firebog.net/hosts/Easyprivacy.txt' "Fbog.easyprivacy.txt"
    fetch_blocklist 'https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt' "simple.ad.txt"
    fetch_blocklist 'https://s3.amazonaws.com/lists.disconnect.me/simple_malvertising.txt' "simple_malvertising.txt"
    fetch_blocklist 'https://v.firebog.net/hosts/Easylist.txt' "Fbog.Easylist.txt"
    fetch_blocklist 'https://v.firebog.net/hosts/AdguardDNS.txt' "Fbog.AdguardDNS.txt"
    fetch_blocklist 'https://v.firebog.net/hosts/Admiral.txt' "Fbog.Admiral.txt"
    fetch_blocklist 'https://v.firebog.net/hosts/static/w3kbl.txt' "Fbog.w3kbl.txt"
    fetch_blocklist 'https://v.firebog.net/hosts/Shalla-mal.txt' "Fbog.Shalla.mal.txt"
    fetch_blocklist 'https://v.firebog.net/hosts/Prigent-Ads.txt' "Fbog.pringent.ads.txt"
    fetch_blocklist 'https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-blocklist.txt' "notrack.txt"
    fetch_blocklist 'https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt' "ThreatIntel.txt"
    fetch_blocklist 'https://v.firebog.net/hosts/Prigent-Crypto.txt' "Fbog.Pringent.crypto.txt"
    fetch_blocklist 'https://mirror.cedia.org.ec/malwaredomains/immortal_domains.txt' "immortal.domains.txt"
    fetch_blocklist 'https://bitbucket.org/ethanr/dns-blacklists/raw/8575c9f96e5b4a1308f2f12394abd86d0927a4a0/bad_lists/Mandiant_APT1_Report_Appendix_D.txt' "ethanr.Mandiant.txt"
    fetch_blocklist 'https://phishing.army/download/phishing_army_blocklist_extended.txt' "phishing.army.txt"
    fetch_blocklist 'https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-malware.txt' "notrack.malware.txt"
    fetch_blocklist 'https://raw.githubusercontent.com/Spam404/lists/master/main-blacklist.txt' "Spam404.txt"

    # DC old lists
    fetch_blocklist 'https://gitlab.com/ZeroDot1/CoinBlockerLists/-/raw/master/list.txt' "coinblocker.list.txt"
    fetch_blocklist 'https://gitlab.com/ZeroDot1/CoinBlockerLists/-/raw/master/list_browser.txt' "coinblocker.browser.txt"
    fetch_blocklist 'https://block.energized.pro/extensions/porn-lite/formats/domains.txt' "profanity.txt"
    fetch_blocklist 'https://theantisocialengineer.com/AntiSocial_Blacklist_Community_V1.txt' "AntiSocial_Blacklist_Community_V1.txt"

    # pihole default
    fetch_blocklist 'https://mirror1.malwaredomains.com/files/justdomains' "malwaredomains.domains.txt"

    # Steven Black's list needs some extra clean up:
    # keep only what follows the "# Start StevenBlack" marker (the context
    # length is the line count of the whole file, i.e. "everything after the
    # match"), then drop every line that contains '#'.
    curl -fsSL 'https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts' >| "${WRK_DIR}/tmp.txt"
    rg --after-context "$(wc -l "${WRK_DIR}/tmp.txt" | awk '{print $1}')" "^# Start StevenBlack" "${WRK_DIR}/tmp.txt" >| "${WRK_DIR}/tmp2.txt"
    rm -f "${WRK_DIR}/tmp.txt"
    # `sort -u` already de-duplicates; no extra `uniq` pass is needed.
    rg -FINv "#" "${WRK_DIR}/tmp2.txt" | sort -u >| "${WRK_DIR}/StevenBlack.txt"
    rm -f "${WRK_DIR}/tmp2.txt"

    #--------
    echo " #Lines FileName"
    wc -l *.txt
    #--------
else
    # -e so that the \n escapes are printed as newlines, not literally.
    echo -e "\n[!]Can't connect to the internet. Check your connectivity and try again.\n" >&2
    exit 1
fi

64
pull.lists.sh Executable file
View file

@ -0,0 +1,64 @@
#!/usr/bin/env bash
# Builds the blocklists and publishes them to the OneDrive mount:
#   1. mount onedrive:Temp/blocklist/ with rclone;
#   2. download the upstream lists (get_blocklists.sh) and derive the
#      Facebook list (block_FB.sh);
#   3. merge all mirrors plus the personal `blocked` file into MyBlocklist.txt;
#   4. write checksums, compress with xz and move everything to the mount.
WRK_DIR=/home/dietpi/pihole_lists
MOUNT_DIR=/home/dietpi/onedrive

# Print a message, release the rclone mount and stop with a failure status.
abort(){
    echo "$1"
    sudo umount "${MOUNT_DIR}"
    exit 1
}

cd "${WRK_DIR}/" || { echo "Cannot enter ${WRK_DIR}" >&2; exit 1; }

# NOTE(review): if a mount from an earlier, aborted run is still active,
# this rm -rf operates on the mounted remote folder — confirm that is
# acceptable or unmount first.
rm -rf "${MOUNT_DIR}"
mkdir "${MOUNT_DIR}"
rclone mount --daemon onedrive:Temp/blocklist/ "${MOUNT_DIR}"

# Fetch blocklists
if ! bash "${WRK_DIR}/get_blocklists.sh"; then
    # Exit non-zero (a bare `exit` would report success) and do not leave
    # the mount behind.
    abort "Failed to get blocklists. Execution terminated!"
fi

# Block FB (sourced: it changes the working directory to Mirrors)
source "${WRK_DIR}/block_FB.sh"

#--- Pick fastest cat command (based on my experience)
if command -v gcat > /dev/null 2>&1; then
    catCMD='gcat' # GNU cat /usr/local/bin/gcat
    # (To get run: brewlog install coreutils)
elif alias mcat > /dev/null 2>&1; then
    # NOTE(review): bash does not apply alias expansion to the result of
    # ${catCMD}, so this branch presumably only works when mcat is also an
    # executable on PATH — confirm.
    catCMD='mcat' # mcat is aliased to /bin/cat that ships with macOS
else
    catCMD=$(command -v cat) # If none of the above are found then vanilla cat
fi
#-----------

# Combining blocklists
# sort reads the mirror files itself instead of a concatenated stream, so a
# file that lacks a final newline cannot have its last entry glued to the
# first entry of the next file.
sort -u "${WRK_DIR}"/Mirrors/* >| "${WRK_DIR}/MyBlocklist.txt"
wc -l "${WRK_DIR}/MyBlocklist.txt"
echo -e "\nAdding these to my own Blacklist: \n"
${catCMD} "${WRK_DIR}/blocked" | rg -vIN "^[\\#]" | sd '\s+$' '' | sort -u >> "${WRK_DIR}/MyBlocklist.txt"
# The personal entries were appended after the first sort; sort once more so
# that the list really contains unique domains only.
sort -u -o "${WRK_DIR}/MyBlocklist.txt" "${WRK_DIR}/MyBlocklist.txt"
echo -e " $(wc -l "${WRK_DIR}/MyBlocklist.txt") unique domains. \n"
#-------

# Clean-up
rm -rf "${WRK_DIR}/Mirrors"
rm -f "${WRK_DIR}/blocklists/MyBlocklist.txt.xz" "${WRK_DIR}/blocklists/facebook_block.txt.xz"
# -p: the directory may be left over from an earlier, aborted run.
mkdir -p "${WRK_DIR}/blocklists"
mv "${WRK_DIR}"/*.txt "${WRK_DIR}/blocklists/."
#--------

# Archiving
cd "${WRK_DIR}/blocklists/" || abort "Cannot enter ${WRK_DIR}/blocklists"
date +%d.%m.%y-%H:%M:%S >| lastpull
# The checksum files hold only the 32 hex characters of the MD5 digest of
# the uncompressed lists.
md5sum "${WRK_DIR}/blocklists/MyBlocklist.txt" | cut -c -32 >| MyBlocklist.checksum
md5sum "${WRK_DIR}/blocklists/facebook_block.txt" | cut -c -32 >| facebook_block.checksum
sleep 1
xz -v -T4 MyBlocklist.txt
xz -v -T4 facebook_block.txt
#---------

# Moving to target
cd "${WRK_DIR}" || abort "Cannot enter ${WRK_DIR}"
mv -f "${WRK_DIR}"/blocklists/* "${MOUNT_DIR}/."
cp -f "${WRK_DIR}/dump.whitelist" "${MOUNT_DIR}/dump.whitelist"
rmdir "${WRK_DIR}/blocklists"
sudo umount "${MOUNT_DIR}"

67
readme.md Executable file
View file

@ -0,0 +1,67 @@
# Pihole Lists Manager
If you find this repo useful please consider giving it a star ⭐️ on gitlab.
[![PayPal](https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif)](https://paypal.me/robocopAlpha)
## About the project
This repository contains scripts, blocklists and whitelists I use to manage the [gravity database](https://docs.pi-hole.net/core/pihole-command/#gravity) on my pihole instances.
The script `pull.lists.sh` fetches public blocklists and aggregates them with [my personal blocklist](blocked). Finally the blocklists are compressed (xz) and are ready to be used.
> *The compressed blocklists save time & data spent in getting the blocklists over slow internet connections or while deploying to several machines.* The compressed files are between 11% and 25% of their original size.
The script `update.pi.sh` is run on the pihole instances (Raspberry Pi, PC, Docker), and it fetches the compressed blocklists and [my whitelist](dump.whitelist). The script adds my whitelisted domains to pihole and updates gravity on pihole using `pihole -g`.
## Note:
1) This script uses [`sd`](https://github.com/chmln/sd) and [`rg`](https://github.com/BurntSushi/ripgrep) as replacements for `sed` and `grep`. `sed` and `grep` are time-tested and very reliable, but they are slower than their newer counterparts. If you have systems where `sd` and `rg` are not available, you can edit the relevant lines in `pull.lists.sh` and the scripts it calls (`get_blocklists.sh`, `block_FB.sh`) to use `sed` and `grep` instead.
2) Currently `update.pi.sh` has this repo as upstream, but it can be changed to some other location, even a local one if you're running pihole on the same device.
## How to use:
Run the following lines to update your blocklists:
```sh
./pull.lists.sh
# ensure everything is looking good
# commit to git
git add -u ; git commit -m "Update `date +'%Y-%m-%d'`" ; git push
```
![Screen Shot 2020-07-29 at 13.24.15](assets/pulling.png)
Then updating your pihole instances is as simple as running the included `update.pi.sh` script on your pihole from your git repo:
```sh
curl -L "https://gitlab.com/robocopAlpha/pihole_lists/-/raw/master/update.pi.sh" | bash
# To make it even easier
# just add this to your ~/.bashrc or ~/.zshrc
alias update.pi='curl -L "https://gitlab.com/robocopAlpha/pihole_lists/-/raw/master/update.pi.sh" | bash'
# Command to easily add the alias is included in the update.pi.sh
```
## Adding as a cron job
You can add the `update.pi.sh` script as a cron job so that the updates happen automatically at a set time, by running (requires sudo rights):
```sh
sudo -s
curl -L "https://gitlab.com/robocopAlpha/pihole_lists/-/raw/master/update.pi.sh" >| /usr/local/bin/updatepi
chmod +x /usr/local/bin/updatepi
echo '5 18 * * * root PATH="$PATH:/usr/sbin:/usr/local/bin/" updatepi' >| /etc/cron.d/updatepi
exit
```
## Whitelisting / False Positive reports
The list sources are listed in the file [get_blocklists.sh](get_blocklists.sh); you can report issues to them directly to get them fixed. I can only give you “hot fixes”, which address the issue but don't actually fix it at the source.

71
update.pi.sh Executable file
View file

@ -0,0 +1,71 @@
#!/usr/bin/env bash
# ------- How to use ------:
# curl -L 'https://gitlab.com/robocopAlpha/pihole_lists/-/raw/master/update.pi.sh' | bash
# --------- Make alias ---- :
# echo "alias update.pi='curl -L "https://gitlab.com/robocopAlpha/pihole_lists/-/raw/master/update.pi.sh" | bash'" >> .bashrc
# echo "alias update.pi='curl -L "https://gitlab.com/robocopAlpha/pihole_lists/-/raw/master/update.pi.sh" | bash'" >> .zshrc
# ----- Begin Script ------
# Downloads the compressed blocklists when their published checksum differs
# from the local copy, replaces the pihole whitelist with the published one
# and rebuilds gravity. Progress is appended to update.log.

# Print the MD5 digest (32 hex characters) of file $1, or nothing when the
# file does not exist.
local_checksum(){
    [ -f "$1" ] && md5sum "$1" | cut -c -32
}

# creating directory if it doesn't exist.
if [ -d /home/dietpi/pihole ] ; then
    echo "[✓]homedir exists..."
else
    mkdir -p /home/dietpi/pihole
fi

# installing xz if it doesn't exist
if command -v xz > /dev/null ; then
    echo "[✓]package xz-utils found..."
else
    # -y: when the script is piped into bash, stdin is the script itself, so
    # an interactive confirmation prompt must be avoided.
    apt update; apt install -y xz-utils
fi

cd /home/dietpi/pihole || { echo "Cannot enter /home/dietpi/pihole" >&2; exit 1; }
LOGFILE="/home/dietpi/pihole/update.log"
if [ ! -f "$LOGFILE" ] ; then
    # Creating the log file
    echo "Creating $LOGFILE"
    mkdir -p "$(dirname "$LOGFILE")"
    touch "$LOGFILE"
fi
echo "pihole being udpdated on: $(date)" | tee -a "$LOGFILE"

# GREEN, CYAN and NC are not assigned in this script; unless they are
# inherited from the environment they expand to empty strings.

# --- MyBlocklist: download only when the published checksum differs.
chksum_web=$(curl -s https://gitlab.com/robocopAlpha/pihole_lists/-/raw/master/MyBlocklist.checksum)
chksum_local=$(local_checksum MyBlocklist.txt)
if [ -f MyBlocklist.txt ] && [ "$chksum_web" == "$chksum_local" ]; then
    echo "${GREEN}MyBlocklist Unchanged. Skipping download.${NC}" 2>&1 | tee -a "$LOGFILE"
else
    echo "${CYAN}Getting Myblocklist${NC}" 2>&1 | tee -a "$LOGFILE"
    # --fail: never hand an HTTP error page to xz; decompress only after a
    # successful download.
    curl -fOJL 'https://gitlab.com/robocopAlpha/pihole_lists/-/raw/master/MyBlocklist.txt.xz' \
        && xz -fd MyBlocklist.txt.xz
fi

# --- Facebook blocklist: same procedure.
chksum_web=$(curl -s https://gitlab.com/robocopAlpha/pihole_lists/-/raw/master/facebook_block.checksum)
chksum_local=$(local_checksum facebook_block.txt)
if [ "$chksum_web" == "$chksum_local" ] && [ -f facebook_block.txt ]; then
    echo "${GREEN}Facebook blocklist Unchanged. Skipping download.${NC}" 2>&1 | tee -a "$LOGFILE"
else
    echo "${CYAN}Getting Facebook blocklist${NC}" 2>&1 | tee -a "$LOGFILE"
    curl -fOJL 'https://gitlab.com/robocopAlpha/pihole_lists/-/raw/master/facebook_block.txt.xz' \
        && xz -fd facebook_block.txt.xz
fi

# --- Whitelist: replace the current whitelist with the published one.
# NOTE(review): pull.lists.sh publishes the file as "dump.whitelist" (no
# .txt extension) — confirm that this URL exists upstream.
pihole_cmd=$(command -v pihole)
if curl -fOJL 'https://gitlab.com/robocopAlpha/pihole_lists/-/raw/master/dump.whitelist.txt'; then
    # Keep non-empty lines that do not start with '#', de-duplicate and put
    # them on one space-separated line.
    grep "^[^#]" dump.whitelist.txt | sort -u | tr '\r\n' ' ' > whitelist
    rm -f dump.whitelist.txt
    "${pihole_cmd}" -w --nuke > /dev/null 2>&1
    # $(cat whitelist) is intentionally unquoted: every domain must become
    # its own argument.
    "${pihole_cmd}" -w -q -nr $(cat whitelist) 2>&1 | tee -a "$LOGFILE"
    rm whitelist
else
    # Do not wipe the existing whitelist when the new one is unavailable.
    echo "Could not download the whitelist; keeping the current one." | tee -a "$LOGFILE"
fi

"${pihole_cmd}" updateGravity 2>&1 | tee -a "$LOGFILE"
# Add these to pihole adlist
# file:///home/dietpi/pihole/MyBlocklist.txt
# file:///home/dietpi/pihole/facebook_block.txt