# shitpostbot5k.py

#!/usr/bin/python3
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import urllib.request
import urllib.parse
import urllib.error
import datetime
import ssl
import json
import random


class Shitpostbot5000(object):
    """Download images scraped from a feed page and from Instagram tag pages.

    An instance tracks the last image URL it downloaded from ``url`` and an
    on/off flag that callers can toggle. The class methods ``getlinks`` and
    ``random`` work without an instance.
    """

    # Hashtags ``random`` chooses from when the caller does not pass one.
    DEFAULT_HASHTAGS = (
        'shitpostmeme',
        'shitpost',
        'shitposts',
        'memes',
        'climbingmemes',
        'rockclimbingmemes',
        'climbingmemesaredank',
        'climbingmemesofinstagram',
    )

    def __init__(self, url='https://www.facebook.com/shitpostbot5k/feed', out="shitpostbot5k_latest.jpg"):
        """Set up the scraper.

        Args:
            url: Page that ``get_latest`` scrapes for images.
            out: Local path ``get_latest`` writes the downloaded image to.
        """
        self.state = False
        self.image = ''
        # Start 40 minutes in the past so the first get_latest() call is
        # already outside the 30-minute window and fetches immediately.
        self.date = datetime.datetime.now() - datetime.timedelta(minutes=40)
        self.out = out
        self.url = url

    def get_latest(self):
        """Download the last ``.jpg`` image on the page if it is a new one.

        Nothing is fetched while fewer than 30 minutes have passed since the
        last successful download. The timestamp only moves when a new image
        is saved.

        Returns:
            True if a new image was written to ``self.out``; False if the
            call was throttled, the page had no matching image, or the image
            is the one already downloaded.
        """
        if datetime.datetime.now() <= self.date + datetime.timedelta(minutes=30):
            return False  # still inside the wait window

        # Close the HTTP response as soon as the page has been parsed.
        with urlopen(self.url) as response:
            bs = BeautifulSoup(response, 'html.parser')

        # Escape the dot so only a literal ".jpg" matches.
        images = bs.find_all('img', {'src': re.compile(r'\.jpg')})
        if not images:
            return False  # nothing to download on this page

        image = images[-1]['src']
        if image == self.image:
            return False  # nothing new

        urllib.request.urlretrieve(image, self.out)
        self.image = image
        self.date = datetime.datetime.now()
        return True  # new one !

    def get_status(self):
        """Return the on/off flag (False until ``set_on`` is called)."""
        return self.state

    def set_on(self):
        """Set the on/off flag to True."""
        self.state = True

    def set_off(self):
        """Set the on/off flag to False."""
        self.state = False

    def get_out(self):
        """Return the path ``get_latest`` saves images to."""
        return self.out

    @classmethod
    def _unverified_context(cls):
        """Build an SSL context with hostname and certificate checks disabled.

        NOTE(review): this turns off TLS verification for the tag-page fetch.
        It is kept to match the existing behavior; confirm it is still needed.
        """
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        return ctx

    @classmethod
    def getlinks(cls, hashtag, url):
        """Return the thumbnail URLs embedded in a tag page's shared data.

        Args:
            hashtag: Tag name; used only in the progress message.
            url: Address of the tag page to fetch.

        Returns:
            A list with one thumbnail URL per post: the second entry of each
            post's ``thumbnail_resources``.

        Raises:
            ValueError: If the page has no ``window._sharedData`` script.
        """
        # Reuse the context stored by random() when present; otherwise build
        # one so this method also works when called on its own.
        ctx = getattr(cls, 'ctx', None)
        if ctx is None:
            ctx = cls._unverified_context()

        with urllib.request.urlopen(url, context=ctx) as response:
            html = response.read()

        soup = BeautifulSoup(html, 'html.parser')
        # The filter can receive None for script tags without a single
        # string child, so guard before calling startswith.
        script = soup.find(
            'script',
            text=lambda t: bool(t) and t.startswith('window._sharedData'),
        )
        if script is None:
            raise ValueError('no window._sharedData script found at ' + url)

        page_json = script.text.split(' = ', 1)[1].rstrip(';')
        data = json.loads(page_json)
        print('Scraping links with #' + hashtag + "...........")
        edges = data['entry_data']['TagPage'][0]['graphql']['hashtag']['edge_hashtag_to_media']['edges']
        return [post['node']['thumbnail_resources'][1]['src'] for post in edges]

    @classmethod
    def random(cls, hashtag=None):
        """Download a random image for a hashtag to ``insta_random.jpg``.

        Args:
            hashtag: Tag to look up. When None, one is picked at random from
                ``DEFAULT_HASHTAGS`` on every call.

        Returns:
            The string ``'insta_random.jpg'``, the path the image was saved to.
        """
        # Choose at call time; a default evaluated in the signature would be
        # picked once at import and reused for every call.
        if hashtag is None:
            hashtag = random.choice(cls.DEFAULT_HASHTAGS)

        cls.ctx = cls._unverified_context()
        tag_url = 'https://www.instagram.com/explore/tags/' + hashtag.strip() + '/'
        links = cls.getlinks(hashtag, tag_url)
        urllib.request.urlretrieve(random.choice(links), 'insta_random.jpg')
        return 'insta_random.jpg'


if __name__ == "__main__":
    # Manual smoke test: build a bot, then fetch one random image for the
    # "shitposts" tag and print the path it was saved to.
    img = Shitpostbot5000()
    saved_path = Shitpostbot5000.random('shitposts')
    print(saved_path)