Skip to content
Extraits de code Groupes Projets
Sélectionner une révision Git
  • b5752c6ffd8c06f4b050080cf2f58f27ccd20146
  • master par défaut protégée
2 résultats

create.sql

Blame
  • shitpostbot5k.py 2,52 Kio
    #!/usr/bin/python3
    from urllib.request import urlopen
    from bs4 import BeautifulSoup
    import re
    import urllib.request
    import urllib.parse
    import urllib.error
    import datetime
    import ssl
    import json
    import random
    
    
    class Shitpostbot5000(object):
        """Download meme images from a Facebook feed and Instagram hashtag pages.

        An instance remembers the last image URL it downloaded from ``url`` and
        rate-limits new downloads; the ``random`` classmethod is independent of
        any instance and picks an arbitrary image from an Instagram hashtag page.
        """

        # Hashtags sampled by random() when the caller does not supply one.
        DEFAULT_HASHTAGS = (
            'shitpostmeme',
            'shitpost',
            'shitposts',
            'memes',
            'climbingmemes',
            'rockclimbingmemes',
            'climbingmemesaredank',
            'climbingmemesofinstagram',
        )

        # Minimum time that must elapse after a successful download before
        # get_latest() hits the network again.
        REFRESH_INTERVAL = datetime.timedelta(minutes=30)

        def __init__(self, url='https://www.facebook.com/shitpostbot5k/feed', out="shitpostbot5k_latest.jpg"):
            """Create a bot.

            Args:
                url: Page scanned for ``.jpg`` images by ``get_latest``.
                out: Local file path the latest image is written to.
            """
            self.state = False
            self.image = ''
            # Backdated by more than REFRESH_INTERVAL so the very first
            # get_latest() call is not throttled.
            self.date = datetime.datetime.now() - datetime.timedelta(minutes=40)
            self.out = out
            self.url = url

        def get_latest(self):
            """Download the last ``.jpg`` image of the page if it is a new one.

            Returns:
                True when a new image was saved to ``self.out``; False when the
                call was throttled, the page contains no ``.jpg`` image, or the
                image is the one already downloaded.

            Note:
                ``self.date`` is only refreshed after a successful download, so
                an unchanged page is fetched again on every call once the
                interval has elapsed.
            """
            if datetime.datetime.now() <= self.date + self.REFRESH_INTERVAL:
                return False  # still inside the throttle window

            # Read the body inside the context manager so the connection is
            # always released, even if parsing fails afterwards.
            with urlopen(self.url) as response:
                html = response.read()
            bs = BeautifulSoup(html, 'html.parser')

            # The dot is escaped: a bare '.jpg' would match any character
            # followed by "jpg".
            candidates = bs.find_all('img', {'src': re.compile(r'\.jpg')})
            if not candidates:
                return False  # nothing to download on this page

            image = candidates[-1]['src']
            if image == self.image:
                return False  # nothing new

            urllib.request.urlretrieve(image, self.out)
            self.image = image
            self.date = datetime.datetime.now()
            return True  # new one !

        def get_status(self):
            """Return the on/off flag (False until ``set_on`` is called)."""
            return self.state

        def set_on(self):
            """Switch the on/off flag to True."""
            self.state = True

        def set_off(self):
            """Switch the on/off flag to False."""
            self.state = False

        def get_out(self):
            """Return the path the latest image is written to."""
            return self.out

        @staticmethod
        def _make_context():
            """Build the SSL context used for the hashtag page request."""
            # SECURITY(review): hostname checking and certificate verification
            # are deliberately kept disabled to preserve existing behaviour, but
            # this exposes the request to man-in-the-middle tampering. Re-enable
            # verification unless there is a documented reason not to.
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
            return ctx

        @staticmethod
        def _extract_links(data):
            """Return the thumbnail URLs listed in a decoded ``_sharedData`` dict.

            Raises:
                KeyError, IndexError: If ``data`` does not have the expected
                    tag-page layout.
            """
            edges = data['entry_data']['TagPage'][0]['graphql']['hashtag']['edge_hashtag_to_media']['edges']
            # Index 1 selects the second entry of each post's thumbnail list.
            return [post['node']['thumbnail_resources'][1]['src'] for post in edges]

        @classmethod
        def getlinks(cls, hashtag, url):
            """Scrape the image links embedded in a hashtag page.

            Args:
                hashtag: Hashtag name, used only for the progress message.
                url: Address of the hashtag page to download.

            Returns:
                A list of image URLs (possibly empty).

            Raises:
                ValueError: If the page has no ``window._sharedData`` script or
                    its payload is not valid JSON.
            """
            # random() stores a context on the class; fall back to a fresh one
            # so that getlinks() also works when called on its own.
            ctx = getattr(cls, 'ctx', None)
            if ctx is None:
                ctx = cls._make_context()

            with urllib.request.urlopen(url, context=ctx) as response:
                html = response.read()
            soup = BeautifulSoup(html, 'html.parser')

            # Script tags without a single text node are passed as None, so
            # guard before calling startswith().
            script = soup.find(
                'script',
                text=lambda t: t is not None and t.startswith('window._sharedData'),
            )
            if script is None:
                raise ValueError('no window._sharedData script found at ' + url)

            page_json = script.text.split(' = ', 1)[1].rstrip(';')
            data = json.loads(page_json)
            print('Scraping links with #' + hashtag + "...........")
            return cls._extract_links(data)

        @classmethod
        def random(cls, hashtag=None):
            """Save a random image from a hashtag page to ``insta_random.jpg``.

            Args:
                hashtag: Hashtag to browse. When omitted, one of
                    ``DEFAULT_HASHTAGS`` is drawn at call time (a default
                    computed in the signature would be frozen at import).

            Returns:
                The file name the image was written to, ``'insta_random.jpg'``.

            Raises:
                IndexError: If the hashtag page lists no image.
            """
            if hashtag is None:
                hashtag = random.choice(cls.DEFAULT_HASHTAGS)

            cls.ctx = cls._make_context()
            # Percent-encode the tag so non-ASCII hashtags yield a valid URL.
            tag_url = 'https://www.instagram.com/explore/tags/' + urllib.parse.quote(hashtag.strip()) + '/'
            links = cls.getlinks(hashtag, tag_url)
            urllib.request.urlretrieve(random.choice(links), 'insta_random.jpg')
            return 'insta_random.jpg'
    
    
    if __name__ == "__main__":
        # Manual smoke test: download a random image tagged #shitposts and print
        # the name of the file it was saved to. random() is a classmethod, so no
        # instance is needed.
        print(Shitpostbot5000.random('shitposts'))