From 9d935affd3c837b8607aa84457f241fe124f33d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Wikle=20DUBARD?= <loic97429@gmail.com> Date: Tue, 20 Aug 2019 12:14:46 +0200 Subject: [PATCH] shitpostbot insta parser --- bot.py | 10 +++++++--- useless/shitpostbot5k.py | 35 ++++++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/bot.py b/bot.py index 8f67e88..bb0027e 100755 --- a/bot.py +++ b/bot.py @@ -59,7 +59,7 @@ class Bot(Client): '!memes <image_name_or_url> ; <texte1> ; <texte2(facultatif)> ... not yet implemented\n' + \ '!ping \n' + \ '!translate <phrase> ; <lang_source> ; <lang_dest>\n' + \ - '!shitpostbot5000 [status|on|off|random] ... ne pas utiliser PLEASE\n' + \ + '!shitpostbot5000 [status|on|off|random|timer] ... ne pas utiliser PLEASE\n' + \ '!about -> vas-y test-moi !\n' return help_text elif '!translate' in texte: @@ -102,8 +102,12 @@ class Bot(Client): elif texte == '!shitpostbot5000 off': if thread_id in Bot.shitpostbot: Bot.shitpostbot[thread_id][0].set_off() - elif texte == '!shitpostbot5000 random': - self.sendLocalImage(Shitpostbot5000.random(), thread_id=thread_id, thread_type=thread_type) + elif '!shitpostbot5000 random' in texte: + texte = texte.replace('!shitpostbot5000 random', '') + if texte == '': + return Shitpostbot5000.random() + else: + return Shitpostbot5000.random(texte) # pour rigoler elif '!memes templates' in texte: diff --git a/useless/shitpostbot5k.py b/useless/shitpostbot5k.py index c8eea54..2ebe45d 100755 --- a/useless/shitpostbot5k.py +++ b/useless/shitpostbot5k.py @@ -3,7 +3,12 @@ from urllib.request import urlopen from bs4 import BeautifulSoup import re import urllib.request +import urllib.parse +import urllib.error import datetime +import ssl +import json +import random class Shitpostbot5000(object): @@ -39,15 +44,27 @@ class Shitpostbot5000(object): return self.out @classmethod - def random(self): - html = urlopen('https://www.facebook.com/pg/shitpostbot5k/photos') - bs = BeautifulSoup(html, 'html.parser') - images = bs.find_all('img', {'src': re.compile('.jpg')}) - images2 = bs.find_all('image_src') - for image in images: - print(image["src"]) - return None + def getlinks(self, hashtag, url): + html = urllib.request.urlopen(url, context=self.ctx).read() + soup = BeautifulSoup(html, 'html.parser') + script = soup.find('script', text=lambda t: t.startswith('window._sharedData')) + page_json = script.text.split(' = ', 1)[1].rstrip(';') + data = json.loads(page_json) + print('Scraping links with #' + hashtag + "...........") + links = [] + for post in data['entry_data']['TagPage'][0]['graphql']['hashtag']['edge_hashtag_to_media']['edges']: + image_src = post['node']['thumbnail_resources'][1]['src'] + links.append(image_src) + return links + + @classmethod + def random(self, hashtag=random.choice(['shitpostmeme', 'shitpost', 'shitposts', 'memes', 'climbingmemes', 'rockclimbingmemes', 'climbingmemesaredank', 'climbingmemesofinstagram'])): + self.ctx = ssl.create_default_context() + self.ctx.check_hostname = False + self.ctx.verify_mode = ssl.CERT_NONE + return random.choice(self.getlinks(hashtag, 'https://www.instagram.com/explore/tags/' + hashtag + '/')) + if __name__ == "__main__": img = Shitpostbot5000() - print(img.get_latest()) + print(Shitpostbot5000.random('shitposts')) -- GitLab