From 9d935affd3c837b8607aa84457f241fe124f33d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Wikle=20DUBARD?= <loic97429@gmail.com>
Date: Tue, 20 Aug 2019 12:14:46 +0200
Subject: [PATCH] shitpostbot insta parser

---
 bot.py                   | 10 +++++++---
 useless/shitpostbot5k.py | 35 ++++++++++++++++++++++++++---------
 2 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/bot.py b/bot.py
index 8f67e88..bb0027e 100755
--- a/bot.py
+++ b/bot.py
@@ -59,7 +59,7 @@ class Bot(Client):
                 '!memes <image_name_or_url> ; <texte1> ; <texte2(facultatif)>  ... not yet implemented\n' + \
                 '!ping \n' + \
                 '!translate <phrase> ; <lang_source> ; <lang_dest>\n' + \
-                '!shitpostbot5000 [status|on|off|random] ... ne pas utiliser PLEASE\n' + \
+                '!shitpostbot5000 [status|on|off|random|timer] ... ne pas utiliser PLEASE\n' + \
                 '!about -> vas-y test-moi !\n'
             return help_text
         elif '!translate' in texte:
@@ -102,8 +102,12 @@ class Bot(Client):
         elif texte == '!shitpostbot5000 off':
             if thread_id in Bot.shitpostbot:
                 Bot.shitpostbot[thread_id][0].set_off()
-        elif texte == '!shitpostbot5000 random':
-            self.sendLocalImage(Shitpostbot5000.random(), thread_id=thread_id, thread_type=thread_type)
+        elif '!shitpostbot5000 random' in texte:
+            texte = texte.replace('!shitpostbot5000 random', '')
+            if texte == '':
+                return Shitpostbot5000.random()
+            else:
+                return Shitpostbot5000.random(texte)
 
         # pour rigoler
         elif '!memes templates' in texte:
diff --git a/useless/shitpostbot5k.py b/useless/shitpostbot5k.py
index c8eea54..2ebe45d 100755
--- a/useless/shitpostbot5k.py
+++ b/useless/shitpostbot5k.py
@@ -3,7 +3,12 @@ from urllib.request import urlopen
 from bs4 import BeautifulSoup
 import re
 import urllib.request
+import urllib.parse
+import urllib.error
 import datetime
+import ssl
+import json
+import random
 
 
 class Shitpostbot5000(object):
@@ -39,15 +44,27 @@ class Shitpostbot5000(object):
         return self.out
 
     @classmethod
-    def random(self):
-        html = urlopen('https://www.facebook.com/pg/shitpostbot5k/photos')
-        bs = BeautifulSoup(html, 'html.parser')
-        images = bs.find_all('img', {'src': re.compile('.jpg')})
-        images2 = bs.find_all('image_src')
-        for image in images:
-            print(image["src"])
-        return None
+    def getlinks(self, hashtag, url):
+        """Fetch the tag page at url and return the thumbnail URLs listed in its window._sharedData JSON."""
+        with urllib.request.urlopen(url, context=self.ctx) as response:  # close the connection when done
+            soup = BeautifulSoup(response.read(), 'html.parser')
+        # bs4 hands the filter tag.string, which is None for e.g. <script src=...>; guard before startswith
+        script = soup.find('script', text=lambda t: t and t.startswith('window._sharedData'))
+        if script is None:  # no embedded JSON on this page: fail with a clear message
+            raise ValueError('window._sharedData script not found for #' + hashtag)
+        data = json.loads(script.text.split(' = ', 1)[1].rstrip(';'))
+        print('Scraping links with #' + hashtag + "...........")
+        edges = data['entry_data']['TagPage'][0]['graphql']['hashtag']['edge_hashtag_to_media']['edges']
+        return [post['node']['thumbnail_resources'][1]['src'] for post in edges]
+
+    @classmethod
+    def random(self, hashtag=None):
+        """Return a random Instagram thumbnail URL for hashtag; a default tag is drawn per call when it is empty."""
+        hashtag = (hashtag or '').strip().lstrip('#') or random.choice(['shitpostmeme', 'shitpost', 'shitposts', 'memes', 'climbingmemes', 'rockclimbingmemes', 'climbingmemesaredank', 'climbingmemesofinstagram'])
+        self.ctx = ssl.create_default_context()  # NOTE(review): certificate and hostname checks are left enabled
+        return random.choice(self.getlinks(hashtag, 'https://www.instagram.com/explore/tags/' + urllib.parse.quote(hashtag) + '/'))
+
 
 if __name__ == "__main__":
     img = Shitpostbot5000()
-    print(img.get_latest())
+    print(Shitpostbot5000.random('shitposts'))
-- 
GitLab