Commit a73d0e06 authored by Niklas Ulfvarson's avatar Niklas Ulfvarson
Browse files

Better regexing, and html fix

parent d6dc006c
import time
import datetime as dt
import re
import requests
......@@ -23,7 +24,7 @@ except OSError:
if post_date != last_date:
## Only save text between date markers
d_pattern = '(?s)' + post_date + '.*?' + post_date + '(.*?)' + last_date
d_pattern = '(?s)' + post_date + '.*?' + post_date + '.*?(<strong>.*?)' + last_date
lst = re.search(d_pattern, html).group(1)
## Clean up result, and remove unsupported tags
......@@ -38,24 +39,24 @@ if post_date != last_date:
lst = lst.replace('&amp;', '&')
lst = lst.replace('&nbsp;', '')
lst = lst.replace('&ndash;', '-')
lst = lst.replace('<strong>Fredag ', '')
lst = lst.replace('&ntilde;', 'ñ')
lst = re.sub(r'<.?p.*?>', '', lst)
lst = re.sub(r'<.?span>', '', lst)
## Split different companies to separate messages
## Necessary as telegram msg must be < 4096
lst_split = re.split('(<strong>)', lst)
lst_split = re.findall(r'(?s)(<strong>.*?)(?=<strong>)', lst)
msg_str = "Nya rymdjobb " + post_date + "! 🚀\n"
data = {'chat_id': '@Rymdjobb', 'disable_web_page_preview': 'True', 'disable_notification': 'True', 'parse_mode': 'html', 'text': msg_str}
requests.post(turl, data).json()
## Send all the listings!
for i in range(1, int(((len(lst_split)-1)/2))):
lst_msg = lst_split[2*i-1]+lst_split[2*i]
for i in range(0, len(lst_split)):
lst_msg = lst_split[i]
data = {'chat_id': '@Rymdjobb', 'disable_web_page_preview': 'True', 'disable_notification': 'True', 'parse_mode': 'html', 'text': lst_msg}
r = requests.post(turl, data).json()
time.sleep(1)
if not r['ok']:
print('Message failed! Response: ' + r)
print('Message failed! Response: ' + str(r))
## Save this date in a file, so as not to send the same listings twice
df = open('datefile.txt', 'w+')
......
  • Some messages got a 429 response because they were all sent too quickly. Telegram sets a limit of 20 messages per second, and this batch had 22, so a one-second delay is now added after each message.

    Edited by Niklas Ulfvarson
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment