Commit c20a4c5d authored by Niklas Ulfvarson's avatar Niklas Ulfvarson
Browse files

Initial commit

parent edf5bca2
import datetime as dt
import re
import requests
from urllib.request import urlopen
# Telegram bot token; it is interpolated into the API URL below.
token = ''
turl = f'https://api.telegram.org/bot{token}/sendMessage'
url = "https://www.rymdstyrelsen.se/innovation/rymdjobb/aktuella-rymdjobb/"

# File that remembers the date of the last broadcast between runs.
DATEFILE = 'datefile.txt'
# Telegram chat that receives the listings.
CHANNEL = '@Rymdjobb'

# HTML entities to replace before sending, applied in this order.
# '&amp;' comes after the letters and before '&nbsp;' (which is simply
# removed), matching the order of the original replacement chain.
ENTITY_REPLACEMENTS = (
    ('&aring;', 'å'),
    ('&auml;', 'ä'),
    ('&ouml;', 'ö'),
    ('&Aring;', 'Å'),
    ('&Auml;', 'Ä'),
    ('&Ouml;', 'Ö'),
    ('&amp;', '&'),
    ('&nbsp;', ''),
)

# Markup stripped from the listings before they are sent.
# NOTE(review): '<p><strong>Fredag ' is presumably the opening of the next
# date heading, which the capture stops in the middle of -- confirm against
# the live page.
REMOVED_MARKUP = ('<p><strong>Fredag ', '<p>', '</p>')


def fetch_page():
    """Download the job page at ``url`` and return it decoded as UTF-8."""
    with urlopen(url) as page:
        return page.read().decode("utf-8")


def find_post_date(html):
    """Return the first YYYY-MM-DD date in *html*, or None if there is none."""
    found = re.search(r'(\d{4}-\d{2}-\d{2})', html)
    return found.group(1) if found else None


def read_last_date(post_date):
    """Return the date stored in the datefile.

    Falls back to seven days before *post_date* when the file cannot be
    opened or is empty.
    """
    try:
        with open(DATEFILE, 'r') as df:
            # Strip so a trailing newline cannot break the date comparison
            # or the regex built from this value.
            stored = df.read().strip()
    except OSError:
        stored = ''
        print("Could not open datefile. Setting post_date-7 as last_date.")
    if stored:
        return stored
    week_earlier = dt.date.fromisoformat(post_date) - dt.timedelta(days=7)
    return week_earlier.isoformat()


def extract_listing_html(html, post_date, last_date):
    """Return the HTML between the date markers, or None if they are absent.

    The captured text starts after the second occurrence of *post_date* and
    ends just before the next occurrence of *last_date*.
    """
    d_pattern = (re.escape(post_date) + '.*?' + re.escape(post_date)
                 + '(.*?)' + re.escape(last_date))
    found = re.search(d_pattern, html, re.DOTALL)
    return found.group(1) if found else None


def clean_listing(raw):
    """Decode the known HTML entities in *raw* and strip unsupported tags."""
    cleaned = raw
    for entity, char in ENTITY_REPLACEMENTS:
        cleaned = cleaned.replace(entity, char)
    for markup in REMOVED_MARKUP:
        cleaned = cleaned.replace(markup, '')
    return cleaned


def split_listings(cleaned):
    """Split *cleaned* into one message per '<strong>' marker.

    Splitting is necessary as a Telegram message must be < 4096 characters.
    Text before the first '<strong>' is discarded.
    """
    pieces = re.split('(<strong>)', cleaned)
    # pieces is [preamble, '<strong>', body, '<strong>', body, ...]; pair
    # every marker with the body that follows it, including the last pair.
    return [tag + body for tag, body in zip(pieces[1::2], pieces[2::2])]


def send_message(text):
    """Post *text* to the channel and return Telegram's decoded JSON reply."""
    data = {
        'chat_id': CHANNEL,
        'disable_web_page_preview': 'True',
        'disable_notification': 'True',
        'parse_mode': 'html',
        'text': text,
    }
    reply = requests.post(turl, data, timeout=30).json()
    if not reply.get('ok'):
        # Report rejected messages instead of silently dropping them.
        print(f"Telegram rejected a message: {reply.get('description')}")
    return reply


def main():
    """Broadcast listings posted since the last run and record the new date."""
    html = fetch_page()
    post_date = find_post_date(html)
    if post_date is None:
        raise SystemExit("No date found on the job page.")

    last_date = read_last_date(post_date)
    if post_date == last_date:
        print("No new jobs since last broadcast.")
        return

    # Only keep the text between the date markers.
    raw = extract_listing_html(html, post_date, last_date)
    if raw is None:
        raise SystemExit(
            f"Could not find listings between {post_date} and {last_date}.")
    messages = split_listings(clean_listing(raw))

    send_message("Nya rymdjobb " + post_date + "! 🚀\n")
    # Send all the listings!
    for message in messages:
        send_message(message)

    # Save this date in a file, as to not send the same listings twice.
    with open(DATEFILE, 'w') as df:
        df.write(post_date)
    print('Broadcasted new listings!')


if __name__ == "__main__":
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment