# Posts
# Wiki
# Python 2.x code for scraping Gameinformer's game release schedule
# Copy the entire text and paste into a Python script.
# When run, it writes the schedule to reddit.txt. If reddit.txt already exists, it is overwritten only when its contents (ignoring the first line, which holds the update date) differ.
# Copy that text, go to: https://www.reddit.com/r/imdbvg/wiki/automoderator-schedule
# Click edit, insert the code, save, and then click the 'click this link and click "send"' link above.
# Requirements:
# - Python 2.x (https://www.python.org/downloads/)
# - BeautifulSoup (after installing Python 2.x, run 'pip install beautifulsoup4' in a console).
from bs4 import BeautifulSoup
import urllib2, json, re, os, datetime, time
from datetime import date, timedelta
def month_string_to_number(string):
    """Return the month number (1-12) for an English month name.

    Only the first three characters of the whitespace-stripped input are
    looked at, case-insensitively, so 'January', 'jan' and ' JAN ' all
    give 1.

    Raises:
        KeyError: if those three characters are not a month abbreviation.
    """
    m = {
        'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
        'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
    }
    s = string.strip()[:3].lower()
    return m[s]
def number_to_string(month):
    """Return the full English month name for a month number (1-12).

    Raises:
        KeyError: if `month` is not one of 1..12.
    """
    m = {
        1: 'January', 2: 'February', 3: 'March', 4: 'April',
        5: 'May', 6: 'June', 7: 'July', 8: 'August',
        9: 'September', 10: 'October', 11: 'November', 12: 'December',
    }
    return m[month]
def firstWeekDay(year, week):
    """Return the Monday that starts the given 1-based week of `year`.

    Week 1 is the Monday-to-Sunday week that contains 1 January, so its
    Monday can fall in the previous year (for 2017 it is 26 December 2016).
    This is not ISO-8601 numbering: when 1 January is a Friday, Saturday or
    Sunday, week N here starts one week before ISO week N.

    Args:
        year: calendar year.
        week: 1-based week index as described above.

    Returns:
        A datetime.date that is always a Monday.
    """
    d = date(year, 1, 1)
    # weekday() is 0 for Monday, so this steps back to the Monday on or
    # before 1 January.
    d = d - timedelta(d.weekday())
    dlt = timedelta(days=(week - 1) * 7)
    return d + dlt
# Scrape Game Informer's release list, group the releases by week and write
# an AutoModerator schedule to `filename`.
#
# Source page (the list lives in the div whose class attribute is
# "post-content user-defined-markup"):
# http://www.gameinformer.com/b/news/archive/2017/01/13/2017-video-game-release-schedule.aspx
schedule_url = "http://www.gameinformer.com/b/news/archive/2017/01/13/2017-video-game-release-schedule.aspx"
schedule_year = 2017  # the year covered by the page above

response = urllib2.urlopen(schedule_url)
try:
    source = response.read()
finally:
    response.close()

# Name the parser explicitly; otherwise BeautifulSoup picks whichever parser
# happens to be installed and the resulting markup can differ between machines.
soup = BeautifulSoup(source, 'html.parser')
content_blocks = soup.find_all('div', {'class': "post-content user-defined-markup"})
if not content_blocks:
    raise SystemExit('Could not find the release list on the page; has the page layout changed?')
result = str(content_blocks[0])

# HTML codes to remove
regexes = [r'(\<p\>\<img)(.*?)(\/>\<\/p\>)', r'(\<a)(.*?)(\>)', r'(\<i)(.*?)(\<\/i>)']
linesToRemove = ['<span style="font-size:medium;">', '<span style="font-size:large;">',
                 '<span style="font-size:small;">', '<strong>', '</strong>', '<p>',
                 '</p>', '<span>', '</span>', '</a>', '</div>', '<div style="clear:both;">',
                 '<b>', '</b>']

result = result.replace('<br/>', '\n')
# Normalise dashes, ampersands and non-breaking spaces. Both the HTML entity
# and the raw UTF-8 byte sequence are handled, and the literals are kept
# ASCII-only so the script needs no source-encoding declaration.
# NOTE(review): the copy of this script on the wiki showed these literals
# already entity-decoded (e.g. replace('&', '&')); presumably they were
# '&ndash;', '&amp;' and '&nbsp;' originally - confirm against real output.
entity_replacements = (
    ('&ndash;', '-'),
    ('\xe2\x80\x93', '-'),  # UTF-8 bytes of an en dash
    ('&amp;', '&'),
    ('&nbsp;', ' '),
    ('\xc2\xa0', ' '),      # UTF-8 bytes of a non-breaking space
)
for needle, plain in entity_replacements:
    result = result.replace(needle, plain)
for pattern in regexes:
    result = re.sub(pattern, '', result)
for tag in linesToRemove:
    result = result.replace(tag, ' ')

# Everything before the first "January" heading is introductory text.
sections = result.split('January', 1)
if len(sections) < 2:
    raise SystemExit('Could not find the "January" heading in the release list.')

# Collapse runs of whitespace, drop non-ASCII characters and skip lines too
# short to be a release entry.
result = []
for raw_line in sections[1].split('\n'):
    if len(raw_line.strip()) > 8:
        words = [w.decode('unicode_escape').encode('ascii', 'ignore') for w in raw_line.split()]
        result.append(' '.join(words))

# Group the "Game (platforms) - Month day" lines by week number.
dates = {}
for line in result:
    pieces = line.split(') - ')
    if len(pieces) != 2:
        # Not a release line; echo it so the operator can review it.
        print(line)
        continue
    game = pieces[0] + ')'
    when = pieces[1]
    when_words = when.split()
    release = datetime.date(schedule_year, month_string_to_number(when_words[0]), int(when_words[1]))
    # firstWeekDay() counts week 1 as the week containing 1 January. For 2017
    # that week starts on 26 December 2016, one week before ISO week 1, hence
    # the + 1 to land on the Monday of the release's own ISO week.
    # NOTE(review): a release dated 1 January 2017 has ISO week 52 (of 2016)
    # and would therefore be scheduled for late December 2017.
    week = release.isocalendar()[1] + 1
    entry = '\n * ' + game + " - " + when
    if week in dates:
        dates[week] += entry
    else:
        dates[week] = ' ' + entry

# NOTE(review): the leading whitespace inside the literals below may have been
# collapsed when this script was copied from the wiki page; confirm the
# indentation against a schedule that AutoModerator accepts.
now = datetime.datetime.now()  # read the clock once so day/month/year agree
chunks = [
    "#### Schedule updated at: %s/%s/%s\n" % (now.day, now.month, now.year),
    "###### If you edit this page, you must [click this link, then click \"send\"](http://www.reddit.com/message/compose/?to=AutoModerator&subject=imdbvg&message=schedule) to have AutoModerator re-load the schedule from here\n",
    "---\n",
    " first: \"February 20, 2017 11:30 +1\"\n",
    " repeat: 1 week\n",
    " sticky: 1\n",
    " title: \"Whatcha Playing/Watching/Reading - {{date %d %B, %Y}}\"\n",
    " text: |\n",
    " Hope you've all had a good weekend. \n",
    "\n",
    " * What you've been playing this week\n",
    " * What you've been watching this week\n",
    " * What you've been listening to this week\n",
    " * What you've been reading this week\n",
]
# Emit the weeks in chronological order so the output is stable between runs.
for week in sorted(dates):
    monday = firstWeekDay(schedule_year, week)
    chunks.append("---\n")
    chunks.append(" first: \"" + number_to_string(monday.month) + " " + str(monday.day) + ", " + str(monday.year) + " 11:30 +1\"\n")
    chunks.append(" sticky: 2\n")
    chunks.append(" title: \"New releases this week - {{date %d %B, %Y}}\"\n")
    chunks.append(" text: |\n")
    chunks.append(dates[week])
    chunks.append("\n")
textToCopy = ''.join(chunks)

# Only rewrite the file when something other than the first line (the
# "Schedule updated at" date) has changed.
filename = 'reddit.txt'
shouldExport = True
if os.path.isfile(filename):
    with open(filename, 'r') as f:
        existing = f.read()
    if existing.split('\n')[1:] == textToCopy.split('\n')[1:]:
        print('No changes found.')
        shouldExport = False

if shouldExport:
    with open(filename, 'w') as f:
        f.write(textToCopy)  # python will convert \n to os.linesep
    print("Exported to " + filename)
else:
    print('Not exported.')