Hier der Code.
Du musst noch BeautifulSoup und xlsxwriter installieren, damit es läuft.
Danke im Voraus.
#!/usr/bin/python
# coding=utf-8
import urllib.request
import xlsxwriter
from statistics import StatisticsError, mean
from bs4 import BeautifulSoup
from collections import namedtuple, OrderedDict
from operator import attrgetter
from multiprocessing.dummy import Pool as ThreadPool
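# Thread ids of the rating threads, grouped by product category.
# NOTE(review): several ids (e.g. 103235, 103888) are listed under both categories - confirm this is intended.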
Produktthreads = OrderedDict([
('Spielhilfen', [
103235,
103888,
104010,
104011,
100244,
102857,
103435,
99039,
98315,
102183,
100991,
103492,
97748,
100685,
97677,
100357,
102935,
99127,
103099,
101317,
99440,
102826,
98410,
98795,
97871,
100687,
99475,
100256,
97678,
100990,
103131,
103493,
98042,
98203,
98041,
103843,
102107,
102825,
98850,
103132,
103676,
102936,
98177,
96824,
97791,
97750,
98680,
103238,
103089,
103688,
97528,
101318,
97572,
102835,
98756,
99447,
103973,
97986,
97185,
97942,
103596,
98694,
100963,
100964,
97099,
101730,
97870,
97777,
97395,
99578,
97778,
98844,
97574,
97484,
97897,
96865,
102606,
102724,
103416,
97282,
99126,
103234,
103595,
99528,
103886,
103889,
103812,
99476,
103573,
101971,
102142,
104012,
104013,
97790,
99191,
97037,
96941,
98288,
97396,
99119,
97283,
103752,
97943,
100359,
98597,
98956,
101972,
98067,
97809,
100627,
100989,
98065,
97985,
99208,
99337,
97486,
100358,
98794,
103320,
97097,
98066,
99269,
99174,
99848,
97896,
98125,
96864,
97485,
96940,
100522,
102522,
103319,
104009,
102184,
98124,
98289,
99040,
101260,
102108,
98287,
100682,
99764,
96806,
97573,
101286,
98679,
100524,
100683,
100965,
99207,
100995,
103415,
99117,
103100,
99577,
100245,
100251,
102126,
99192,
101970,
97810,
98176,
98205,
98316,
99259,
98175,
102750,
99336,
102106,
97397,
98717,
98228,
98227,
101415,
99511,
103814,
103972,
96805,
101259,
103572,
100856,
101316,
97039,
102723,
103998,
99446,
102856,
99270,
98849,
98714,
103434,
103887,
103926,
105354,
105121,
105182,
105217,
105216,
105119,
105110,
106060,
106059,
106040,
106041,
106602,
106619,
106648,
106651,
106652,
106721,
106851,
106899,
106900,
106901
]),
('Abenteuer', [
103235,
103888,
104010,
100244,
102857,
102935,
99039,
98315,
102183,
100991,
102826,
97748,
100685,
97677,
100357,
99127,
103099,
103435,
101317,
99440,
98410,
98795,
97871,
100687,
103492,
100256,
100990,
103131,
103493,
98042,
98203,
98041,
97678,
102107,
102825,
103132,
99475,
102936,
103676,
98177,
96824,
97791,
98680,
97750,
98844,
103238,
103843,
101318,
97572,
98850,
99447,
97528,
97986,
97185,
97942,
100964,
103596,
101730,
98694,
100963,
97099,
97870,
97777,
97778,
97395,
99578,
98756,
97574,
97484,
97897,
96865,
102606,
102724,
103416,
97282,
99126,
103234,
103089,
103595,
99528,
103889,
103812,
103688,
99476,
103573,
101971,
103973,
97790,
97037,
99119,
96941,
98288,
99191,
97283,
102142,
97943,
100359,
98597,
98956,
101286,
101972,
98067,
97396,
97809,
100627,
100989,
97985,
99208,
97486,
100358,
98794,
103320,
97097,
99337,
98066,
99269,
99174,
99848,
97896,
98065,
98125,
97485,
96940,
100522,
102522,
96864,
103752,
102184,
98124,
98289,
99040,
101260,
102108,
98287,
100682,
99764,
96806,
97573,
98679,
100524,
100683,
100965,
99207,
100995,
103415,
99117,
103319,
103100,
99577,
100245,
100251,
104009,
102126,
99192,
101970,
97810,
98176,
98205,
98316,
99259,
98175,
102750,
99336,
102106,
97397,
98228,
98227,
98717,
101415,
99511,
103814,
103972,
96805,
101259,
103572,
100856,
101316,
97039,
103998,
99446,
102856,
99270,
98849,
98714,
102723,
102835,
103434,
103886,
103887,
103926,
104011,
104012,
104013,
103629,
98141,
98381,
102858,
100252,
95786,
97488,
98267,
99306,
97286,
95709,
100523,
100612,
18975,
99193,
99281,
99247,
97840,
69636,
98418,
95961,
97767,
95620,
96509,
100657,
99055,
97186,
97187,
99260,
97675,
98013,
96344,
96343,
101504,
96270,
99616,
103097,
97487,
95711,
95785,
96508,
95788,
95552,
98843,
98647,
97399,
98053,
97400,
97971,
102937,
102938,
100656,
98757,
99307,
97188,
97287,
103098,
97576,
99765,
99541,
101502,
103811,
98054,
97828,
97841,
98088,
98266,
96823,
96422,
98308,
99357,
100628,
97911,
98089,
95556,
96510,
95899,
96423,
96345,
99346,
96939,
98646,
97676,
102827,
98954,
97038,
98977,
98382,
102535,
99056,
103417,
98976,
97285,
97098,
95559,
95622,
95621,
97674,
95898,
98309,
96507,
99847,
95710,
96825,
103418,
97575,
99529,
97827,
95900,
98140,
96098,
98687,
99054,
101261,
101262,
101450,
97284,
97912,
102475,
101451,
96099,
95553,
96654,
96421,
95619,
98975,
98974,
102607,
98759,
99282,
99248,
99173,
99345,
101416,
101501,
103969,
97829,
96189,
98012,
97969,
96689,
102159,
102161,
96342,
103729,
102158,
100857,
96690,
95618,
99194,
101503,
102160,
95560,
99118,
98265,
96420,
96269,
102534,
103317,
103971,
98216,
96027,
96028,
96012,
96026,
99542,
102735,
102836,
98758,
99051,
99510,
103730,
103626,
101728,
105666,
105664,
105663,
105357,
105356,
105355,
105218,
105120,
106049,
106061,
106453,
106903,
106902,
106850,
106704
])
])
# Anthologies are maintained separately: every title maps to the ids of the
# threads that belong to that anthology.
Anthologien = OrderedDict([
    ('Cthulhu - Ars Mathematica', [102159, 102160, 102158]),
    ('Cthulhu - Dreissig', [101501, 101503, 101504, 101502]),
    ('Cthulhu - The Final Revelation', [97284, 97285, 97286, 97287]),
    ('Cthulhu - Die Goldenen Hände Suc´naaths', [98758, 98757, 98759]),
    ('Shadowrun - Licht aus der Asche', [96028, 96027, 96026]),
])
# Make sure every anthology thread is part of the 'Abenteuer' category;
# ids that are already listed there are not added a second time.
for Anthologie, anthologie_threads in Anthologien.items():
    for threadid in anthologie_threads:
        if threadid in Produktthreads['Abenteuer']:
            continue
        Produktthreads['Abenteuer'].append(threadid)
# URL template of a forum thread; the %d placeholder is filled with the thread id
baseurl = "https://www.tanelorn.net/index.php?topic=%d.0"
# Size of the thread pool that downloads and parses thread pages concurrently
# (the original author recommends one worker per CPU core)
concurrent_parses = 4
def bbcode(tag, string, value=None):
    """Wrap ``string`` in a BBCode tag pair.

    A truthy ``value`` is attached to the opening tag, producing
    ``[tag=value]string[/tag]``; otherwise the plain form
    ``[tag]string[/tag]`` is returned.
    """
    opening = tag + '=' + value if value else tag
    return '[' + opening + ']' + string + '[/' + tag + ']'
def bbcodeurl(urlstring, urlname):
    """Return a BBCode ``url`` tag that shows ``urlname`` and targets ``urlstring``."""
    return bbcode(tag='url', string=urlname, value=urlstring)
def bbbold(text):
    """Return ``text`` enclosed in a BBCode ``b`` (bold) tag."""
    return bbcode('b', text)
def bbtt(text):
    """Return ``text`` enclosed in a BBCode ``tt`` tag."""
    return bbcode('tt', text)
class bbtable:
    """Render a collection of rows as a BBCode ``[table]`` block."""

    def __init__(self, rows):
        """Store ``rows``; each one is converted with ``str()`` when rendering."""
        self.elements = rows

    def tablify(self, rows):
        """Return the already rendered ``rows`` string inside table tags."""
        return '[table]\r\n' + rows + '[/table]'

    def __str__(self):
        """Return the whole table in BBCode format."""
        rendered_rows = [str(row) for row in self.elements]
        return self.tablify(''.join(rendered_rows))
class tablerow(bbtable):
    """A single BBCode table row; ``elements`` holds the cell values."""

    def cellify(self, rowfield):
        """Return ``rowfield`` (converted to text) inside ``[td]`` tags."""
        return '[td]' + str(rowfield) + '[/td]'

    def rowify(self, cells):
        """Return the rendered ``cells`` inside ``[tr]`` tags plus a line break."""
        return '[tr]' + str(cells) + '[/tr]\r\n'

    def __str__(self):
        """Render every element as a cell and wrap the result as one row."""
        rendered_cells = [self.cellify(field) for field in self.elements]
        return self.rowify(''.join(rendered_cells))
class tableheaderrow(tablerow):
    """Table row whose cells are rendered as bold headings."""

    def cellify(self, rowfield):
        """Return a cell with ``rowfield`` in bold followed by a ``tt`` space.

        ``rowfield`` is passed to ``bbbold`` unchanged, so it has to be a
        string here (unlike in ``tablerow.cellify``).
        """
        heading = bbbold(rowfield)
        spacer = bbtt(' ')  # presumably padding between columns - TODO confirm
        return ''.join(['[td]', heading, spacer, '[/td]'])
class ProduktParser():
    """Download forum poll threads and collect their ratings.

    Constructing an instance performs network I/O: every thread id found in
    ``Produktthreads`` is fetched and parsed, afterwards one aggregated entry
    per anthology is appended.  The results are stored in ``self.Produkte``
    as ``Produkt`` tuples ``(name, id, url, Stimmen, Durchschnitt)``, where
    ``Durchschnitt`` is text (a rounded number or the no-votes marker).
    """

    # Text stored in ``Durchschnitt`` when a poll has not received any vote.
    _NO_VOTES = '0 / No votes yet'

    def __init__(self, Produktthreads,
                 Produkt=namedtuple('Produkt', 'name id url Stimmen Durchschnitt'),
                 Produkte=None, Anthologien=None, baseurl=baseurl):
        """Fetch all threads and compute the anthology ratings.

        Args:
            Produktthreads: mapping of category name to a list of thread ids.
            Produkt: record type used for the results.
            Produkte: optional list the results are appended to; a fresh
                list is created when omitted.
            Anthologien: optional mapping of anthology title to the thread
                ids belonging to it.
            baseurl: URL template with one ``%d`` placeholder for the id.
        """
        self.produkt_ergebnisse = []
        self.anthologie_ergebnisse = []
        self.Produkt = Produkt
        # A ``[]`` default would be shared by every instance, so results of
        # earlier parsers would leak into later ones.
        self.Produkte = [] if Produkte is None else Produkte
        self.baseurl = baseurl
        self.Produktthreads = Produktthreads
        self.Anthologien = [] if Anthologien is None else Anthologien
        # A set, so a thread listed in several categories is fetched once.
        self.bewertungen = {
            item for sublist in self.Produktthreads.values() for item in sublist}
        self.pool = ThreadPool(concurrent_parses)
        try:
            self.pool.map(self.getProdukt, self.bewertungen)
        finally:
            # Release the worker threads even if a download failed.
            self.pool.close()
            self.pool.join()
        self.getAnthologie()
        self.calculate_average()

    def calculate_average(self):
        """Call ``calculate_mean()`` on every entry of ``produkt_ergebnisse``.

        NOTE(review): nothing in this class fills ``produkt_ergebnisse``, so
        this is currently a no-op.
        """
        for produkt in self.produkt_ergebnisse:
            produkt.calculate_mean()

    def getProdukt(self, threadid):
        """Download one thread, parse its poll and append the result.

        The product name is the part of the page title before the first
        ``/``.  The average is the mean over all single votes, rounded to
        two digits and stored as text.
        """
        url = self.baseurl % threadid
        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(url) as page:
            soup = BeautifulSoup(page.read(), "html.parser")
        Produktname = soup.find('title').string.split('/')[0].strip()
        # NOTE(review): on a page without a poll ``polls`` is None and the
        # next line raises AttributeError, aborting the whole pool.map().
        polls = soup.find('dl', {'class': 'options'})
        options = polls.findAll('dt', {'class': 'middletext'})
        votes = polls.findAll('span', {'class': 'percentage'})
        # Rating of an option: the first purely numeric word of its label.
        ratings = [
            [int(word) for word in option.string.replace("(", "").split()
             if word.isdigit()][0]
            for option in options]
        # Number of votes: the leading number of the percentage text.
        counts = [int(vote.string.split(' ')[0]) for vote in votes]
        ergebnis = dict(zip(ratings, counts))
        # Expand to one list entry per vote so mean() and len() can be used.
        einzelvotes = [
            rating for rating, count in ergebnis.items() for _ in range(count)]
        if einzelvotes:
            durchschnitt = str(round(mean(einzelvotes), 2))
            stimmen = len(einzelvotes)
        else:
            durchschnitt = self._NO_VOTES
            stimmen = 0
        self.Produkte.append(
            self.Produkt(Produktname, threadid, url, stimmen, durchschnitt))

    def getAnthologie(self):
        """Append one entry per anthology with the vote-weighted average.

        Members without votes are ignored.  Anthology entries get ``0`` as
        id and url.
        """
        for Anthologie in self.Anthologien:
            member_ids = self.Anthologien[Anthologie]
            Anthologiedurchschnittagg = 0
            Anthologiestimmen = 0
            for Spielhilfe in self.Produkte:
                if Spielhilfe.id not in member_ids:
                    continue
                if Spielhilfe.Durchschnitt == self._NO_VOTES:
                    continue
                Anthologiestimmen += Spielhilfe.Stimmen
                Anthologiedurchschnittagg += (
                    Spielhilfe.Stimmen * float(Spielhilfe.Durchschnitt))
            if Anthologiestimmen == 0:
                Anthologiedurchschnitt = self._NO_VOTES
            else:
                Anthologiedurchschnitt = str(
                    round(Anthologiedurchschnittagg / Anthologiestimmen, 2))
            self.Produkte.append(
                self.Produkt(Anthologie, 0, 0, Anthologiestimmen,
                             Anthologiedurchschnitt))

    def get_produkt_ergebnisse(self, produkt_typ):
        """Return the enumerated, sorted entries of one category.

        NOTE(review): reads ``produkt_ergebnisse`` (never filled here) and
        the attribute ``thread_id``, which the default ``Produkt`` tuple
        does not have.
        """
        result = [produkt for produkt in self.produkt_ergebnisse
                  if produkt.thread_id in self.Produktthreads[produkt_typ]]
        return enumerate(sorted(result))

    def get_anthologie_ergebnisse(self):
        """Return the enumerated, sorted ``anthologie_ergebnisse``."""
        return enumerate(sorted(self.anthologie_ergebnisse))

    def get_all(self):
        """Return ``(index, Produkt)`` pairs of all results in tuple order.

        Tuples compare field by field, so the order is by name first.
        """
        return enumerate(sorted(self.Produkte))

    @staticmethod
    def _rating_sort_key(produkt):
        """Return the average of ``produkt`` as a number for sorting.

        ``Durchschnitt`` is text; comparing the text directly would rank
        "10.0" below "9.5".  Entries whose text is not a number (no votes)
        sort below every rated product.
        """
        try:
            return float(produkt.Durchschnitt)
        except (TypeError, ValueError):
            return float('-inf')

    def generateTable(self, bewertungsthreads):
        """Return a ``bbtable`` ranking the given products, best first."""
        ranked = sorted(
            bewertungsthreads, key=self._rating_sort_key, reverse=True)
        header = tableheaderrow(['Platz', 'Bewertung', 'Stimmen', 'Produkt'])
        # NOTE(review): anthology entries carry url 0, which renders as
        # ``[url]name[/url]`` - confirm that this is the wanted output.
        rows = [
            tablerow([platz, element.Durchschnitt, element.Stimmen,
                      bbcodeurl(element.url, element.name)])
            for platz, element in enumerate(ranked, start=1)]
        return bbtable([header] + rows)

    def printProdukte(self):
        """Print one ranking table per category plus one for anthologies."""
        for key, value in self.Produktthreads.items():
            thread_ids = set(value)  # constant-time membership tests
            print('\r\n' + bbbold(key))
            print(self.generateTable(
                [Spielhilfe for Spielhilfe in self.Produkte
                 if Spielhilfe.id in thread_ids]))
        print('\r\n' + bbbold("Anthologien"))
        # Use the anthologies of this instance, not the module-level global.
        print(self.generateTable(
            [Spielhilfe for Spielhilfe in self.Produkte
             if Spielhilfe.name in self.Anthologien]))
def generate_xlsx(produkte, filename='Bewertungen.xlsx'):
    """Write the products to an xlsx workbook, one row per product.

    Args:
        produkte: iterable of ``(index, element)`` pairs; ``element`` needs
            the attributes ``Durchschnitt``, ``Stimmen``, ``url`` and
            ``name``.
        filename: path of the workbook to create.

    The columns are: index + 1, average, number of votes, url, name.
    """
    # The context manager closes (and thereby saves) the workbook even when
    # writing a cell raises.
    with xlsxwriter.Workbook(filename) as workbook:
        worksheet = workbook.add_worksheet()
        # Rows and columns are zero indexed; start at the first cell.
        for row, (index, element) in enumerate(produkte):
            values = (index + 1, element.Durchschnitt, element.Stimmen,
                      element.url, element.name)
            for col, value in enumerate(values):
                worksheet.write(row, col, value)
if __name__ == '__main__':
    # Creating the parser already downloads and parses all threads.
    TanelornParser = ProduktParser(
        Produktthreads=Produktthreads,
        Anthologien=Anthologien,
    )
    alle_produkte = TanelornParser.get_all()
    generate_xlsx(alle_produkte)