File: check_scummvm.py

package info (click to toggle)
game-data-packager 85.1
links: PTS, VCS
area: contrib
in suites: trixie
size: 33,332 kB
sloc: python: 15,320; sh: 713; ansic: 95; makefile: 60
file content (119 lines) | stat: -rwxr-xr-x 3,516 bytes
parent folder | download | duplicates (2)
#!/usr/bin/python3
# encoding=utf-8
#
# Copyright © 2018 Alexandre Detiste <alexandre@detiste.be>
# SPDX-License-Identifier: GPL-2.0-or-later

# Usage:
# curl -s -D headers.html \
# https://wiki.scummvm.org/index.php/Where_to_get_the_games > scummvm.html
# _build/run-tool-uninstalled tools/check_scummvm.py

import os
import re

from bs4 import BeautifulSoup

from game_data_packager.game import load_games

KEY = '22d200f8670dbdb3e253a90eee5098477c95c23d'


class Game:
    """Shop identifiers known for a single game.

    Instances are created empty and filled in by plain attribute
    assignment.  Both fields default to ``None``, meaning that no
    identifier is known for that shop.
    """
    # GOG identifier: either the 'game' (or, failing that, 'url') entry of
    # a YAML ``gog`` mapping, or the path component following
    # ``gog.com/game/`` in a link found on the wiki.
    gog: str | None = None
    # Steam application id: the 'id' entry of a YAML ``steam`` mapping, or
    # the number following ``store.steampowered.com/app/`` in a wiki link.
    steam: int | None = None


def load_yaml() -> dict[str, Game]:
    """Collect the shop ids recorded in the packaged game data.

    Games are loaded from the directory named by the ``GDP_BUILDDIR``
    environment variable (``out`` when unset).  Only games whose
    ``wikibase`` is the ScummVM wiki are kept.

    For every package of a kept game, the package-level ``gog`` / ``steam``
    mapping is used when it is non-empty and the game-level one otherwise.
    All packages of a game write into the same record, so the ids of a
    later package replace those of an earlier one.

    Returns:
        A dict mapping each game's ``wiki`` value to its ``Game`` record.
    """
    print('# Reading data from YAML...')
    scummvm_wiki = 'https://wiki.scummvm.org/index.php/'
    datadir = os.environ.get('GDP_BUILDDIR', 'out')

    by_wiki_page: dict[str, Game] = {}
    for name, game in load_games(datadir=datadir).items():
        game.load_file_data()
        if game.wikibase != scummvm_wiki:
            continue
        # A ScummVM game without a wiki page name cannot be matched later.
        assert game.wiki, name

        shops = Game()
        for package in game.packages.values():
            gog = package.gog or game.gog
            steam = package.steam or game.steam

            # Prefer the 'game' entry; fall back to 'url'.
            for key in ('game', 'url'):
                if key in gog:
                    shops.gog = gog[key]
                    break

            if 'id' in steam:
                shops.steam = steam['id']

        by_wiki_page[game.wiki] = shops
    return by_wiki_page


# Shop ids recorded in the packaged YAML data, keyed by each game's
# ``wiki`` value; this is the reference side of the comparison below.
games = load_yaml()


# load_html() parses ./scummvm.html, which has to be fetched beforehand:
#   curl -s -D headers.html \
#     https://wiki.scummvm.org/index.php/Where_to_get_the_games > scummvm.html


def load_html() -> dict[str, Game]:
    """Collect the shop ids advertised on the saved ScummVM wiki page.

    Parses ``scummvm.html`` in the current directory (a saved copy of
    https://wiki.scummvm.org/index.php/Where_to_get_the_games) and walks
    every table declared with ``border="1" width="100%"``.  Within a table
    row, a link to a wiki page (``/index.php?title=...``) names the game,
    and the GOG / Steam links that follow it in the same row are
    attributed to that game.

    Progress and anything unexpected is printed to stdout: the heading
    preceding each table, shop links that appear before any game link in
    their row, and every link that is not recognised.

    Returns:
        A dict mapping the wiki page title to a ``Game`` carrying the GOG
        identifier and/or Steam application id found for it.

    Raises:
        OSError: if ``scummvm.html`` cannot be opened.
    """
    print('# Reading data from ScummVM wiki...')

    # Close the file deterministically, and hand BeautifulSoup the raw
    # bytes so that the encoding is detected from the document itself
    # rather than taken from the locale's default text encoding.
    with open("scummvm.html", "rb") as page:
        soup = BeautifulSoup(page, "html.parser")

    # Links that have been reviewed and carry no shop id.
    reviewed_urls = (
        'https://www.scummvm.org/games/',
        'https://www.msadams.com/downloads.htm',
    )

    # Patterns are built once, outside the per-link loop.
    wiki_re = re.compile(r'/index\.php\?title=(.*?)$')
    gog_re = re.compile(
        r'https://www\.gog\.com/game/(.*?)\?pp=' + re.escape(KEY)
    )
    # Only the leading digits form the application id; a trailing slash or
    # title component must not reach int().  Both URL schemes are accepted.
    steam_re = re.compile(r'https?://store\.steampowered\.com/app/(\d+)')

    downloads: dict[str, Game] = {}
    for table in soup.find_all('table', {'border': '1', 'width': '100%'}):
        heading = table.find_previous('span')
        print('## ' + (heading.text if heading is not None
                       else '(no heading)'))
        for row in table.find_all('tr'):
            # The game a shop link belongs to is the last wiki link seen
            # earlier in the same row.
            game = None
            for link in row.find_all('a'):
                url = link.get('href')
                if url is None:
                    # An anchor without a target cannot be a shop link.
                    continue

                if url in reviewed_urls:
                    continue
                if 'cdaccess.com/' in url:
                    continue
                if url.startswith('http://www.mdna-games.com'):
                    continue

                if m := wiki_re.search(url):
                    game = m.group(1)
                    # Keep ids already recorded if this page was linked
                    # before instead of starting over with an empty record.
                    downloads.setdefault(game, Game())
                    continue

                if m := gog_re.search(url):
                    if game is None:
                        print('Missing game id:', url)
                        continue
                    downloads[game].gog = m.group(1)
                    continue

                if m := steam_re.search(url):
                    # Same handling as for GOG: report and carry on rather
                    # than rely on an assert that -O would strip.
                    if game is None:
                        print('Missing game id:', url)
                        continue
                    downloads[game].steam = int(m.group(1))
                    continue

                # Anything else still needs a human to look at it.
                print(game, url)
    return downloads


# Shop ids advertised on the ScummVM wiki, keyed by wiki page title.
downloads = load_html()

print('# Join...')
# Stand-in for games the wiki does not list: both of its ids are None.
no_game = Game()
# Only games present in the YAML data are checked, in sorted key order.
# One line is printed per shop whose id differs: the game, the id from the
# YAML data, then the id from the wiki.
for game in sorted(games):
    shop = games[game]
    listed = downloads.get(game, no_game)
    for field in ('gog', 'steam'):
        ours = getattr(shop, field)
        theirs = getattr(listed, field)
        if ours != theirs:
            print("%-70s" % game, ours, theirs)