2018-09-27 16:45:12 -04:00
|
|
|
<?php declare(strict_types=1);
|
|
|
|
/**
|
|
|
|
* Hummingbird Anime List Client
|
|
|
|
*
|
|
|
|
* An API client for Kitsu to manage anime and manga watch lists
|
|
|
|
*
|
|
|
|
* PHP version 7
|
|
|
|
*
|
|
|
|
* @package HummingbirdAnimeClient
|
|
|
|
* @author Timothy J. Warren <tim@timshomepage.net>
|
|
|
|
* @copyright 2015 - 2018 Timothy J. Warren
|
|
|
|
* @license http://www.opensource.org/licenses/mit-license.html MIT License
|
|
|
|
* @version 4.0
|
|
|
|
* @link https://git.timshomepage.net/timw4mail/HummingBirdAnimeClient
|
|
|
|
*/
|
|
|
|
|
|
|
|
namespace Aviat\AnimeClient\Command;
|
|
|
|
|
2018-10-01 10:50:22 -04:00
|
|
|
use const Aviat\AnimeClient\MILLI_FROM_NANO;
|
|
|
|
use const Aviat\AnimeClient\SRC_DIR;
|
|
|
|
|
|
|
|
use function Amp\Promise\wait;
|
2018-09-27 16:45:12 -04:00
|
|
|
|
|
|
|
use Aviat\AnimeClient\API\{
|
|
|
|
APIRequestBuilder,
|
|
|
|
JsonAPI,
|
|
|
|
ParallelAPIRequest
|
|
|
|
};
|
|
|
|
|
|
|
|
use Aviat\Ion\Json;
|
|
|
|
|
|
|
|
|
|
|
|
final class MALIDCheck extends BaseCommand {

	/**
	 * Kitsu model pulled from the container's 'kitsu-model' entry in execute().
	 * Used to fetch the raw anime/manga lists via its getFullRaw*List methods.
	 */
	private $kitsuModel;

	/**
	 * Check MAL mapping validity
	 *
	 * Builds the Kitsu -> MAL id lists for anime and manga, checks each MAL id
	 * against myanimelist.net, prints a summary for each media type, and writes
	 * the combined result to `mal_mappings.json` in the public directory.
	 *
	 * @param array $args
	 * @param array $options
	 * @throws \Aviat\Ion\Di\Exception\ContainerException
	 * @throws \Aviat\Ion\Di\Exception\NotFoundException
	 * @throws \RuntimeException If the public directory can't be resolved or the mapping file can't be written
	 * @throws \Throwable
	 */
	public function execute(array $args, array $options = []): void
	{
		$this->setContainer($this->setupContainer());
		$this->setCache($this->container->get('cache'));
		$this->kitsuModel = $this->container->get('kitsu-model');

		$kitsuAnimeIdList = $this->formatKitsuList('anime');
		$animeCount = count($kitsuAnimeIdList);
		$this->echoBox("{$animeCount} mappings for Anime");
		$animeMappings = $this->checkMALIds($kitsuAnimeIdList, 'anime');
		$this->mappingStatus($animeMappings, $animeCount, 'anime');

		$kitsuMangaIdList = $this->formatKitsuList('manga');
		$mangaCount = count($kitsuMangaIdList);
		$this->echoBox("{$mangaCount} mappings for Manga");
		$mangaMappings = $this->checkMALIds($kitsuMangaIdList, 'manga');
		$this->mappingStatus($mangaMappings, $mangaCount, 'manga');

		// realpath() returns FALSE for a missing directory; without this check
		// the target would silently become "/mal_mappings.json"
		$publicDir = realpath(SRC_DIR . '/../public');
		if ($publicDir === FALSE)
		{
			throw new \RuntimeException('Unable to resolve the public directory for the MAL mapping file');
		}

		$mappingFile = $publicDir . '/mal_mappings.json';
		$written = file_put_contents($mappingFile, Json::encode([
			'anime' => $animeMappings,
			'manga' => $mangaMappings,
		]));

		// Don't claim the file was saved if the write failed
		if ($written === FALSE)
		{
			throw new \RuntimeException('Unable to write mapping file "' . $mappingFile . '"');
		}

		$this->echoBox('Mapping file saved to "' . $mappingFile . '"');
	}

	/**
	 * Format a kitsu list for the sake of comparison
	 *
	 * Fetches the full raw list of the given type (including media and media
	 * mappings) and returns the list items keyed by their MAL id, sorted by
	 * that id. Each returned item gets a 'titles' key copied from its media
	 * entry. Items without a MAL mapping for the media type are left out.
	 *
	 * @param string $type Media type, 'anime' or 'manga'
	 * @return array List items indexed by MAL id
	 */
	private function formatKitsuList(string $type = 'anime'): array
	{
		$options = [
			'include' => 'media,media.mappings',
		];
		$data = $this->kitsuModel->{'getFullRaw' . ucfirst($type) . 'List'}($options);

		// Without list items or included media/mappings there is nothing to compare
		if (empty($data) || empty($data['data']) || empty($data['included']))
		{
			return [];
		}

		$includes = JsonAPI::organizeIncludes($data['included']);

		// Only bother with mappings from MAL that are of the specified media type
		$malMappings = array_filter($includes['mappings'] ?? [], static function ($mapping) use ($type) {
			return $mapping['externalSite'] === "myanimelist/{$type}";
		});

		$output = [];

		foreach ($data['data'] as $listItem)
		{
			$id = $listItem['relationships']['media']['data']['id'];
			$mediaItem = $includes[$type][$id] ?? NULL;

			// The media entry wasn't included in the response, so neither
			// titles nor mappings are available for this list item
			if ($mediaItem === NULL)
			{
				continue;
			}

			// Set titles
			$listItem['titles'] = $mediaItem['titles'];

			$potentialMappings = $mediaItem['relationships']['mappings'] ?? [];
			$malId = NULL;

			// If several of the media's mappings point to MAL, the last one wins
			foreach ($potentialMappings as $mappingId)
			{
				if (array_key_exists($mappingId, $malMappings))
				{
					$malId = $malMappings[$mappingId]['externalId'];
				}
			}

			// Skip to the next item if there isn't a MAL ID
			if ($malId === NULL)
			{
				continue;
			}

			// Group by malIds to simplify lookup of media details
			// for checking validity of the malId mappings
			$output[$malId] = $listItem;
		}

		ksort($output);

		return $output;
	}

	/**
	 * Check for valid Kitsu -> MAL mapping
	 *
	 * Requests the MAL page for every id in the list and sorts the ids into
	 * three buckets:
	 *  - 'bad':     the MAL page returned a 404 (value: the Kitsu titles)
	 *  - 'good':    one of the Kitsu titles appears in the page body
	 *               (value: the first title that matched)
	 *  - 'suspect': the page exists, but none of the titles were found in it
	 *               (value: the Kitsu titles)
	 *
	 * @param array $kitsuList List items indexed by MAL id, each with a 'titles' key
	 * @param string $type Media type, 'anime' or 'manga'
	 * @return array Map with the keys 'good', 'bad', and 'suspect'
	 * @throws \Throwable
	 */
	private function checkMALIds(array $kitsuList, string $type): array
	{
		$goodMappings = [];
		$badMappings = [];
		$suspectMappings = [];

		$responses = $this->makeMALRequests(array_keys($kitsuList), $type);

		// If the page returns a 404, put it in the bad mappings list
		// otherwise, do a search against the titles, to see if the mapping
		// seems valid
		foreach ($responses as $id => $response)
		{
			$body = wait($response->getBody());
			$titles = $kitsuList[$id]['titles'];

			if ($response->getStatus() === 404)
			{
				$badMappings[$id] = $titles;
				continue;
			}

			// Attempt to determine if the id matches
			// by searching for a matching title
			foreach ($titles as $title)
			{
				if (empty($title))
				{
					continue;
				}

				if (mb_stripos($body, $title) !== FALSE)
				{
					$goodMappings[$id] = $title;

					// Continue on outer loop
					continue 2;
				}
			}

			// The page exists, but none of the known titles show up on it
			$suspectMappings[$id] = $titles;
		}

		return [
			'good' => $goodMappings,
			'bad' => $badMappings,
			'suspect' => $suspectMappings,
		];
	}

	/**
	 * Request the MAL page for each of the given ids
	 *
	 * Requests are sent in chunks of 10, with a one second pause after each
	 * chunk, and progress is echoed to the console.
	 *
	 * @param array $ids MAL ids to look up
	 * @param string $type Media type, 'anime' or 'manga'; used as the url path segment
	 * @return array Responses as returned by ParallelAPIRequest::getResponses(),
	 *               collected across all chunks and indexed by the key each
	 *               request was added with (the MAL id)
	 */
	private function makeMALRequests(array $ids, string $type): array
	{
		$baseUrl = "https://myanimelist.net/{$type}/";

		$requestChunks = array_chunk($ids, 10, TRUE);
		$responses = [];

		// Chunk parallel requests so that we don't hit rate
		// limiting, and get spurious 404 HTML responses
		foreach ($requestChunks as $idChunk)
		{
			$requester = new ParallelAPIRequest();

			foreach ($idChunk as $id)
			{
				$request = APIRequestBuilder::simpleRequest($baseUrl . $id);
				echo "Checking {$baseUrl}{$id} \n";
				$requester->addRequest($request, (string)$id);
			}

			foreach ($requester->getResponses() as $id => $response)
			{
				$responses[$id] = $response;
			}

			echo "Finished checking chunk of 10 entries\n";

			// Rate limiting is annoying :(
			sleep(1);
		}

		return $responses;
	}

	/**
	 * Print a summary of the mapping check for one media type
	 *
	 * @param array $mapping Result of checkMALIds, with 'good', 'bad', and 'suspect' keys
	 * @param int $count Total number of mappings that were checked
	 * @param string $type Media type, 'anime' or 'manga'
	 * @return void
	 */
	private function mappingStatus(array $mapping, int $count, string $type): void
	{
		$good = count($mapping['good']);
		$bad = count($mapping['bad']);
		$suspect = count($mapping['suspect']);

		$uType = ucfirst($type);

		$this->echoBox("{$uType} mappings: {$good}/{$count} Good, {$suspect}/{$count} Suspect, {$bad}/{$count} Broken");
	}
}
|