perf: batch Wikidata SPARQL queries per film instead of per actor
Use a VALUES clause to fetch the awards for all actors of a film in a single SPARQL request. This reduces Wikidata API calls from ~20 per film to 1 and avoids the idle-timeout errors caused by rate limiting.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -22,7 +22,23 @@ class WikidataGateway
|
||||
*/
|
||||
public function getAwards(Actor $actor): array
|
||||
{
|
||||
$sparql = $this->buildQuery($actor->getName());
|
||||
return $this->getAwardsForActors([$actor])[$actor->getName()] ?? [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch awards for multiple actors in a single SPARQL query.
|
||||
*
|
||||
* @param list<Actor> $actors
|
||||
*
|
||||
* @return array<string, list<array{name: string, year: int}>>
|
||||
*/
|
||||
public function getAwardsForActors(array $actors): array
|
||||
{
|
||||
if ([] === $actors) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$sparql = $this->buildBatchQuery($actors);
|
||||
|
||||
$response = $this->httpClient->request('GET', self::SPARQL_ENDPOINT, [
|
||||
'query' => [
|
||||
@@ -33,19 +49,20 @@ class WikidataGateway
|
||||
'Accept' => 'application/sparql-results+json',
|
||||
'User-Agent' => 'LtbxdActorle/1.0',
|
||||
],
|
||||
'timeout' => 5,
|
||||
'timeout' => 10,
|
||||
]);
|
||||
|
||||
$data = $response->toArray();
|
||||
$awards = [];
|
||||
|
||||
foreach ($data['results']['bindings'] ?? [] as $binding) {
|
||||
$name = $binding['awardLabel']['value'] ?? null;
|
||||
$actorName = $binding['name']['value'] ?? null;
|
||||
$awardName = $binding['awardLabel']['value'] ?? null;
|
||||
$year = $binding['year']['value'] ?? null;
|
||||
|
||||
if ($name && $year) {
|
||||
$awards[] = [
|
||||
'name' => $name,
|
||||
if ($actorName && $awardName && $year) {
|
||||
$awards[$actorName][] = [
|
||||
'name' => $awardName,
|
||||
'year' => (int) substr($year, 0, 4),
|
||||
];
|
||||
}
|
||||
@@ -54,13 +71,21 @@ class WikidataGateway
|
||||
return $awards;
|
||||
}
|
||||
|
||||
private function buildQuery(string $actorName): string
|
||||
/**
|
||||
* @param list<Actor> $actors
|
||||
*/
|
||||
private function buildBatchQuery(array $actors): string
|
||||
{
|
||||
$escaped = str_replace(['\\', '"', "\n", "\r"], ['\\\\', '\\"', '\\n', '\\r'], $actorName);
|
||||
$values = implode(' ', array_map(function (Actor $actor) {
|
||||
$escaped = str_replace(['\\', '"', "\n", "\r"], ['\\\\', '\\"', '\\n', '\\r'], $actor->getName());
|
||||
|
||||
return '"'.$escaped.'"@en';
|
||||
}, $actors));
|
||||
|
||||
return <<<SPARQL
|
||||
SELECT ?awardLabel ?year WHERE {
|
||||
?person rdfs:label "{$escaped}"@en .
|
||||
SELECT ?name ?awardLabel ?year WHERE {
|
||||
VALUES ?name { {$values} }
|
||||
?person rdfs:label ?name .
|
||||
?person wdt:P31 wd:Q5 .
|
||||
?person p:P166 ?awardStatement .
|
||||
?awardStatement ps:P166 ?award .
|
||||
@@ -68,7 +93,7 @@ class WikidataGateway
|
||||
BIND(YEAR(?date) AS ?year)
|
||||
SERVICE wikibase:label { bd:serviceParam wikibase:language "fr,en" . }
|
||||
}
|
||||
ORDER BY DESC(?year)
|
||||
ORDER BY ?name DESC(?year)
|
||||
SPARQL;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,15 +23,25 @@ readonly class AwardImporter
|
||||
|
||||
public function importForActor(Actor $actor): void
|
||||
{
|
||||
if ($actor->isAwardsImported()) {
|
||||
$this->importForActors([$actor]);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param list<Actor> $actors
|
||||
*/
|
||||
public function importForActors(array $actors): void
|
||||
{
|
||||
$actorsToFetch = array_filter($actors, fn (Actor $a) => !$a->isAwardsImported());
|
||||
|
||||
if ([] === $actorsToFetch) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
$wikidataAwards = $this->wikidataGateway->getAwards($actor);
|
||||
$allAwards = $this->wikidataGateway->getAwardsForActors(array_values($actorsToFetch));
|
||||
} catch (\Throwable $e) {
|
||||
$this->logger?->warning('Failed to fetch awards from Wikidata', [
|
||||
'actor' => $actor->getName(),
|
||||
'actors' => array_map(fn (Actor $a) => $a->getName(), $actorsToFetch),
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
|
||||
@@ -40,6 +50,9 @@ readonly class AwardImporter
|
||||
|
||||
$knownTypes = $this->awardTypeRepository->findAll();
|
||||
|
||||
foreach ($actorsToFetch as $actor) {
|
||||
$wikidataAwards = $allAwards[$actor->getName()] ?? [];
|
||||
|
||||
foreach ($wikidataAwards as $wikidataAward) {
|
||||
$awardType = $this->resolveAwardType($wikidataAward['name'], $knownTypes);
|
||||
|
||||
@@ -54,6 +67,7 @@ readonly class AwardImporter
|
||||
|
||||
$actor->setAwardsImported(true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param list<AwardType> $knownTypes
|
||||
|
||||
@@ -57,9 +57,8 @@ readonly class ImportFilmsBatchMessageHandler
|
||||
} else {
|
||||
$this->actorSyncer->syncActorsForMovie($movie);
|
||||
|
||||
foreach ($movie->getActors() as $role) {
|
||||
$this->awardImporter->importForActor($role->getActor());
|
||||
}
|
||||
$actors = array_map(fn ($role) => $role->getActor(), $movie->getActors()->toArray());
|
||||
$this->awardImporter->importForActors($actors);
|
||||
|
||||
$user = $this->em->getReference(\App\Entity\User::class, $userId);
|
||||
$existingLink = $this->em->getRepository(UserMovie::class)->findOneBy([
|
||||
|
||||
@@ -35,19 +35,21 @@ class AwardImporterTest extends TestCase
|
||||
|
||||
public function testSkipsActorWithAwardsAlreadyImported(): void
|
||||
{
|
||||
$actor = $this->createActorWithFlag(awardsImported: true);
|
||||
$actor = $this->createActorWithFlag('Already Imported', awardsImported: true);
|
||||
|
||||
$this->wikidataGateway->expects($this->never())->method('getAwards');
|
||||
$this->wikidataGateway->expects($this->never())->method('getAwardsForActors');
|
||||
|
||||
$this->importer->importForActor($actor);
|
||||
$this->importer->importForActors([$actor]);
|
||||
}
|
||||
|
||||
public function testImportsAwardsAndSetsFlag(): void
|
||||
{
|
||||
$actor = $this->createActorWithFlag(awardsImported: false);
|
||||
$actor = $this->createActorWithFlag('Test Actor', awardsImported: false);
|
||||
|
||||
$this->wikidataGateway->method('getAwards')->willReturn([
|
||||
$this->wikidataGateway->method('getAwardsForActors')->willReturn([
|
||||
'Test Actor' => [
|
||||
['name' => 'Academy Award for Best Actor', 'year' => 2020],
|
||||
],
|
||||
]);
|
||||
|
||||
$existingType = new AwardType();
|
||||
@@ -60,7 +62,7 @@ class AwardImporterTest extends TestCase
|
||||
$persisted[] = $entity;
|
||||
});
|
||||
|
||||
$this->importer->importForActor($actor);
|
||||
$this->importer->importForActors([$actor]);
|
||||
|
||||
$this->assertTrue($actor->isAwardsImported());
|
||||
$this->assertCount(1, $persisted);
|
||||
@@ -73,10 +75,12 @@ class AwardImporterTest extends TestCase
|
||||
|
||||
public function testCreatesNewAwardTypeWhenNoPatternMatches(): void
|
||||
{
|
||||
$actor = $this->createActorWithFlag(awardsImported: false);
|
||||
$actor = $this->createActorWithFlag('Test Actor', awardsImported: false);
|
||||
|
||||
$this->wikidataGateway->method('getAwards')->willReturn([
|
||||
$this->wikidataGateway->method('getAwardsForActors')->willReturn([
|
||||
'Test Actor' => [
|
||||
['name' => 'Screen Actors Guild Award for Outstanding Performance', 'year' => 2019],
|
||||
],
|
||||
]);
|
||||
|
||||
$this->awardTypeRepository->method('findAll')->willReturn([]);
|
||||
@@ -86,7 +90,7 @@ class AwardImporterTest extends TestCase
|
||||
$persisted[] = $entity;
|
||||
});
|
||||
|
||||
$this->importer->importForActor($actor);
|
||||
$this->importer->importForActors([$actor]);
|
||||
|
||||
$this->assertTrue($actor->isAwardsImported());
|
||||
// Should persist both a new AwardType and the Award
|
||||
@@ -104,34 +108,65 @@ class AwardImporterTest extends TestCase
|
||||
|
||||
public function testDoesNotSetFlagOnWikidataError(): void
|
||||
{
|
||||
$actor = $this->createActorWithFlag(awardsImported: false);
|
||||
$actor = $this->createActorWithFlag('Test Actor', awardsImported: false);
|
||||
|
||||
$this->wikidataGateway->method('getAwards')
|
||||
$this->wikidataGateway->method('getAwardsForActors')
|
||||
->willThrowException(new \RuntimeException('Wikidata timeout'));
|
||||
|
||||
$this->importer->importForActor($actor);
|
||||
$this->importer->importForActors([$actor]);
|
||||
|
||||
$this->assertFalse($actor->isAwardsImported());
|
||||
}
|
||||
|
||||
public function testHandlesActorWithNoAwards(): void
|
||||
{
|
||||
$actor = $this->createActorWithFlag(awardsImported: false);
|
||||
$actor = $this->createActorWithFlag('Test Actor', awardsImported: false);
|
||||
|
||||
$this->wikidataGateway->method('getAwards')->willReturn([]);
|
||||
$this->wikidataGateway->method('getAwardsForActors')->willReturn([]);
|
||||
$this->awardTypeRepository->method('findAll')->willReturn([]);
|
||||
|
||||
$this->em->expects($this->never())->method('persist');
|
||||
|
||||
$this->importer->importForActor($actor);
|
||||
$this->importer->importForActors([$actor]);
|
||||
|
||||
$this->assertTrue($actor->isAwardsImported());
|
||||
}
|
||||
|
||||
private function createActorWithFlag(bool $awardsImported): Actor
|
||||
public function testBatchImportsMultipleActors(): void
|
||||
{
|
||||
$actor1 = $this->createActorWithFlag('Actor One', awardsImported: false);
|
||||
$actor2 = $this->createActorWithFlag('Actor Two', awardsImported: false);
|
||||
$alreadyImported = $this->createActorWithFlag('Actor Three', awardsImported: true);
|
||||
|
||||
$this->wikidataGateway->expects($this->once())->method('getAwardsForActors')
|
||||
->with($this->callback(fn (array $actors) => 2 === \count($actors)))
|
||||
->willReturn([
|
||||
'Actor One' => [['name' => 'Academy Award for Best Actor', 'year' => 2020]],
|
||||
'Actor Two' => [['name' => 'Golden Globe for Best Actor', 'year' => 2021]],
|
||||
]);
|
||||
|
||||
$existingType = new AwardType();
|
||||
$existingType->setName('Oscar')->setPattern('Academy Award');
|
||||
|
||||
$this->awardTypeRepository->method('findAll')->willReturn([$existingType]);
|
||||
|
||||
$persisted = [];
|
||||
$this->em->method('persist')->willReturnCallback(function ($entity) use (&$persisted) {
|
||||
$persisted[] = $entity;
|
||||
});
|
||||
|
||||
$this->importer->importForActors([$actor1, $actor2, $alreadyImported]);
|
||||
|
||||
$this->assertTrue($actor1->isAwardsImported());
|
||||
$this->assertTrue($actor2->isAwardsImported());
|
||||
// 2 Awards + 1 new AwardType (Golden Globe)
|
||||
$this->assertCount(3, $persisted);
|
||||
}
|
||||
|
||||
private function createActorWithFlag(string $name, bool $awardsImported): Actor
|
||||
{
|
||||
$actor = new Actor();
|
||||
$actor->setName('Test Actor');
|
||||
$actor->setName($name);
|
||||
$actor->setAwardsImported($awardsImported);
|
||||
|
||||
return $actor;
|
||||
|
||||
Reference in New Issue
Block a user