perf: batch Wikidata SPARQL queries per film instead of per actor

Use a VALUES clause to fetch awards for all actors of a film in a
single SPARQL request, reducing Wikidata API calls from ~20 per film
to 1 and avoiding the idle-timeout errors triggered when Wikidata
rate-limits the per-actor requests.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
thibaud-leclere
2026-04-01 20:37:13 +02:00
parent 369893a77e
commit 116d7b409e
4 changed files with 119 additions and 46 deletions

View File

@@ -22,7 +22,23 @@ class WikidataGateway
*/
public function getAwards(Actor $actor): array
{
$sparql = $this->buildQuery($actor->getName());
return $this->getAwardsForActors([$actor])[$actor->getName()] ?? [];
}
/**
* Fetch awards for multiple actors in a single SPARQL query.
*
* @param list<Actor> $actors
*
* @return array<string, list<array{name: string, year: int}>>
*/
public function getAwardsForActors(array $actors): array
{
if ([] === $actors) {
return [];
}
$sparql = $this->buildBatchQuery($actors);
$response = $this->httpClient->request('GET', self::SPARQL_ENDPOINT, [
'query' => [
@@ -33,19 +49,20 @@ class WikidataGateway
'Accept' => 'application/sparql-results+json',
'User-Agent' => 'LtbxdActorle/1.0',
],
'timeout' => 5,
'timeout' => 10,
]);
$data = $response->toArray();
$awards = [];
foreach ($data['results']['bindings'] ?? [] as $binding) {
$name = $binding['awardLabel']['value'] ?? null;
$actorName = $binding['name']['value'] ?? null;
$awardName = $binding['awardLabel']['value'] ?? null;
$year = $binding['year']['value'] ?? null;
if ($name && $year) {
$awards[] = [
'name' => $name,
if ($actorName && $awardName && $year) {
$awards[$actorName][] = [
'name' => $awardName,
'year' => (int) substr($year, 0, 4),
];
}
@@ -54,13 +71,21 @@ class WikidataGateway
return $awards;
}
private function buildQuery(string $actorName): string
/**
* @param list<Actor> $actors
*/
private function buildBatchQuery(array $actors): string
{
$escaped = str_replace(['\\', '"', "\n", "\r"], ['\\\\', '\\"', '\\n', '\\r'], $actorName);
$values = implode(' ', array_map(function (Actor $actor) {
$escaped = str_replace(['\\', '"', "\n", "\r"], ['\\\\', '\\"', '\\n', '\\r'], $actor->getName());
return '"'.$escaped.'"@en';
}, $actors));
return <<<SPARQL
SELECT ?awardLabel ?year WHERE {
?person rdfs:label "{$escaped}"@en .
SELECT ?name ?awardLabel ?year WHERE {
VALUES ?name { {$values} }
?person rdfs:label ?name .
?person wdt:P31 wd:Q5 .
?person p:P166 ?awardStatement .
?awardStatement ps:P166 ?award .
@@ -68,7 +93,7 @@ class WikidataGateway
BIND(YEAR(?date) AS ?year)
SERVICE wikibase:label { bd:serviceParam wikibase:language "fr,en" . }
}
ORDER BY DESC(?year)
ORDER BY ?name DESC(?year)
SPARQL;
}
}

View File

@@ -23,15 +23,25 @@ readonly class AwardImporter
public function importForActor(Actor $actor): void
{
if ($actor->isAwardsImported()) {
$this->importForActors([$actor]);
}
/**
* @param list<Actor> $actors
*/
public function importForActors(array $actors): void
{
$actorsToFetch = array_filter($actors, fn (Actor $a) => !$a->isAwardsImported());
if ([] === $actorsToFetch) {
return;
}
try {
$wikidataAwards = $this->wikidataGateway->getAwards($actor);
$allAwards = $this->wikidataGateway->getAwardsForActors(array_values($actorsToFetch));
} catch (\Throwable $e) {
$this->logger?->warning('Failed to fetch awards from Wikidata', [
'actor' => $actor->getName(),
'actors' => array_map(fn (Actor $a) => $a->getName(), $actorsToFetch),
'error' => $e->getMessage(),
]);
@@ -40,19 +50,23 @@ readonly class AwardImporter
$knownTypes = $this->awardTypeRepository->findAll();
foreach ($wikidataAwards as $wikidataAward) {
$awardType = $this->resolveAwardType($wikidataAward['name'], $knownTypes);
foreach ($actorsToFetch as $actor) {
$wikidataAwards = $allAwards[$actor->getName()] ?? [];
$award = new Award();
$award->setName($wikidataAward['name']);
$award->setYear($wikidataAward['year']);
$award->setActor($actor);
$award->setAwardType($awardType);
foreach ($wikidataAwards as $wikidataAward) {
$awardType = $this->resolveAwardType($wikidataAward['name'], $knownTypes);
$this->em->persist($award);
$award = new Award();
$award->setName($wikidataAward['name']);
$award->setYear($wikidataAward['year']);
$award->setActor($actor);
$award->setAwardType($awardType);
$this->em->persist($award);
}
$actor->setAwardsImported(true);
}
$actor->setAwardsImported(true);
}
/**

View File

@@ -57,9 +57,8 @@ readonly class ImportFilmsBatchMessageHandler
} else {
$this->actorSyncer->syncActorsForMovie($movie);
foreach ($movie->getActors() as $role) {
$this->awardImporter->importForActor($role->getActor());
}
$actors = array_map(fn ($role) => $role->getActor(), $movie->getActors()->toArray());
$this->awardImporter->importForActors($actors);
$user = $this->em->getReference(\App\Entity\User::class, $userId);
$existingLink = $this->em->getRepository(UserMovie::class)->findOneBy([