fix: reduce false positives in award detection

Filter SPARQL query to only return entertainment awards (film, TV,
music, theater) and add a canonical award map to normalize variants
(e.g. all Oscar/Academy Award → "Oscar", all Golden Globe → "Golden
Globe"). Non-entertainment awards (orders, medals, honorary degrees)
are excluded both at SPARQL level and via PHP keyword filter.

Also restart messenger container on cache:clear to avoid stale DI
container errors.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
thibaud-leclere
2026-04-01 21:12:28 +02:00
parent 116d7b409e
commit 295bb16ab7
4 changed files with 255 additions and 7 deletions

View File

@@ -53,7 +53,7 @@ class AwardImporterTest extends TestCase
]);
$existingType = new AwardType();
$existingType->setName('Oscar')->setPattern('Academy Award');
$existingType->setName('Oscar')->setPattern('Oscar');
$this->awardTypeRepository->method('findAll')->willReturn([$existingType]);
@@ -73,7 +73,7 @@ class AwardImporterTest extends TestCase
$this->assertSame($actor, $persisted[0]->getActor());
}
public function testCreatesNewAwardTypeWhenNoPatternMatches(): void
public function testCanonicalMapGroupsRelatedAwards(): void
{
$actor = $this->createActorWithFlag('Test Actor', awardsImported: false);
@@ -98,14 +98,67 @@ class AwardImporterTest extends TestCase
$newType = $persisted[0];
$this->assertInstanceOf(AwardType::class, $newType);
$this->assertSame('Screen Actors Guild Award', $newType->getName());
$this->assertSame('Screen Actors Guild Award', $newType->getPattern());
$this->assertSame('SAG', $newType->getName());
$this->assertSame('SAG', $newType->getPattern());
$award = $persisted[1];
$this->assertInstanceOf(Award::class, $award);
$this->assertSame($newType, $award->getAwardType());
}
/**
 * Imports a single award named "Bambi for Best Film" with no pre-existing
 * award types and checks that the first persisted entity is an AwardType
 * whose name is "Bambi".
 */
public function testFallsBackToExtractPrefixWhenNotInCanonicalMap(): void
{
    $actor = $this->createActorWithFlag('Test Actor', awardsImported: false);

    // The repository reports no known award types, so nothing can be reused.
    $this->awardTypeRepository->method('findAll')->willReturn([]);

    $awardsByActor = [
        'Test Actor' => [
            ['name' => 'Bambi for Best Film', 'year' => 2019],
        ],
    ];
    $this->wikidataGateway->method('getAwardsForActors')->willReturn($awardsByActor);

    // Record every entity handed to persist(), in call order.
    $captured = [];
    $this->em->method('persist')->willReturnCallback(function ($object) use (&$captured) {
        $captured[] = $object;
    });

    $this->importer->importForActors([$actor]);

    $createdType = $captured[0];
    $this->assertInstanceOf(AwardType::class, $createdType);
    $this->assertSame('Bambi', $createdType->getName());
}
/**
 * Feeds the importer four awards (a Légion d'honneur rank, an honorary
 * doctorate, a Rhodes scholarship and an Oscar) and checks that exactly two
 * entities are persisted: an AwardType named "Oscar" followed by an Award.
 */
public function testExcludesNonEntertainmentAwards(): void
{
    $actor = $this->createActorWithFlag('Test Actor', awardsImported: false);

    // No award types exist beforehand.
    $this->awardTypeRepository->method('findAll')->willReturn([]);

    $awardsByActor = [
        'Test Actor' => [
            ['name' => 'chevalier de la Légion d\'honneur', 'year' => 2015],
            ['name' => 'docteur honoris causa', 'year' => 2018],
            ['name' => 'bourse Rhodes', 'year' => 2010],
            ['name' => 'Oscar du meilleur acteur', 'year' => 2020],
        ],
    ];
    $this->wikidataGateway->method('getAwardsForActors')->willReturn($awardsByActor);

    // Record every entity handed to persist(), in call order.
    $captured = [];
    $this->em->method('persist')->willReturnCallback(function ($object) use (&$captured) {
        $captured[] = $object;
    });

    $this->importer->importForActors([$actor]);

    // Only the Oscar should be persisted (1 AwardType + 1 Award)
    $this->assertCount(2, $captured);
    [$oscarType, $oscarAward] = $captured;
    $this->assertInstanceOf(AwardType::class, $oscarType);
    $this->assertSame('Oscar', $oscarType->getName());
    $this->assertInstanceOf(Award::class, $oscarAward);
}
public function testDoesNotSetFlagOnWikidataError(): void
{
$actor = $this->createActorWithFlag('Test Actor', awardsImported: false);
@@ -146,7 +199,7 @@ class AwardImporterTest extends TestCase
]);
$existingType = new AwardType();
$existingType->setName('Oscar')->setPattern('Academy Award');
$existingType->setName('Oscar')->setPattern('Oscar');
$this->awardTypeRepository->method('findAll')->willReturn([$existingType]);
@@ -163,6 +216,60 @@ class AwardImporterTest extends TestCase
$this->assertCount(3, $persisted);
}
/**
 * Imports a single French-labelled award, "Bodil du meilleur acteur", with
 * no pre-existing award types and checks that the first persisted entity is
 * an AwardType whose name is "Bodil".
 */
public function testExtractPrefixHandlesFrenchPatterns(): void
{
    $actor = $this->createActorWithFlag('Test Actor', awardsImported: false);

    // The repository reports no known award types.
    $this->awardTypeRepository->method('findAll')->willReturn([]);

    $awardsByActor = [
        'Test Actor' => [
            ['name' => 'Bodil du meilleur acteur', 'year' => 2019],
        ],
    ];
    $this->wikidataGateway->method('getAwardsForActors')->willReturn($awardsByActor);

    // Record every entity handed to persist(), in call order.
    $captured = [];
    $this->em->method('persist')->willReturnCallback(function ($object) use (&$captured) {
        $captured[] = $object;
    });

    $this->importer->importForActors([$actor]);

    $createdType = $captured[0];
    $this->assertInstanceOf(AwardType::class, $createdType);
    $this->assertSame('Bodil', $createdType->getName());
}
/**
 * With an "Oscar" AwardType already returned by the repository, imports two
 * Oscar-labelled awards (one lower-cased, one capitalised) and checks that
 * only two Award entities are persisted, both linked to that existing type.
 */
public function testCanonicalMapReusesExistingType(): void
{
    $actor = $this->createActorWithFlag('Test Actor', awardsImported: false);

    // Pre-existing type that both imported awards are expected to attach to.
    $existingOscar = new AwardType();
    $existingOscar->setName('Oscar')->setPattern('Oscar');
    $this->awardTypeRepository->method('findAll')->willReturn([$existingOscar]);

    $awardsByActor = [
        'Test Actor' => [
            ['name' => 'oscar du meilleur acteur', 'year' => 2020],
            ['name' => 'Oscar de la meilleure actrice', 'year' => 2021],
        ],
    ];
    $this->wikidataGateway->method('getAwardsForActors')->willReturn($awardsByActor);

    // Record every entity handed to persist(), in call order.
    $captured = [];
    $this->em->method('persist')->willReturnCallback(function ($object) use (&$captured) {
        $captured[] = $object;
    });

    $this->importer->importForActors([$actor]);

    // Both awards should reuse the same "Oscar" type — only 2 Awards persisted, no new AwardType
    $this->assertCount(2, $captured);
    $this->assertContainsOnlyInstancesOf(Award::class, $captured);
    [$firstAward, $secondAward] = $captured;
    $this->assertSame($existingOscar, $firstAward->getAwardType());
    $this->assertSame($existingOscar, $secondAward->getAwardType());
}
private function createActorWithFlag(string $name, bool $awardsImported): Actor
{
$actor = new Actor();