From a072c79ae64fe7d6cc29ae44f97871568f9f3724 Mon Sep 17 00:00:00 2001 From: Fabrice Gangler <fabrice.gangler@adullact.org> Date: Fri, 3 Jul 2020 18:06:52 +0200 Subject: [PATCH] POC END --- wikidata_200.php | 94 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 74 insertions(+), 20 deletions(-) diff --git a/wikidata_200.php b/wikidata_200.php index af7b263..6be23a4 100644 --- a/wikidata_200.php +++ b/wikidata_200.php @@ -34,6 +34,15 @@ $rgIds = [ 'Q16994', // Région Pays de la Loire 'Q15104', // Région Provence-Alpes-Côte d'Azur ]; + + $outputDirectory = "./results"; + $cacheDirectory = "./cache"; + $csvGlobalFile = "$outputDirectory/000_global.csv"; + + if(is_file($csvGlobalFile)) { + unlink($csvGlobalFile); + } + foreach ($rgIds as $rgId) { echo "------------------------------- $rgId --------------------------- "; run($rgId); @@ -54,7 +63,16 @@ foreach ($rgIds as $rgId) { */ function run($rgId) { + $outputDirectory = "./results"; $cacheDirectory = "./cache"; + $csvGlobalFile = "$outputDirectory/000_global.csv"; + $csvGlobalData = ''; + + + if(!is_dir($outputDirectory)) { + mkdir($outputDirectory, 0777, true); + } + //if(!is_dir($cacheDirectory)) { // mkdir($cacheDirectory, 0777, true); //} @@ -112,13 +130,26 @@ function run($rgId) { // Population @@@TODO extract date $rgPopulationData = $rgExtraData->P1082; $rgOutputPopulation = ''; + $rgPopulationDataByYear = []; + $rgPopulationMostRecentDate = ''; foreach ($rgPopulationData as $key => $populationData) { $populationValue = $populationData->mainsnak->datavalue->value->amount; $populationValue = str_replace('+', '', $populationValue); $populationValue = number_format($populationValue, 0, ',', ' '); - $rgOutputPopulation .= "$key - $populationValue personnes ----> @@@TODO extract date\n"; + $rgPopulationDate = null; + if(isset($populationData->qualifiers->P585[0]->datavalue->value->time)){ + $rgPopulationDate = $populationData->qualifiers->P585[0]->datavalue->value->time; + $rgPopulationDate = substr($rgPopulationDate, 1, 4); + $rgPopulationDataByYear[$rgPopulationDate] = $populationValue; + if(is_null($rgPopulationMostRecentDate) | $rgPopulationMostRecentDate < $rgPopulationDate) { + $rgPopulationMostRecentDate = "$rgPopulationDate"; + } + } + + // $rgOutputPopulation .= "$key - $populationValue d'habitants ----> @@@TODO extract date\n"; } - $rgPopulationData = $rgExtraData->P1082; + $rgPopulation = $rgPopulationDataByYear[$rgPopulationMostRecentDate]; + $rgPopulationDate = $rgPopulationMostRecentDate; /////////////////////////////////////// @@ -142,8 +173,7 @@ function run($rgId) { echo "URL INSEE statistiques : $rgInseeUrlStat \n"; echo "URL Open Street Map : ${rgOsmUrl} \n"; echo "Open Street Map ID: $rgOsmId \n"; - echo "\n"; - echo "--- Population ----------------------- \n"; + echo "Population : $rgPopulation d'habitants ----> $rgPopulationDate\n"; echo $rgOutputPopulation; @@ -214,13 +244,25 @@ function run($rgId) { // Population @@@TODO extract date $dptPopulationData = $dptExtraData->P1082; $dptOutputPopulation = ''; + $dptPopulationDataByYear = []; + $dptPopulationMostRecentDate = ''; foreach ($dptPopulationData as $key => $populationData) { $populationValue = $populationData->mainsnak->datavalue->value->amount; $populationValue = str_replace('+', '', $populationValue); $populationValue = number_format($populationValue, 0, ',', ' '); - $dptOutputPopulation .= "$key - $populationValue personnes ----> @@@TODO extract date\n"; + $dptPopulationDate = null; + if(isset($populationData->qualifiers->P585[0]->datavalue->value->time)){ + $dptPopulationDate = $populationData->qualifiers->P585[0]->datavalue->value->time; + $dptPopulationDate = substr($dptPopulationDate, 1, 4); + $dptPopulationDataByYear[$dptPopulationDate] = $populationValue; + if(is_null($dptPopulationMostRecentDate) | $dptPopulationMostRecentDate < $dptPopulationDate) { + $dptPopulationMostRecentDate = "$dptPopulationDate"; + } + } + // $dptOutputPopulation .= "$key - $populationValue d'habitants ----> @@@TODO extract date\n"; } - + $dptPopulation = $dptPopulationDataByYear[$dptPopulationMostRecentDate]; + $dptPopulationDate = $dptPopulationMostRecentDate; // print_r($dptExtraData->P150); @@ -329,15 +371,25 @@ function run($rgId) { // Population @@@TODO extract date $entityPopulationData = $entityExtraData->P1082; $outputPopulation = ''; + $populationDataByYear = []; + $populationMostRecentDate = ''; foreach ($entityPopulationData as $key => $populationData) { $populationValue = $populationData->mainsnak->datavalue->value->amount; $populationValue = str_replace('+', '', $populationValue); $populationValue = number_format($populationValue, 0, ',', ' '); - $outputPopulation .= "$key - $populationValue personnes ----> @@@TODO extract date\n"; + $populationDate = null; + if(isset($populationData->qualifiers->P585[0]->datavalue->value->time)){ + $populationDate = $populationData->qualifiers->P585[0]->datavalue->value->time; + $populationDate = substr($populationDate, 1, 4); + $populationDataByYear[$populationDate] = $populationValue; + if(is_null($populationMostRecentDate) | $populationMostRecentDate < $populationDate) { + $populationMostRecentDate = "$populationDate"; + } + } + // $outputPopulation .= "$key - $populationValue d'habitants ----> $populationDate\n"; } - - - + $entityPopulation = $populationDataByYear[$populationMostRecentDate]; + $entityPopulationDate = $populationMostRecentDate; $dptSubEntitiesOutput .= "\n..... $entityInseeId - $entityName ........................................................................\n\n"; $dptSubEntitiesOutput .= "Cache: $filePath \n"; @@ -359,15 +411,14 @@ function run($rgId) { $dptSubEntitiesOutput .= "URL INSEE stat. 2 : ${entityInseeUrlStat2} \n"; $dptSubEntitiesOutput .= "URL Public directory : $entityPublicServiceDirectoryUrl \n"; $dptSubEntitiesOutput .= "URL Open Street Map : ${entityOsmUrl} \n"; - $dptSubEntitiesOutput .= "Open Street Map ID: $entityOsmId \n"; - $dptSubEntitiesOutput .= "\n"; - $dptSubEntitiesOutput .= "--- Population ----------------------- \n"; - $dptSubEntitiesOutput .= $outputPopulation; + $dptSubEntitiesOutput .= "Open Street Map ID : $entityOsmId \n"; + $dptSubEntitiesOutput .= "Population : $entityPopulation d'habitants ----> $entityPopulationDate\n"; $dptSubEntitiesOutput .= "\n"; - - - $dptNbOfValidSubEntities++; + + $entityPopulation = str_replace(' ', '', $entityPopulation); + $csvGlobalHead = "WikiData;INSEE;Région;Dpt;Département;Commune;Population;Date;Site web;\n"; + $csvGlobalData .= "$subEntityId;$entityInseeId;$rgName;$dptInseeId;$dptName;$entityName;$entityPopulation;$entityPopulationDate;$entityWebsite;\n"; } } @@ -390,9 +441,7 @@ function run($rgId) { echo "URL INSEE statistiques : $dptInseeUrlStat \n"; echo "URL Open Street Map : ${dptOsmUrl} \n"; echo "Open Street Map ID: $dptOsmId \n"; - echo "\n"; - echo "--- Population ----------------------- \n"; - echo $dptOutputPopulation; + echo "Population : $dptPopulation personnes ----> $dptPopulationDate\n"; echo "\n"; echo "----- $dptNbOfValidSubEntities communes (vs $dptNbOfSubEntities subdivisions administratives) ------------------------ \n"; echo "----- dont $dptNbOfValidSubEntitiesWithWebsite communes avec un site web ------------------------ \n"; @@ -403,6 +452,11 @@ function run($rgId) { //////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////// } + + if(!is_file($csvGlobalFile)) { + file_put_contents($csvGlobalFile, $csvGlobalHead); + } + file_put_contents($csvGlobalFile, $csvGlobalData, FILE_APPEND); } -- GitLab