From a072c79ae64fe7d6cc29ae44f97871568f9f3724 Mon Sep 17 00:00:00 2001
From: Fabrice Gangler <fabrice.gangler@adullact.org>
Date: Fri, 3 Jul 2020 18:06:52 +0200
Subject: [PATCH] POC END

---
 wikidata_200.php | 94 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 74 insertions(+), 20 deletions(-)

diff --git a/wikidata_200.php b/wikidata_200.php
index af7b263..6be23a4 100644
--- a/wikidata_200.php
+++ b/wikidata_200.php
@@ -34,6 +34,15 @@ $rgIds = [
     'Q16994',       // Région Pays de la Loire
     'Q15104',       // Région Provence-Alpes-Côte d'Azur
 ];
+// Start every run from a fresh global CSV: run() appends its rows to this file.
+$outputDirectory = './results';
+$cacheDirectory = './cache';
+$csvGlobalFile = $outputDirectory . '/000_global.csv';
+
+if (is_file($csvGlobalFile)) {
+    unlink($csvGlobalFile);
+}
+
 foreach ($rgIds as $rgId) {
     echo "------------------------------- $rgId --------------------------- ";
     run($rgId);
@@ -54,7 +63,16 @@ foreach ($rgIds as $rgId) {
  */
 function run($rgId) {
 
+    $outputDirectory = "./results";
     $cacheDirectory = "./cache";
+    $csvGlobalFile = "$outputDirectory/000_global.csv";
+    $csvGlobalData = '';
+    // Defined up front so the CSV header exists even when this region yields no commune row.
+    $csvGlobalHead = "WikiData;INSEE;Région;Dpt;Département;Commune;Population;Date;Site web;\n";
+    if(!is_dir($outputDirectory)) {
+       mkdir($outputDirectory, 0777, true);
+    }
+
     //if(!is_dir($cacheDirectory)) {
     //    mkdir($cacheDirectory, 0777, true);
     //}
@@ -112,13 +130,26 @@ function run($rgId) {
     // Population @@@TODO extract date
     $rgPopulationData = $rgExtraData->P1082;
     $rgOutputPopulation = '';
+    $rgPopulationDataByYear = [];
+    $rgPopulationMostRecentDate = '';
     foreach ($rgPopulationData as $key => $populationData) {
         $populationValue = $populationData->mainsnak->datavalue->value->amount;
         $populationValue = str_replace('+', '', $populationValue);
         $populationValue = number_format($populationValue, 0, ',', ' ');
-        $rgOutputPopulation .= "$key - $populationValue personnes ----> @@@TODO extract date\n";
+        $rgPopulationDate = null;
+        // Only statements qualified with P585 (point in time) can be ranked; undated ones are skipped.
+        if (isset($populationData->qualifiers->P585[0]->datavalue->value->time)) {
+            // Time values look like "+2017-01-01T00:00:00Z": drop the sign, keep the 4-digit year.
+            $rgPopulationDate = substr($populationData->qualifiers->P585[0]->datavalue->value->time, 1, 4);
+            $rgPopulationDataByYear[$rgPopulationDate] = $populationValue;
+            // '' means no dated value has been seen yet.
+            if ($rgPopulationMostRecentDate === '' || $rgPopulationMostRecentDate < $rgPopulationDate) {
+                $rgPopulationMostRecentDate = $rgPopulationDate;
+            }
+        }
     }
-    $rgPopulationData = $rgExtraData->P1082;
+    $rgPopulation     = isset($rgPopulationDataByYear[$rgPopulationMostRecentDate]) ? $rgPopulationDataByYear[$rgPopulationMostRecentDate] : ''; // '' when no dated value exists
+    $rgPopulationDate = $rgPopulationMostRecentDate;
 
 
     ///////////////////////////////////////
@@ -142,8 +173,7 @@ function run($rgId) {
     echo "URL INSEE statistiques : $rgInseeUrlStat \n";
     echo "URL Open Street Map : ${rgOsmUrl} \n";
     echo "Open Street Map ID: $rgOsmId \n";
-    echo "\n";
-    echo "--- Population ----------------------- \n";
+    echo 'Population :  ' . $rgPopulation . " d'habitants ---->  " . $rgPopulationDate . "\n"; // most recent dated figure
     echo $rgOutputPopulation;
 
 
@@ -214,13 +244,25 @@ function run($rgId) {
         // Population @@@TODO extract date
         $dptPopulationData = $dptExtraData->P1082;
         $dptOutputPopulation = '';
+        $dptPopulationDataByYear = [];
+        $dptPopulationMostRecentDate = '';
         foreach ($dptPopulationData as $key => $populationData) {
             $populationValue = $populationData->mainsnak->datavalue->value->amount;
             $populationValue = str_replace('+', '', $populationValue);
             $populationValue = number_format($populationValue, 0, ',', ' ');
-            $dptOutputPopulation .= "$key - $populationValue personnes ----> @@@TODO extract date\n";
+            $dptPopulationDate = null;
+            // Only statements qualified with P585 (point in time) can be ranked; undated ones are skipped.
+            if (isset($populationData->qualifiers->P585[0]->datavalue->value->time)) {
+                // Time values look like "+2017-01-01T00:00:00Z": drop the sign, keep the 4-digit year.
+                $dptPopulationDate = substr($populationData->qualifiers->P585[0]->datavalue->value->time, 1, 4);
+                $dptPopulationDataByYear[$dptPopulationDate] = $populationValue;
+                if ($dptPopulationMostRecentDate === '' || $dptPopulationMostRecentDate < $dptPopulationDate) { // '' = no dated value seen yet
+                    $dptPopulationMostRecentDate = $dptPopulationDate;
+                }
+            }
         }
-
+        $dptPopulation     = isset($dptPopulationDataByYear[$dptPopulationMostRecentDate]) ? $dptPopulationDataByYear[$dptPopulationMostRecentDate] : ''; // '' when no dated value exists
+        $dptPopulationDate = $dptPopulationMostRecentDate;
 
         // print_r($dptExtraData->P150);
 
@@ -329,15 +371,25 @@ function run($rgId) {
                 // Population @@@TODO extract date
                 $entityPopulationData = $entityExtraData->P1082;
                 $outputPopulation = '';
+                $populationDataByYear = [];
+                $populationMostRecentDate = '';
                 foreach ($entityPopulationData as $key => $populationData) {
                     $populationValue = $populationData->mainsnak->datavalue->value->amount;
                     $populationValue = str_replace('+', '', $populationValue);
                     $populationValue = number_format($populationValue, 0, ',', ' ');
-                    $outputPopulation .= "$key - $populationValue personnes ----> @@@TODO extract date\n";
+                    $populationDate = null;
+                    // Only statements qualified with P585 (point in time) can be ranked; undated ones are skipped.
+                    if (isset($populationData->qualifiers->P585[0]->datavalue->value->time)) {
+                        // Time values look like "+2017-01-01T00:00:00Z": drop the sign, keep the 4-digit year.
+                        $populationDate = substr($populationData->qualifiers->P585[0]->datavalue->value->time, 1, 4);
+                        $populationDataByYear[$populationDate] = $populationValue;
+                        if ($populationMostRecentDate === '' || $populationMostRecentDate < $populationDate) { // '' = no dated value seen yet
+                            $populationMostRecentDate = $populationDate;
+                        }
+                    }
                 }
-
-
-
+                $entityPopulation     = isset($populationDataByYear[$populationMostRecentDate]) ? $populationDataByYear[$populationMostRecentDate] : ''; // '' when no dated value exists
+                $entityPopulationDate = $populationMostRecentDate;
 
                 $dptSubEntitiesOutput .=   "\n..... $entityInseeId - $entityName ........................................................................\n\n";
                 $dptSubEntitiesOutput .=  "Cache: $filePath \n";
@@ -359,15 +411,14 @@ function run($rgId) {
                 $dptSubEntitiesOutput .=   "URL INSEE stat. 2 : ${entityInseeUrlStat2} \n";
                 $dptSubEntitiesOutput .=   "URL Public directory : $entityPublicServiceDirectoryUrl \n";
                 $dptSubEntitiesOutput .=   "URL Open Street Map : ${entityOsmUrl} \n";
-                $dptSubEntitiesOutput .=   "Open Street Map ID: $entityOsmId \n";
-                $dptSubEntitiesOutput .=   "\n";
-                $dptSubEntitiesOutput .=   "--- Population ----------------------- \n";
-                $dptSubEntitiesOutput .=   $outputPopulation;
+                $dptSubEntitiesOutput .= 'Open Street Map ID : ' . $entityOsmId . " \n";
+                $dptSubEntitiesOutput .= 'Population : ' . $entityPopulation . " d'habitants ---->  " . $entityPopulationDate . "\n";
                 $dptSubEntitiesOutput .=   "\n";
-
-
-
                 $dptNbOfValidSubEntities++;
+                // CSV export: one ';'-terminated row per commune, population written without spaces.
+                $entityPopulation = str_replace(' ', '', $entityPopulation);
+                $csvGlobalHead = "WikiData;INSEE;Région;Dpt;Département;Commune;Population;Date;Site web;\n";
+                $csvGlobalData .= implode(';', [$subEntityId, $entityInseeId, $rgName, $dptInseeId, $dptName, $entityName, $entityPopulation, $entityPopulationDate, $entityWebsite]) . ";\n";
             }
         }
 
@@ -390,9 +441,7 @@ function run($rgId) {
         echo "URL INSEE statistiques : $dptInseeUrlStat \n";
         echo "URL Open Street Map : ${dptOsmUrl} \n";
         echo "Open Street Map ID: $dptOsmId \n";
-        echo "\n";
-        echo "--- Population ----------------------- \n";
-        echo $dptOutputPopulation;
+        echo 'Population : ' . $dptPopulation . ' personnes ---->  ' . $dptPopulationDate . "\n"; // most recent dated figure
         echo "\n";
         echo "----- $dptNbOfValidSubEntities communes (vs $dptNbOfSubEntities subdivisions administratives) ------------------------ \n";
         echo "----- dont $dptNbOfValidSubEntitiesWithWebsite communes avec un site web ------------------------ \n";
@@ -403,6 +452,11 @@ function run($rgId) {
         ////////////////////////////////////////////////////////////////////////////////////////////////////////
         ////////////////////////////////////////////////////////////////////////////////////////////////////////
     }
+
+    // The header row is written only when the global CSV does not exist yet;
+    // every call then appends the rows collected for its region.
+    $csvGlobalPayload = is_file($csvGlobalFile) ? $csvGlobalData : $csvGlobalHead . $csvGlobalData;
+    file_put_contents($csvGlobalFile, $csvGlobalPayload, FILE_APPEND);
 }
 
 
-- 
GitLab