From ed0af3b70784cc05c21d80716ed120628f341f9c Mon Sep 17 00:00:00 2001
From: Fabrice Gangler <fabrice.gangler@adullact.org>
Date: Fri, 3 Jul 2020 11:12:01 +0200
Subject: [PATCH] poc v0.2

---
 wikidata_200.php | 373 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 373 insertions(+)
 create mode 100644 wikidata_200.php

diff --git a/wikidata_200.php b/wikidata_200.php
new file mode 100644
index 0000000..6949e4d
--- /dev/null
+++ b/wikidata_200.php
@@ -0,0 +1,373 @@
+<?php
+// Builds a lower-case ASCII slug from $string, joining its parts with $delimiter.
+function slugify($string, $delimiter = '-') {
+    $previousLocale = setlocale(LC_ALL, '0'); // '0' queries the locale without changing it
+    setlocale(LC_ALL, 'en_US.UTF-8');
+    $ascii = iconv('UTF-8', 'ASCII//TRANSLIT', $string);
+    // Keep letters, digits and separator characters only, then lower-case what is left.
+    $kept = strtolower(preg_replace("/[^a-zA-Z0-9\/_|+ -]/", '', $ascii));
+    $slug = trim(preg_replace("/[\/_|+ -]+/", $delimiter, $kept), $delimiter);
+    setlocale(LC_ALL, $previousLocale); // put the caller's locale back
+    return $slug;
+}
+
+// Wikidata IDs of the regions to report on, one run() call each.
+$rgIds = ['Q18678265']; // 34 - Hérault
+
+foreach ($rgIds as $regionId) {
+    run($regionId);
+}
+
+/**
+ * Prints a plain-text report for one Wikidata region: the region itself, then every entity
+ * listed in its P150 claim (handled as a département) followed by that entity's communes.
+ *
+ * Entity JSON is downloaded from Wikidata on first use and cached below "./cache", which is
+ * resolved against the current working directory; later runs reuse the cached files.
+ * Labels, descriptions, sitelinks and claims that are absent are printed as empty strings.
+ * An entity that cannot be loaded raises an E_USER_WARNING and is skipped.
+ *
+ * Wikidata properties read, with the meaning this script gives them:
+ *   P31 / P582  entity type / qualifier that excludes a commune from the report
+ *   P150        sub-entities (départements of a region, communes of a département)
+ *   P281        postal code                 P374   INSEE code of a commune
+ *   P402        OpenStreetMap relation ID   P856   website
+ *   P1082       population                  P2002  Twitter account
+ *   P2585       INSEE code of a region      P2586  INSEE code of a département
+ *   P3206       data.gouv.fr organization   P6671  public service directory ID
+ *
+ * @param $rgId  wikidata region ID
+ */
+function run($rgId) {
+
+    $cacheDirectory = "./cache";
+
+    /////////////////////////// Region
+    $rgCacheDirectory = "$cacheDirectory/regions";
+    // Path used by loadWikidataEntity() for this entity; kept here for the report.
+    $rgFilePath = "$rgCacheDirectory/$rgId.json";
+    $rg = loadWikidataEntity($rgId, $rgCacheDirectory);
+    if ($rg === null) {
+        trigger_error("Unable to load Wikidata region $rgId", E_USER_WARNING);
+        return;
+    }
+    $rgModified = $rg->modified;
+    $rgIdProperty = $rg->id;
+    $rgLinkWikidata = "https://www.wikidata.org/wiki/$rgIdProperty";
+    $rgLinkWikidataJson = "https://www.wikidata.org/wiki/Special:EntityData/$rgIdProperty.json";
+    $rgName = isset($rg->labels->fr->value) ? $rg->labels->fr->value : '';
+    ///////////////////////////////////////
+    $rgDescriptionI18nFr = isset($rg->descriptions->fr->value) ? $rg->descriptions->fr->value : '';
+    $rgDescriptionI18nEn = isset($rg->descriptions->en->value) ? $rg->descriptions->en->value : '';
+    ///////////////////////////////////////
+    $rgLinkCommonswiki = '';
+    if (isset($rg->sitelinks->commonswiki->url)) {
+        $rgLinkCommonswiki = $rg->sitelinks->commonswiki->url;
+    }
+    $rgLinkWikipediaI18nFr = isset($rg->sitelinks->frwiki->url) ? $rg->sitelinks->frwiki->url : '';
+    $rgLinkWikipediaI18nEn = isset($rg->sitelinks->enwiki->url) ? $rg->sitelinks->enwiki->url : '';
+    ///////////////////////////////////////
+    $rgExtraData = isset($rg->claims) ? $rg->claims : null;
+    $rgInseeId = claimValue($rgExtraData, 'P2585');
+    $rgInseeUrlStat = "https://www.insee.fr/fr/statistiques?geo=REG-$rgInseeId";
+    $rgInseeUrlGeo = "https://www.insee.fr/fr/metadonnees/cog/region/REG$rgInseeId-".slugify($rgName);
+    $rgOsmId = claimValue($rgExtraData, 'P402');
+    $rgOsmUrl = "https://www.openstreetmap.org/relation/$rgOsmId";
+    ///////////////////////////////////////
+    $rgLinkTwitter = claimValue($rgExtraData, 'P2002');
+    $rgDataGouvId = claimValue($rgExtraData, 'P3206');
+    $rgDataGouvUrl = '';
+    if ($rgDataGouvId !== '') {
+        $rgDataGouvUrl = "https://www.data.gouv.fr/organizations/$rgDataGouvId/";
+    }
+
+    // Population @@@TODO extract date
+    $rgOutputPopulation = formatPopulation($rgExtraData);
+
+    ///////////////////////////////////////
+    echo   "\n..... Région $rgName  ........................................................................\n\n";
+    echo "Cache: $rgFilePath \n";
+    echo "Wikidata update: $rgModified \n";
+    echo "Wikidata ID: $rgIdProperty \n";
+    echo "Wikidata Name: $rgName \n";
+    echo "INSEE Code: $rgInseeId \n";
+    echo "DataGouv ID: $rgDataGouvId  \n";
+    echo "Wikidata Type FR : $rgDescriptionI18nFr \n";
+    echo "Wikidata Type EN : $rgDescriptionI18nEn \n";
+    echo "URL Twitter : https://twitter.com/$rgLinkTwitter \n";
+    echo "URL wikipedia FR : {$rgLinkWikipediaI18nFr} \n";
+    echo "URL wikipedia EN : {$rgLinkWikipediaI18nEn} \n";
+    echo "URL wikimedia commons : {$rgLinkCommonswiki} \n";
+    echo "URL WikiData : {$rgLinkWikidata} \n";
+    echo "URL WikiData Json : $rgLinkWikidataJson \n";
+    echo "URL Data Gouv : $rgDataGouvUrl \n";
+    echo "URL INSEE : $rgInseeUrlGeo <--- @@@TODO slug \n";
+    echo "URL INSEE statistiques : $rgInseeUrlStat \n";
+    echo "URL Open Street Map : {$rgOsmUrl} \n";
+    echo "Open Street Map ID: $rgOsmId \n";
+    echo "\n";
+    echo "--- Population ----------------------- \n";
+    echo $rgOutputPopulation;
+
+    /////////// DEPARTEMENTS OF THE REGION ////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////
+    $dptCacheDirectory = "$cacheDirectory/departements";
+    $rgSubEntities = isset($rgExtraData->P150) ? $rgExtraData->P150 : [];
+    foreach ($rgSubEntities as $rgSubEntity) {
+        if (!isset($rgSubEntity->mainsnak->datavalue->value->id)) {
+            continue; // statement without a target entity
+        }
+        $dptId = $rgSubEntity->mainsnak->datavalue->value->id;
+        // Path used by loadWikidataEntity() for this entity; kept here for the report.
+        $dptFilePath = "$dptCacheDirectory/$dptId.json";
+        $dpt = loadWikidataEntity($dptId, $dptCacheDirectory);
+        if ($dpt === null) {
+            trigger_error("Unable to load Wikidata entity $dptId", E_USER_WARNING);
+            continue;
+        }
+        $dptModified = $dpt->modified;
+        $dptIdProperty = $dpt->id;
+        $dptLinkWikidata = "https://www.wikidata.org/wiki/$dptIdProperty";
+        $dptLinkWikidataJson = "https://www.wikidata.org/wiki/Special:EntityData/$dptIdProperty.json";
+        $dptName = isset($dpt->labels->fr->value) ? $dpt->labels->fr->value : '';
+        ///////////////////////////////////////
+        $dptDescriptionI18nFr = isset($dpt->descriptions->fr->value) ? $dpt->descriptions->fr->value : '';
+        $dptDescriptionI18nEn = isset($dpt->descriptions->en->value) ? $dpt->descriptions->en->value : '';
+        ///////////////////////////////////////
+        $dptCode = isset($dpt->aliases->fr[0]->value) ? $dpt->aliases->fr[0]->value : '';
+        $dptFullName = isset($dpt->aliases->fr[1]->value) ? $dpt->aliases->fr[1]->value : $dptName;
+        ///////////////////////////////////////
+        $dptLinkCommonswiki = '';
+        if (isset($dpt->sitelinks->commonswiki->url)) {
+            $dptLinkCommonswiki = $dpt->sitelinks->commonswiki->url;
+        }
+        $dptLinkWikipediaI18nFr = isset($dpt->sitelinks->frwiki->url) ? $dpt->sitelinks->frwiki->url : '';
+        $dptLinkWikipediaI18nEn = isset($dpt->sitelinks->enwiki->url) ? $dpt->sitelinks->enwiki->url : '';
+        ///////////////////////////////////////
+        $dptExtraData = isset($dpt->claims) ? $dpt->claims : null;
+        $dptInseeId = claimValue($dptExtraData, 'P2586');
+        $dptInseeUrlStat = "https://www.insee.fr/fr/statistiques?geo=DEP-$dptInseeId";
+        $dptInseeUrlGeo = "https://www.insee.fr/fr/metadonnees/cog/departement/DEP$dptInseeId-".slugify($dptName);
+        $dptOsmId = claimValue($dptExtraData, 'P402');
+        $dptOsmUrl = "https://www.openstreetmap.org/relation/$dptOsmId";
+
+        // Population @@@TODO extract date
+        $dptOutputPopulation = formatPopulation($dptExtraData);
+
+        $dptSubEntities = isset($dptExtraData->P150) ? $dptExtraData->P150 : [];
+        $dptNbOfSubEntities = count($dptSubEntities);
+        $dptNbOfValidSubEntities = 0;
+        $dptNbOfValidSubEntitiesWithWebsite = 0;
+        $dptSubEntitiesOutput = '';
+        // Communes are cached per département, with a second copy in a shared directory.
+        $subEntityCacheDirectory = "$dptCacheDirectory/$dptId";
+        $subEntityCacheDirectoryCommon = "$cacheDirectory/communes";
+        foreach ($dptSubEntities as $subEntity) {
+            if (!isset($subEntity->mainsnak->datavalue->value->id)) {
+                continue; // statement without a target entity
+            }
+            $subEntityId = $subEntity->mainsnak->datavalue->value->id;
+            // Path used by loadWikidataEntity() for this entity; kept here for the report.
+            $filePath = "$subEntityCacheDirectory/$subEntityId.json";
+            $entity = loadWikidataEntity($subEntityId, $subEntityCacheDirectory, $subEntityCacheDirectoryCommon);
+            if ($entity === null) {
+                trigger_error("Unable to load Wikidata entity $subEntityId", E_USER_WARNING);
+                continue;
+            }
+
+            // Report only entities whose first P31 value is $allowedType and whose first P31
+            // statement carries no P582 qualifier.
+            $entityExtraData = isset($entity->claims) ? $entity->claims : null;
+            $entityTypeValue = claimValue($entityExtraData, 'P31');
+            $entityType = isset($entityTypeValue->id) ? $entityTypeValue->id : '';
+            $allowedType = "Q484170";
+            $delegatedEntity = isset($entityExtraData->P31[0]->qualifiers->P582);
+            if ($entityType !== $allowedType || $delegatedEntity) {
+                continue;
+            }
+
+            $entityModified = $entity->modified;
+            $entityIdProperty = $entity->id;
+            $entityName = isset($entity->labels->fr->value) ? $entity->labels->fr->value : '';
+            ///////////////////////////////////////
+            $entityDescriptionI18nFr = isset($entity->descriptions->fr->value) ? $entity->descriptions->fr->value : '';
+            $entityDescriptionI18nEn = isset($entity->descriptions->en->value) ? $entity->descriptions->en->value : '';
+            ///////////////////////////////////////
+            $linkCommonswiki = '';
+            if (isset($entity->sitelinks->commonswiki->url)) {
+                $linkCommonswiki = $entity->sitelinks->commonswiki->url;
+            }
+            $linkWikipediaI18nFr = isset($entity->sitelinks->frwiki->url) ? $entity->sitelinks->frwiki->url : '';
+            $linkWikipediaI18nEn = isset($entity->sitelinks->enwiki->url) ? $entity->sitelinks->enwiki->url : '';
+            ///////////////////////////////////////
+            $linkWikidata = "https://www.wikidata.org/wiki/$entityIdProperty";
+            $linkWikidataJson = "https://www.wikidata.org/wiki/Special:EntityData/$entityIdProperty.json";
+            ///////////////////////////////////////
+            $entityInseeId = claimValue($entityExtraData, 'P374');
+            $entityInseeUrlStat1 = '';
+            $entityInseeUrlStat2 = '';
+            $entityInseeUrlGeo = '';
+            if ($entityInseeId !== '') {
+                $entityInseeUrlStat1 = "https://www.insee.fr/fr/statistiques/2011101?geo=COM-$entityInseeId";
+                $entityInseeUrlStat2 = "https://www.insee.fr/fr/statistiques?geo=COM-$entityInseeId";
+                $entityInseeUrlGeo = "https://www.insee.fr/fr/metadonnees/cog/communes/COM$entityInseeId-".slugify($entityName);
+            }
+
+            $entityOsmId = claimValue($entityExtraData, 'P402');
+            $entityOsmUrl = '';
+            if ($entityOsmId !== '') {
+                $entityOsmUrl = "https://www.openstreetmap.org/relation/$entityOsmId";
+            }
+            ///////////////////////////////////////
+            $entityPostalCode = claimValue($entityExtraData, 'P281');
+            $entityWebsite = claimValue($entityExtraData, 'P856');
+            if ($entityWebsite !== '') {
+                $dptNbOfValidSubEntitiesWithWebsite++;
+            }
+
+            $entityPublicServiceDirectoryId = claimValue($entityExtraData, 'P6671');
+            $entityPublicServiceDirectoryUrl = '';
+            if ($entityPublicServiceDirectoryId !== '') {
+                $entityPublicServiceDirectoryUrl = "https://lannuaire.service-public.fr/$entityPublicServiceDirectoryId";
+            }
+
+            // Population @@@TODO extract date
+            $outputPopulation = formatPopulation($entityExtraData);
+
+            $dptSubEntitiesOutput .=   "\n..... $entityInseeId - $entityName ........................................................................\n\n";
+            $dptSubEntitiesOutput .=  "Cache: $filePath \n";
+            $dptSubEntitiesOutput .=   "Wikidata update: $entityModified \n";
+            $dptSubEntitiesOutput .=   "Wikidata ID: $subEntityId  / $entityIdProperty \n";
+            $dptSubEntitiesOutput .=   "Wikidata Name: $entityName \n";
+            $dptSubEntitiesOutput .=   "INSEE Code: $entityInseeId \n";
+            $dptSubEntitiesOutput .=   "Postal Code: $entityPostalCode \n";
+            $dptSubEntitiesOutput .=   "Wikidata Type FR : $entityDescriptionI18nFr \n";
+            $dptSubEntitiesOutput .=   "Wikidata Type EN : $entityDescriptionI18nEn \n";
+            $dptSubEntitiesOutput .=   "URL website : $entityWebsite \n";
+            $dptSubEntitiesOutput .=   "URL wikipedia FR : {$linkWikipediaI18nFr} \n";
+            $dptSubEntitiesOutput .=   "URL wikipedia EN : {$linkWikipediaI18nEn} \n";
+            $dptSubEntitiesOutput .=   "URL wikimedia commons : {$linkCommonswiki} \n";
+            $dptSubEntitiesOutput .=   "URL WikiData : {$linkWikidata} \n";
+            $dptSubEntitiesOutput .=   "URL WikiData Json : $linkWikidataJson \n";
+            $dptSubEntitiesOutput .=   "URL INSEE : {$entityInseeUrlGeo} \n";
+            $dptSubEntitiesOutput .=   "URL INSEE stat. 1 : {$entityInseeUrlStat1} \n";
+            $dptSubEntitiesOutput .=   "URL INSEE stat. 2 : {$entityInseeUrlStat2} \n";
+            $dptSubEntitiesOutput .=   "URL Public directory : $entityPublicServiceDirectoryUrl \n";
+            $dptSubEntitiesOutput .=   "URL Open Street Map : {$entityOsmUrl} \n";
+            $dptSubEntitiesOutput .=   "Open Street Map ID: $entityOsmId \n";
+            $dptSubEntitiesOutput .=   "\n";
+            $dptSubEntitiesOutput .=   "--- Population ----------------------- \n";
+            $dptSubEntitiesOutput .=   $outputPopulation;
+            $dptSubEntitiesOutput .=   "\n";
+
+            $dptNbOfValidSubEntities++;
+        }
+
+        echo   "\n..... $dptFullName  ........................................................................\n\n";
+        echo "Cache: $dptFilePath \n";
+        echo "Wikidata update: $dptModified \n";
+        echo "Wikidata ID: $dptIdProperty \n";
+        echo "Wikidata Name: $dptName \n";
+        echo "Wikidata Full Name: $dptFullName \n";
+        echo "Wikidata Dpt Code: $dptCode \n";
+        echo "INSEE Dpt Code: $dptInseeId \n";
+        echo "Wikidata Type FR : $dptDescriptionI18nFr \n";
+        echo "Wikidata Type EN : $dptDescriptionI18nEn \n";
+        echo "URL wikipedia FR : {$dptLinkWikipediaI18nFr} \n";
+        echo "URL wikipedia EN : {$dptLinkWikipediaI18nEn} \n";
+        echo "URL wikimedia commons : {$dptLinkCommonswiki} \n";
+        echo "URL WikiData : {$dptLinkWikidata} \n";
+        echo "URL WikiData Json : $dptLinkWikidataJson \n";
+        echo "URL INSEE : $dptInseeUrlGeo <--- @@@TODO slug \n";
+        echo "URL INSEE statistiques : $dptInseeUrlStat \n";
+        echo "URL Open Street Map : {$dptOsmUrl} \n";
+        echo "Open Street Map ID: $dptOsmId \n";
+        echo "\n";
+        echo "--- Population ----------------------- \n";
+        echo $dptOutputPopulation;
+        echo "\n";
+        echo "----- $dptNbOfValidSubEntities communes (vs $dptNbOfSubEntities subdivisions administratives) ------------------------ \n";
+        echo "----- dont $dptNbOfValidSubEntitiesWithWebsite communes avec un site web ------------------------ \n";
+        echo "\n";
+        echo $dptSubEntitiesOutput;
+
+        /////////// END ----> DEPARTEMENTS OF THE REGION //////////////////////////////////////////////////////
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////
+    }
+}
+
+/**
+ * Loads one Wikidata entity, reading "<cacheDirectory>/<entityId>.json" when that file holds
+ * decodable JSON and downloading it from Special:EntityData otherwise.
+ *
+ * Only a response that decodes to a JSON object is written to disk, so a failed download
+ * cannot leave an empty or truncated cache file behind.
+ *
+ * @param $entityId         wikidata entity ID
+ * @param $cacheDirectory   directory of the cache file (created when missing)
+ * @param $backupDirectory  optional directory receiving a second copy of each download
+ * @return object|null  the entity, or null when the JSON is unavailable or lacks $entityId
+ */
+function loadWikidataEntity($entityId, $cacheDirectory, $backupDirectory = null) {
+    $url = "https://www.wikidata.org/wiki/Special:EntityData/$entityId.json";
+    $filePath = "$cacheDirectory/$entityId.json";
+    foreach ([$cacheDirectory, $backupDirectory] as $directory) {
+        if ($directory !== null && !is_dir($directory)) {
+            mkdir($directory, 0777, true);
+        }
+    }
+
+    $results = is_file($filePath) ? json_decode(file_get_contents($filePath)) : null;
+    if (!is_object($results)) {
+        // No usable cache: fetch the JSON and decode what was just downloaded.
+        $contents = file_get_contents($url);
+        $results = ($contents === false) ? null : json_decode($contents);
+        if (!is_object($results)) {
+            return null;
+        }
+        file_put_contents($filePath, $contents);
+        if ($backupDirectory !== null) {
+            file_put_contents("$backupDirectory/$entityId.json", $contents);
+        }
+    }
+
+    return isset($results->entities->$entityId) ? $results->entities->$entityId : null;
+}
+
+/**
+ * Returns the datavalue value of the first statement of $property in $claims.
+ *
+ * @param $claims    "claims" member of a decoded entity
+ * @param $property  wikidata property ID, e.g. 'P402'
+ * @return mixed  the value, or '' when the property, the statement or its datavalue is missing
+ */
+function claimValue($claims, $property) {
+    if (!isset($claims->{$property}[0]->mainsnak->datavalue->value)) {
+        return '';
+    }
+    return $claims->{$property}[0]->mainsnak->datavalue->value;
+}
+
+/**
+ * Builds the population listing printed by run(): one line per P1082 statement, prefixed
+ * with the statement's index. Statements without an amount are left out.
+ *
+ * @param $claims  "claims" member of a decoded entity
+ * @return string  the listing, '' when there is no P1082 statement
+ */
+function formatPopulation($claims) {
+    $output = '';
+    $statements = isset($claims->P1082) ? $claims->P1082 : [];
+    foreach ($statements as $key => $statement) {
+        if (!isset($statement->mainsnak->datavalue->value->amount)) {
+            continue;
+        }
+        // The amount may carry a leading '+'; drop it before formatting the number.
+        $amount = (float) str_replace('+', '', $statement->mainsnak->datavalue->value->amount);
+        $populationValue = number_format($amount, 0, ',', ' ');
+        $output .= "$key - $populationValue personnes ----> @@@TODO extract date\n";
+    }
+    return $output;
+}
+
+
-- 
GitLab