Création d'un service de récupération des données via RPA
À partir du code source suivant, que l'on prendra comme exemple :
<?php
namespace Quantum\RPA\Service\Scraper\Business\External;
use Quantum\RPA\Service\Scraper\Scraper;
use Quantum\RPA\Service\Scraper\ScraperConstraint;
use Quantum\RPA\Service\Scraper\SeleniumChromeClient;
use Quantum\RPA\Service\SimpleHtml\SimpleHtml;
class StructureScraper extends Scraper
{
/** Server URL handed to SeleniumChromeClient by the constructor. */
const DEFAULT_SERVER_URL = 'http://selenium:4444/wd/hub';
/** Search page of justice.comarquage.fr; this is the page opened by start(). */
const APP_HOMEPAGE = "https://justice.comarquage.fr/pois/search";
/** Territory autocomplete endpoint queried by searchByCity(). */
const APP_CITY_SEARCH = "https://justice.comarquage.fr/api/v1/territories/autocomplete";
//const APP_NATINF_DETAILS = "https://natinf.srj.intranet.justice.gouv.fr/natinf/faces/natinf.xhtml";
//const NATURE_IDT = 119;
//const PROCEDURE1_IDT = 123;
//const PROCEDURE2_IDT = 127;
//const NATAFF_IDT = 131;
/**
 * Builds the scraper on top of a Selenium Chrome client.
 *
 * @param string $serverUrl URL given to SeleniumChromeClient. Defaults to
 *                          DEFAULT_SERVER_URL, so `new StructureScraper()`
 *                          behaves exactly as before; pass another URL to
 *                          target a different Selenium server (tests, staging).
 */
public function __construct(string $serverUrl = self::DEFAULT_SERVER_URL)
{
    parent::__construct(new SeleniumChromeClient($serverUrl));
}
/**
 * Opens the application home page.
 *
 * Hands APP_HOMEPAGE to prepare() and then runs execute().
 *
 * @return self The current scraper, so calls can be chained.
 */
public function start(): self
{
    $this->prepare(self::APP_HOMEPAGE);
    $this->execute();

    return $this;
}
/**
 * Queries the territory autocomplete endpoint for a city name.
 *
 * The request is sent from the browser session through jQuery's $.ajax, with
 * `term` set to the city and `page_size` set to 100.
 * NOTE(review): this presumes the currently loaded page exposes jQuery as `$`
 * (e.g. after start()) — confirm against the caller.
 *
 * @param string $city Search term; may contain quotes or accents.
 *
 * @return array{count: int, items: array} The entries found under
 *         `data.items` of the decoded JSON response and their number. Both
 *         stay empty when the response is missing, is not valid JSON, or does
 *         not contain an array at `data.items`.
 */
public function searchByCity(string $city): array
{
    $output = ['count' => 0, 'items' => []];

    // Serialise the settings as JSON instead of interpolating $city into a
    // quoted JS literal: a name such as "L'Haÿ-les-Roses" would otherwise
    // terminate the string and break (or inject into) the script.
    $settings = json_encode(
        [
            'url' => self::APP_CITY_SEARCH,
            'data' => ['term' => $city, 'page_size' => 100],
        ],
        JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
    );
    if (false === $settings) {
        return $output;
    }

    $blob = $this->execJS(['return $.ajax(' . $settings . ');']);
    if (!is_string($blob) || '' === $blob) {
        return $output;
    }

    $data = json_decode($blob, true);
    $items = is_array($data) ? ($data['data']['items'] ?? []) : [];

    // count() throws on non-countable values, so only accept a real array.
    if (is_array($items)) {
        $output['items'] = $items;
        $output['count'] = count($items);
    }

    return $output;
}
/**
 * Collects the structures attached to a territory.
 *
 * Two result sets are read and concatenated, in this order:
 *  1. the search page filtered on the category
 *     "Service pénitentiaire d'insertion et de probation (SPIP) + antennes",
 *     loaded in the browser through prepare()/execute();
 *  2. the HTML answered by a POST on the search page with
 *     `nomenclature_dila` set to 'tj', sent through jQuery's $.ajax.
 *     NOTE(review): 'tj' is taken as-is from the original code; its meaning
 *     is not visible here.
 *
 * Every <tr> found under `table tbody` becomes one record built from its
 * first three cells (name, address, phone). Rows without any usable cell are
 * skipped, and a row never inherits values from the previous one.
 *
 * @param string $id Value of the `territoires_de_competence` filter.
 *
 * @return mixed Always a list<array{name?: string, address?: string, phone?: string}>;
 *               the declared type is kept for compatibility.
 */
public function getDetailsById(string $id): mixed
{
    // $id is URL-encoded so reserved characters cannot alter the query string.
    $url = self::APP_HOMEPAGE
        . '?context_action=list'
        . '&context_categories=Service%20p%C3%A9nitentiaire%20d%27insertion%20et%20de%20probation%20(SPIP)%20+%20antennes'
        . '&context_group=&context_organism_type=&context_organization='
        . '&context_schema=All&context_subscription='
        . '&context_two_col=False&context_hide_directory=False&context_hide_list=False'
        . '&context_pois_tab=&schema_name=All'
        . '&territoires_de_competence=' . rawurlencode($id)
        . '&search=search';

    $this->prepare($url);
    $this->execute();
    $output = $this->parseStructureTables($this->getSimpleHtml()->find('table tbody'));

    // JSON-encode the AJAX settings so $id cannot break out of a JS literal.
    $settings = json_encode(
        [
            'url' => self::APP_HOMEPAGE,
            'method' => 'POST',
            'data' => ['territoires_de_competence' => $id, 'nomenclature_dila' => 'tj'],
        ],
        JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
    );
    if (false === $settings) {
        return $output;
    }

    $blob = $this->execJS(['return $.ajax(' . $settings . ');']);
    if (!is_string($blob) || '' === trim($blob)) {
        // No HTML came back: keep what the first page produced.
        return $output;
    }

    $simpleHtml = new SimpleHtml();
    $simpleHtml->load($blob);
    $container = $simpleHtml->getContainer();
    if (!$container) {
        return $output;
    }

    return array_merge($output, $this->parseStructureTables($container->find('table tbody')));
}

/**
 * Turns every <tr> of the given <tbody> nodes into a structure record.
 *
 * @param mixed $tables Result of find('table tbody'); anything empty yields [].
 *
 * @return array<int, array<string, string>> One entry per non-empty row.
 */
private function parseStructureTables($tables): array
{
    if (empty($tables)) {
        return [];
    }

    $rows = [];
    foreach ($tables as $table) {
        foreach ($table->find('tr') as $tr) {
            $row = $this->parseStructureRow($tr);
            if ([] !== $row) {
                $rows[] = $row;
            }
        }
    }

    return $rows;
}

/**
 * Reads the name, address and phone cells (columns 0, 1 and 2) of one row.
 *
 * @param mixed $tr Row node exposing find('td').
 *
 * @return array<string, string> Keys among 'name', 'address', 'phone'; empty
 *                               when the row has none of these cells.
 */
private function parseStructureRow($tr): array
{
    $row = [];
    foreach ($tr->find('td') as $index => $td) {
        $field = match ($index) {
            0 => 'name',
            1 => 'address',
            2 => 'phone',
            default => null,
        };
        if (null === $field) {
            continue;
        }

        // The address keeps its markup so that <br> separators become commas;
        // the other cells are read as plain text.
        $raw = 'address' === $field ? $td->html() : $td->text();
        $value = str_replace(
            ["<br>", "\r", "\n", "\t", "<td>", "</td>"],
            [",", " ", " ", " ", " ", " "],
            $raw
        );
        $value = trim(preg_replace('/[ ]+/', ' ', $value) ?? $value);

        if ('phone' === $field) {
            $row[$field] = str_replace("+33 ", "0", $value);
            continue;
        }

        // Same UTF-8 -> ISO-8859-1 conversion the code previously did with
        // utf8_decode(), which is deprecated since PHP 8.2.
        // NOTE(review): confirm callers really expect ISO-8859-1 here.
        $row[$field] = mb_convert_encoding($value, 'ISO-8859-1', 'UTF-8');
    }

    return $row;
}
/**
public function gotoNatinfRecherche(): self
{
$constraint = new ScraperConstraint(ScraperConstraint::ACTION_CHECK_ID, ScraperConstraint::STATE_VISIBLE);
$constraint->setParam("j_idt11:numero");
$constraint->setTimeout(10);
$this->addConstraint($constraint);
$this->click('a[href="/natinf/"]', Scraper::STRATEGY_JS);
return $this;
}
public function searchNatinf(int $natinf): array
{
if(self::APP_NATINF_SEARCH !== $this->getCurrentURL())
{
$this->gotoNatinfRecherche();
}
$output = [];
$this->execJs([
"$('#j_idt11\\:numero').val($natinf);",
]);
$constraint = new ScraperConstraint(ScraperConstraint::ACTION_CHECK_CLASS, ScraperConstraint::STATE_PRESENT);
$constraint->setParam("natinf-titre");
$constraint->setTimeout(3);
$this->addConstraint($constraint);
$this->click('#j_idt11:consulter', Scraper::STRATEGY_JS);
if(self::APP_NATINF_DETAILS === $this->getCurrentURL())
{
$output=[
'natinf' => $natinf,
'principalSentences' => [],
'additionalSentences' => [],
'otherSentences' => [],
'active' => false,
'version' => null,
'dateStartApplication'=> null,
'dateEndApplication' => null,
'qualification' => null,
'nataff' => null,
'nature' => null,
'procedure' => null,
'replacementBy' => null,
'abrogatedBy' => null,
'replacementOf' => [],
'definedBy' => null,
'repressedBy' => null,
'numberMeasures' => 0,
'recidivism' => null,
];
$dom = $this->getSimpleHtml();
$nav = $dom->find('.navigation-versions');
if($nav)
{
$blob = $nav[0];
$htmlKeys = explode("<br>",$blob->find('.bloc1sur3')->html());
$htmlKeys = str_replace(["\t","\r","\n"],[""],preg_replace("/(^[ ]+|[ ]+)$/","",preg_replace("/[<]([^>]+)[>]/","",$htmlKeys)));
$htmlVals = explode("<br>",$blob->find('.bloc2sur3')->html());
$htmlVals = str_replace(["\t","\r","\n"],[""],preg_replace("/(^[ ]+|[ ]+)$/","",preg_replace("/[<]([^>]+)[>]/","",$htmlVals)));
foreach($htmlKeys as $index => $htmlKey)
{
$htmlKey = mb_strtolower($htmlKey);
switch($htmlKey)
{
case 'version':
$output['version'] = (int)$htmlVals[$index];
break;
case 'depuis le':
$output['active'] = true;
$output['dateStartApplication'] = new \DateTime(preg_replace("/(\d{2})\/(\d{2})\/(\d{4})/","$3-$2-$1",$htmlVals[$index]));
break;
case 'du':
if(preg_match("/(\d{2})\/(\d{2})\/(\d{4})[ ]+au[ ]+(\d{2})\/(\d{2})\/(\d{4})/i", $htmlVals[$index], $matches))
{
$output['dateStartApplication'] = new \DateTime($matches[3].'-'.$matches[2].'-'.$matches[1]);
$output['dateEndApplication'] = new \DateTime($matches[6].'-'.$matches[5].'-'.$matches[4]);
}
break;
default:
throw new \Exception('Erreur ici :'.$htmlKey.':'.$htmlVals[$index]);
}
}
}
$trs = $dom->find('tr[class="ui-widget-content"]');
foreach($trs as $tr)
{
$tds = $tr->find('td');
if(2 === count($tds))
{
$criteria = mb_strtolower(utf8_decode($tds[0]->text()));
$value = mb_strtoupper(utf8_decode($tds[1]->text()));
switch($criteria)
{
case 'qualification':
case 'nataff':
case 'nature':
$output[$criteria] = $value;
break;
case 'procédure':
$output['procedure'] = $value;
break;
case 'remplacée par n°':
$output['replacementBy'] = (int)$value;
break;
case 'abrogée par':
$output['abrogatedBy'] = $value;
break;
case 'remplace n°':
$output['replacementOf'] = explode(", ",$value);
break;
case 'définie par':
$output['definedBy'] = $value;
break;
case 'réprimée par':
$output['repressedBy'] = $value;
break;
case 'nombre de peines ou mesures :':
$output['numberMeasures'] = (int)$value;
break;
case 'natinf en récidive':
$output['recidivism'] = (int)$value;
break;
default:
throw new \Exception("Erreur ici [natinf $natinf] :".$criteria.':'.$value);
}
}
}
$sentences = [
75 => 'principal',
85 => 'additional',
95 => 'other',
];
// récupération des peines
foreach($sentences as $idt => $typeSentence)
{
$sentencesHtml = $this->execJs(['return $("#form\\:j_idt'.$idt.'").html();']);
$subdom = SimpleHtml::str_get_html($sentencesHtml)->getContainer();
if($subdom)
{
$trs = $subdom->find("table > tbody > tr");
foreach($trs as $tr)
{
$tds = $tr->find('td');
if(2 === count($tds))
{
$key = utf8_decode($tds[0]->text());
$value= utf8_decode($tds[1]->text());
$output[$typeSentence.'Sentences'][]=['thematic' =>$key, 'valuation' => $value];
}
}
unset($subdom);
}
}
return $output;
}
return [];
}
public function gotoRecherche(): self
{
$constraint = new ScraperConstraint(ScraperConstraint::ACTION_CHECK_CLASS, ScraperConstraint::STATE_PRESENT);
$constraint->setParam("ui-panel-titlebar");
$constraint->setTimeout(10);
$this->addConstraint($constraint);
$this->click('a[href="/natinf/faces/recherche.xhtml"]', Scraper::STRATEGY_JS);
return $this;
}
public function findAllProcedures1(): array
{
return $this->findAllReferences(self::PROCEDURE1_IDT);
}
public function findAllProcedures2(): array
{
return $this->findAllReferences(self::PROCEDURE2_IDT);
}
public function findAllNatures(): array
{
return $this->findAllReferences(self::NATURE_IDT);
}
public function findAllNataffs(): array
{
return $this->findAllReferences(self::NATAFF_IDT);
}
*/
/**
public function findAllReferences(int $idt): array
{
$dom = $this->getSimpleHtml();
$nodeList = $dom->find('.ui-selectonemenu-item');
$output = [];
foreach($nodeList as $node)
{
$attrs = $node->attributes();
// @var string|null $id
$id = !empty($attrs['id']) ? $attrs['id'] : null;
$data = utf8_decode($node->text());
$regexp = "j[_]idt".$idt."[_](?<id>\d+)";
if(preg_match("/$regexp/i", $id, $matches) && !empty($matches['id']))
{
$output[]=['key' => $id,'text' => $data, 'id' => (int)$matches['id']];
}
}
unset($dom);
return $output;
}
*/
/**
* Pré-requis : il faut être sur la page de recherche
*
*/
/**
public function searchBy(int $idt, string $option): array
{
$output = [];
$this->execJs([
"document.getElementById('form:bandeauAutre_toggler').click();",
'var elmtId="form\\:j_idt'.$idt.'_input";',
'var sel = $("#"+elmtId);',
'sel.val("'.$option.'");',
'return sel.val();',
]);
$constraint = new ScraperConstraint(ScraperConstraint::ACTION_CHECK_CLASS, ScraperConstraint::STATE_PRESENT);
$constraint->setParam("selection-info");
$constraint->setTimeout(10);
$this->addConstraint($constraint);
$constraint = new ScraperConstraint(ScraperConstraint::ACTION_AWAITING);
$constraint->setTimeout(5);
$this->addConstraint($constraint);
$this->click('#form:rechercher', Scraper::STRATEGY_JS);
if(false === $this->isValid()) {
return $output;
}
$block = (utf8_decode(str_replace(["\r","\n","\t"],[""],$this->getText(".selection-info"))));
// @var int $count Nombre de réponses
$cpt = preg_match("/^(?<count>\d+)[ ]/i", $block, $matches) ? (int)$matches['count'] : 0;
// @var int $pages Nombre de pages
$pages = ($cpt) ? 1+(int)(($cpt-1)/10) : 0;
for($i=1; $i<=$pages; $i++)
{
$this->execJs([
"$('.ui-paginator-pages > a:nth-of-type($i)').click();",
]);
sleep(3);
$simpleHtml = $this->getSimpleHtml();
$trs = $simpleHtml->find('table > tbody > tr');
foreach($trs as $tr)
{
$tds = $tr->find('td');
if(7 === count($tds))
{
$natinf = (int)mb_strtolower(trim(utf8_decode($tds[1]->text())));
$version = (int)preg_replace("/([^\d])/","",mb_strtolower(trim(utf8_decode($tds[2]->text()))));
$nature = mb_strtolower(trim(utf8_decode($tds[3]->text())));
$qualification = mb_strtoupper(trim(utf8_decode($tds[4]->text())));
$strDateApplication = preg_replace("/^(.*)(\d{2})\/(\d{2})\/(\d{4})(.*)$/i","$4-$3-$2",$tds[5]->text());
$dateApplication = $strDateApplication ? new \DateTime($strDateApplication) : null;
$output[]=[
'natinf' => $natinf,
'active' => true,
'version' => $version,
'nature' => $nature,
'qualification' => $qualification,
'dateStartApplication' => $dateApplication,
];
}
}
}
return $output;
}
*/
}
Créer un service dans le projet Opéra permettant de récupérer les données sur le site https://justice.comarquage.fr.