Api-mediawiki:一个机器人创建超链接


Api mediawiki : a bot creating hyperlink?

我想创建一个机器人程序:先列出我的wiki中的所有页面,然后搜索wiki的全部页面,当它在另一个页面的正文中找到某个页面的名称时,就把该名称变成指向那个页面的超链接。

例如:我有一个叫"Wiki"的页面,而另一个页面的正文中出现了"Wiki"这个词。所以我想在那里创建一个指向"Wiki"页面的超链接。

这是我第一次使用这个API,所以我不确定如何继续。我已经发现,你可以用"list=allpages"列出所有页面,并用"list=search"在所有wiki中搜索字符串,但当我有了存在该字符串的页面名称时,我如何只编辑页面中的那些字符串?

现在我正在用PHP做这件事,所以我可以做一些事情,比如获取页面的所有内容,更改它,然后编辑页面吗?

如果你感兴趣,下面是我创建这个机器人的方法:

// Bot settings: cookie file location, credentials and wiki endpoint.
$path_cookie = "______path________";
$botLogin    = "Bot";
$botPass     = "password";
$linkWiki    = "exemple.com";

// The cookie file has to exist beforehand; it is what keeps the bot logged in.
if (file_exists($path_cookie) === false) {
    touch($path_cookie);
}

// One cURL handle, reused by every request of the script.
$curl = curl_init();
/**
 * Sends one POST request on the given cURL handle and returns the raw answer.
 *
 * This is the helper used for every request made once the bot is logged in;
 * the cookies are read from the cookie file.
 *
 * @param array|string $postfields  POST fields, passed to CURLOPT_POSTFIELDS.
 * @param resource     $curl        cURL handle the request is executed on.
 * @param string       $linkWiki    URL the request is sent to.
 * @param string       $path_cookie Path of the cookie file.
 *
 * @return string Raw response body.
 *
 * @throws Exception When the transfer itself fails (curl_exec returns false).
 */
function requeteCurl($postfields, $curl, $linkWiki, $path_cookie)
{
    curl_setopt($curl, CURLOPT_URL, $linkWiki);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $postfields);
    curl_setopt($curl, CURLOPT_COOKIEFILE, realpath($path_cookie));

    $resultat = curl_exec($curl);
    if ($resultat === false) {
        // Fail loudly instead of handing `false` to the callers' json_decode().
        throw new Exception("Error getting data from server ($linkWiki): " . curl_error($curl));
    }

    return $resultat;
}
    /* First you need to login with your bot */

// Step 1: send the bot credentials.
$postfields = array(
        'action' => 'login',
        'format'=> 'json',
        'lgname' => $botLogin,
        'lgpassword' => $botPass
);
curl_setopt($curl, CURLOPT_URL, $linkWiki);
curl_setopt($curl, CURLOPT_COOKIESESSION, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_POST, true);
curl_setopt($curl, CURLOPT_POSTFIELDS, $postfields);
curl_setopt($curl, CURLOPT_COOKIEJAR, $path_cookie); // the cookies must be stored the first time
$connexion = curl_exec($curl);
if (!$connexion) {
    throw new Exception("Error getting data from server ($linkWiki): " . curl_error($curl));
}
$json_connexion = json_decode($connexion, true);

// Step 2: when the answer carries a token, log in a second time with it.
// isset() avoids an undefined-index notice when no token is returned.
$tokenConnexion = isset($json_connexion['login']['token'])
    ? $json_connexion['login']['token']
    : null;
if ($tokenConnexion !== null) {
    $postfields = array(
            'action' => 'login',
            'format'=> 'json',
            'lgtoken' => $tokenConnexion,
            'lgname' => $botLogin,
            'lgpassword' => $botPass
    );
    $connexionToken = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
} else {
    // No token to send back: keep the first answer as the login result.
    $connexionToken = $connexion;
}
var_dump($connexionToken); // debug output of the login result
    /* You have to list all the pages in your wiki to know which strings to search for */
// Titles of all wiki pages; each one is used later as a search string.
// Initialised up front so the later foreach still works when no page is returned.
$tabNomsPagesWiki = array();
$postfields = array(
        'action' => 'query',
        'format'=> 'json',
        'list' => 'allpages',
        'aplimit' => 'max'
);
do {
    $pagesWiki = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
    $json_pagesWikis = json_decode($pagesWiki, true);

    if (isset($json_pagesWikis["query"]["allpages"])) {
        foreach ($json_pagesWikis["query"]["allpages"] as $pages) {
            $tabNomsPagesWiki[] = $pages["title"];
        }
    }

    // When the answer contains a "continue" block, more pages are available:
    // its values are sent back as extra parameters to fetch the next batch.
    $suite = null;
    if (isset($json_pagesWikis["continue"]) && is_array($json_pagesWikis["continue"])) {
        $suite = $json_pagesWikis["continue"];
        $postfields = array_merge($postfields, $suite);
    }
} while ($suite !== null);
    /* Then you search on all the wiki to find the pages where the string you search is */
foreach ($tabNomsPagesWiki as $chaineRecherchee) // each page title is used as a search string
{
    $postfields = array(
            'action' => 'query',
            'format'=> 'json',
            'list' => 'search',
            'srsearch' => $chaineRecherchee,
            'srwhat' => 'text',
            'srlimit' => 'max'
    );
    $pagesString = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
    $json_pagesString = json_decode($pagesString, true);

    // Titles of the pages whose text contains the searched title.
    $pagesComportantLaRecherche = array();
    if (isset($json_pagesString["query"]["search"])) {
        foreach ($json_pagesString["query"]["search"] as $search) {
            $pagesComportantLaRecherche[] = $search["title"];
        }
    }

    // The title is escaped before being put into the patterns, so characters
    // such as "(", ".", "+" or "/" in a title are matched literally.
    $titreEchappe = preg_quote($chaineRecherchee, '/');
    $stringLien = "[[" . $chaineRecherchee . "]]"; // the link that replaces the plain title
    $stringLength = strlen($chaineRecherchee);
    // Existing [[...]] and [...] links that contain the title: no link is put inside them.
    $patternLien = "/((\[\[[^\]]*)[\s](" . $titreEchappe . ")[\s\,\.][^\]]*\]\])|((\[[^\]]*)[\s\'](" . $titreEchappe . ")[\s\,\.\'][^\]]*\])/mi";
    // The bare title, with one delimiter character before and one after it.
    $patternNomPage = "/[\s\']" . $titreEchappe . "[\s\,\.\']/im";

    foreach ($pagesComportantLaRecherche as $pageRecherche) {
        // A page must not link to itself. Strict comparison keeps
        // numeric-looking titles such as "1" and "1.0" distinct.
        if ($pageRecherche === $chaineRecherchee) {
            continue;
        }

        $postfields = array(
                'action' => 'parse',
                'format'=> 'json',
                'page' => $pageRecherche,
                'prop' => 'wikitext'
        );
        $pageContent = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
        $json_pagesContent = json_decode($pageContent, true);
        if (!isset($json_pagesContent["parse"]["wikitext"]["*"])) {
            continue; // no wikitext in the answer, nothing to edit
        }
        $text_pagesContent = $json_pagesContent["parse"]["wikitext"]["*"]; // whole content of the page

        preg_match_all($patternLien, $text_pagesContent, $liens, PREG_OFFSET_CAPTURE);
        preg_match_all($patternNomPage, $text_pagesContent, $nomPages, PREG_OFFSET_CAPTURE);

        // Offset correction applied to every match position:
        // starts at 1 to skip the delimiter matched in front of the title,
        // then grows by 4 for each inserted link (the added "[[" and "]]").
        $decalage = 1;
        foreach ($nomPages[0] as $page) {
            $offsetNomPagetrouvee = $page[1];

            // Both offset lists come from the unmodified text, so they can be compared directly.
            $est_dans_lien = false;
            foreach ($liens[0] as $lien) {
                $lienOffset = $lien[1];
                $lienTaille = strlen($lien[0]);
                if ($lienOffset <= $offsetNomPagetrouvee && $offsetNomPagetrouvee <= $lienOffset + $lienTaille) {
                    $est_dans_lien = true;
                    break;
                }
            }

            if (!$est_dans_lien) {
                $text_pagesContent = substr_replace($text_pagesContent, $stringLien, $offsetNomPagetrouvee + $decalage, $stringLength);
                $decalage += 4;
            }
        }

        if ($decalage > 1) { // at least one link was created: save the page
            $postfields = array(
                    'action' => 'query',
                    'meta' => 'tokens',
                    'format' => 'json'
            );
            $tokenEdit = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
            $json_tokenEdit = json_decode($tokenEdit, true);
            $text_tokenEdit = $json_tokenEdit['query']['tokens']['csrftoken'];

            $postfields = array(
                    'action' => 'edit',
                    'format' => 'json',
                    'title' => $pageRecherche,
                    'text' => $text_pagesContent,
                    'bot' => '',
                    'token' => $text_tokenEdit
            );
            $edit = requeteCurl($postfields, $curl, $linkWiki, $path_cookie);
            echo "\n" . $edit;
        }
    }
}

// The session is over: remove the cookie file.
unlink($path_cookie);

好吧,我确信这段代码里有很多不必要的东西,毕竟我并不是PHP和MediaWiki方面的专业人员;不过脚本运行得很好,所以我想这样就可以了 ^^