解析Mobi的PHP库


PHP Library to Parse Mobi

有没有免费的PHP库可以解析.mobi文件以获得:

  • 作者
  • 标题
  • 出版商
  • 封面

编辑

对于那些认为本问题与《是否存在处理PRC/MOBI文件的PHP库》完全重复的人:你们显然没有认真阅读这两个问题。

该询问者想知道如何使用PHP库生成.mobi文件。我想知道如何分解或解析已经创建的.mobi文件以获得某些信息。因此,这个问题的解决方案phpMobi将不起作用,因为它是一个从HTML生成.mobi文件的脚本,而不是解析.mobi。

这是一个非常非常蹩脚的例子,但如果你绝望了,你可以尝试这样的方法:

// Crude heuristic: take the 512 bytes starting at the "EXTH" marker, split them
// on NUL bytes and treat the first three "text-looking" pieces as
// author / publisher / title. This is not a real parser.
$path = "A Young Girl's Diary - Freud, Sigmund.mobi";
$data = is_readable($path) ? file_get_contents($path) : false;

// The file is binary, so byte-oriented functions are used instead of mb_*;
// the mb_* variants interpret the bytes as multibyte text and can misplace offsets.
$start = ($data === false) ? false : strpos($data, 'EXTH');

if ($start === false) {
    echo "No EXTH block found\n";
} else {
    $chunk = substr($data, $start, 512);
    // "\x00" is the NUL byte that separates the pieces.
    $chunks = explode("\x00", $chunk);
    array_shift($chunks);
    // Keep pieces that contain an upper-case letter and are longer than two bytes.
    $chunks = array_filter($chunks, function ($str) {
        return preg_match('#([A-Z])#', $str) && strlen($str) > 2;
    });
    // array_combine() requires both arrays to have the same size, so keep only
    // the first three matches and label them only when all three were found.
    $chunks = array_slice(array_values($chunks), 0, 3);
    $keys = array('author', 'publisher', 'title');
    if (count($chunks) === count($keys)) {
        $chunks = array_combine($keys, $chunks);
    }
    print_r($chunks);
}

输出:

Array
(
    [author] => Freud, Sigmund
    [publisher] => Webarto
    [title] => A Young Girl's Diary
)

使用的文件:http://freekindlebooks.org/Freud/752-h.mobi(已用Calibre编辑过出版商元数据)

文件解析甚至不是一件容易或有趣的事情。看看这个:http://code.google.com/p/xee/source/browse/XeePhotoshopLoader.m?r=a70d7396356997114b548f4ab2cbd49badd7d285#107

你应该做的是逐字节读取,但由于没有详细的文档,恐怕这不是一项容易的工作。

P.S. 我还没有尝试获取封面图片。

如果有人仍然感兴趣,这里有一个mobi元数据读取示例:

/**
 * Plain value holder for the fields read from the start of record 0
 * (the PalmDOC header). Filled in by the mobi class.
 */
class palmDOCHeader
{
    /** Compression field (first 2 bytes of the header). */
    public $Compression = 0;

    /** Text length field (4 bytes). */
    public $TextLength = 0;

    /** Record count field (2 bytes). */
    public $Records = 0;

    /** Record size field (2 bytes). */
    public $RecordSize = 0;
}
/**
 * Plain value holder for the Palm database record list.
 */
class palmHeader
{
    /** List of palmRecord entries, in the order they appear in the file. */
    public $Records = [];
}
/**
 * One entry of the Palm database record list (8 bytes in the file).
 */
class palmRecord
{
    /** Offset of the record's data (4 bytes), used as an fseek() target. */
    public $Offset = 0;

    /** Record attributes (1 byte). */
    public $Attributes = 0;

    /** Record id (3 bytes). */
    public $Id = 0;
}
/**
 * Plain value holder for the fields that follow the "MOBI" marker.
 * Each populated field is a 4-byte value read by the mobi class.
 */
class mobiHeader
{
    /** Header length; added to the header's start position to find what follows it. */
    public $Length = 0;

    /** Type field. */
    public $Type = 0;

    /** Encoding field. */
    public $Encoding = 0;

    /** Id field. */
    public $Id = 0;

    /** NOTE(review): presumably the MOBI file version - confirm where it is set. */
    public $FileVersion = 0;
}
/**
 * Plain value holder for the block that starts with the "EXTH" marker.
 */
class exthHeader
{
    /** Length field (4 bytes following the marker). */
    public $Length = 0;

    /** List of exthRecord entries, in file order. */
    public $Records = [];
}
/**
 * One record of the EXTH block: a type, a total length and a payload.
 */
class exthRecord
{
    /** Record type (4 bytes); the mobi accessors look records up by this value. */
    public $Type = 0;

    /** Total record length (4 bytes); the payload is this value minus 8 bytes. */
    public $Length = 0;

    /** Raw payload bytes. */
    public $Data = "";
}
/**
 * Minimal metadata reader for MOBI e-book files.
 *
 * The constructor walks the Palm database header, the PalmDOC header, the
 * MOBI header and (when present) the EXTH block, echoing what it finds as a
 * diagnostic dump. The EXTH records are kept so the accessor methods
 * (Title(), Author(), ...) can look them up by type afterwards.
 *
 * All multi-byte integers in the file are read as big-endian unsigned values.
 */
class mobi {
    /** @var mobiHeader|null Set only when record 0 contains a "MOBI" marker. */
    protected $mobiHeader;
    /** @var exthHeader|null Set only when an "EXTH" marker follows the MOBI header. */
    protected $exthHeader;

    /**
     * Parses the given file and prints a diagnostic dump of its headers.
     *
     * When the file cannot be opened nothing is parsed; when it is not a
     * BOOKMOBI database "Invalid file format" is printed. In both cases the
     * accessor methods return an empty string.
     *
     * @param string $file Path of the .mobi file.
     */
    public function __construct($file){
        // "rb": the content is binary, so no newline translation may be applied.
        $handle = fopen($file, "rb");
        if (!$handle){
            return;
        }
        $this->parse($handle);
        fclose($handle);
    }

    /** Runs the header readers in file order, stopping at the first missing part. */
    private function parse($handle){
        // Bytes 60..67 hold the database type and creator: "BOOK" + "MOBI".
        fseek($handle, 60, SEEK_SET);
        if (fread($handle, 8) !== "BOOKMOBI"){
            echo "Invalid file format";
            return;
        }
        $palmHeader = $this->readPalmHeader($handle);
        if (count($palmHeader->Records) === 0){
            // Without record 0 there is no PalmDOC/MOBI header to read.
            return;
        }
        $this->readPalmDocHeader($handle, $palmHeader->Records[0]->Offset);
        $this->readMobiHeader($handle);
    }

    /** Reads the database name and the record list of the Palm database header. */
    private function readPalmHeader($handle){
        echo "\nPalm database:\n";
        $palmHeader = new palmHeader();
        fseek($handle, 0, SEEK_SET);
        $name = (string) fread($handle, 32);
        // The 32-byte name field is NUL-terminated; print only the text part.
        $nul = strpos($name, "\0");
        if ($nul !== false){
            $name = substr($name, 0, $nul);
        }
        echo "Name: ".$name."\n";
        fseek($handle, 76, SEEK_SET);
        $records = $this->readUInt($handle, 2);
        echo "Records: ".$records."\n";
        fseek($handle, 78, SEEK_SET);
        for ($i=0; $i<$records; $i++){
            // Each entry is 8 bytes: offset (4), attributes (1), id (3).
            $entry = $this->readBytes($handle, 8);
            if ($entry === null){
                // Truncated record list: stop instead of appending empty records.
                break;
            }
            $record = new palmRecord();
            $record->Offset = $this->toUInt(substr($entry, 0, 4));
            $record->Attributes = $this->toUInt(substr($entry, 4, 1));
            $record->Id = $this->toUInt(substr($entry, 5, 3));
            $palmHeader->Records[] = $record;
            echo "Record ".$i." offset: ".$record->Offset." attributes: ".$record->Attributes."  id : ".$record->Id."\n";
        }
        return $palmHeader;
    }

    /**
     * Reads the 16-byte PalmDOC header at the given offset and leaves the
     * file position directly behind it.
     */
    private function readPalmDocHeader($handle, $offset){
        $palmDOCHeader = new palmDOCHeader();
        fseek($handle, $offset, SEEK_SET);
        $palmDOCHeader->Compression = $this->readUInt($handle, 2);
        fseek($handle, 2, SEEK_CUR); // two bytes this reader does not use
        $palmDOCHeader->TextLength = $this->readUInt($handle, 4);
        $palmDOCHeader->Records = $this->readUInt($handle, 2);
        $palmDOCHeader->RecordSize = $this->readUInt($handle, 2);
        fseek($handle, 4, SEEK_CUR); // four trailing bytes this reader does not use
        echo "\nPalmDOC Header:\n";
        echo "Compression:".$palmDOCHeader->Compression."\n";
        echo "TextLength:".$palmDOCHeader->TextLength."\n";
        echo "Records:".$palmDOCHeader->Records."\n";
        echo "RecordSize:".$palmDOCHeader->RecordSize."\n";
        return $palmDOCHeader;
    }

    /** Reads the MOBI header at the current position, then the EXTH block behind it. */
    private function readMobiHeader($handle){
        $mobiStart = ftell($handle);
        if (fread($handle, 4) !== "MOBI"){
            return;
        }
        $this->mobiHeader = new mobiHeader();
        echo "\nMOBI header:\n";
        $this->mobiHeader->Length = $this->readUInt($handle, 4);
        $this->mobiHeader->Type = $this->readUInt($handle, 4);
        $this->mobiHeader->Encoding = $this->readUInt($handle, 4);
        $this->mobiHeader->Id = $this->readUInt($handle, 4);
        echo "Header length: ".$this->mobiHeader->Length."\n";
        echo "Type: ".$this->mobiHeader->Type."\n";
        echo "Encoding: ".$this->mobiHeader->Encoding."\n";
        echo "Id: ".$this->mobiHeader->Id."\n";
        // The header length is counted from the "MOBI" marker itself.
        fseek($handle, $mobiStart+$this->mobiHeader->Length, SEEK_SET);
        $this->readExthHeader($handle);
    }

    /** Reads the EXTH block at the current position, if there is one. */
    private function readExthHeader($handle){
        if (fread($handle, 4) !== "EXTH"){
            return;
        }
        $this->exthHeader = new exthHeader();
        echo "\nEXTH header:\n";
        $this->exthHeader->Length = $this->readUInt($handle, 4);
        $records = $this->readUInt($handle, 4);
        echo "Records: ".$records."\n";
        $stat = fstat($handle);
        $fileSize = is_array($stat) ? $stat['size'] : PHP_INT_MAX;
        for ($i=0; $i<$records; $i++){
            // Record layout: type (4), total length (4), then length-8 data bytes.
            $head = $this->readBytes($handle, 8);
            if ($head === null){
                break;
            }
            $record = new exthRecord();
            $record->Type = $this->toUInt(substr($head, 0, 4));
            $record->Length = $this->toUInt(substr($head, 4, 4));
            $dataLength = $record->Length - 8;
            if ($dataLength < 0 || $dataLength > $fileSize - ftell($handle)){
                // Malformed length: it would read before the record or past the file end.
                break;
            }
            // fread() rejects a length of 0, so an empty payload is handled here.
            $record->Data = $dataLength > 0 ? (string) fread($handle, $dataLength) : "";
            $this->exthHeader->Records[] = $record;
            echo "Record ".$i." type: ".$record->Type." length: ".$record->Length."\n";
            echo "  data: ".$record->Data."\n";
        }
    }

    /** Returns exactly $length bytes from the stream, or null on a short read. */
    private function readBytes($handle, $length){
        $bytes = fread($handle, $length);
        if ($bytes === false || strlen($bytes) !== $length){
            return null;
        }
        return $bytes;
    }

    /** Interprets a byte string of any width as a big-endian unsigned integer. */
    private function toUInt($bytes){
        return hexdec(bin2hex($bytes));
    }

    /** Reads a big-endian unsigned integer of $length bytes; 0 on a short read. */
    private function readUInt($handle, $length){
        $bytes = $this->readBytes($handle, $length);
        return $bytes === null ? 0 : $this->toUInt($bytes);
    }

    /**
     * Finds the first EXTH record of the given type.
     *
     * @param int $type EXTH record type.
     * @return exthRecord|null The record, or null when the file has no EXTH
     *                         block or no record of that type.
     */
    protected function GetRecord($type)
    {
        if ($this->exthHeader === null){
            return NULL;
        }
        foreach ($this->exthHeader->Records as $record){
            if ($record->Type == $type)
                return $record;
        }
        return NULL;
    }

    /**
     * Returns the payload of the first EXTH record of the given type.
     *
     * @param int $type EXTH record type.
     * @return string The raw record data, or "" when there is no such record.
     */
    protected function GetRecordData($type)
    {
        $record = $this->GetRecord($type);
        if ($record)
            return $record->Data;
        return "";
    }

    /** @return string Data of EXTH record 503, or "" when absent. */
    public function Title()
    {
        return $this->GetRecordData(503);
    }

    /** @return string Data of EXTH record 100, or "" when absent. */
    public function Author()
    {
        return $this->GetRecordData(100);
    }

    /** @return string Data of EXTH record 104, or "" when absent. */
    public function Isbn()
    {
        return $this->GetRecordData(104);
    }

    /** @return string Data of EXTH record 105, or "" when absent. */
    public function Subject()
    {
        return $this->GetRecordData(105);
    }

    /** @return string Data of EXTH record 101, or "" when absent. */
    public function Publisher()
    {
        return $this->GetRecordData(101);
    }
}
// Usage example: parse "test.mobi" (the constructor prints its own header
// dump) and then print selected metadata fields, each on a new line.
$mobi = new mobi("test.mobi");
echo "\nTitle: ".$mobi->Title();
echo "\nAuthor: ".$mobi->Author();
echo "\nIsbn: ".$mobi->Isbn();
echo "\nSubject: ".$mobi->Subject();
echo "\nPublisher: ".$mobi->Publisher();

我也遇到过同样的问题,没有找到任何PHP解析器,只好自己编写(遗憾的是,我不能公开我的代码)。这里有一个介绍.mobi文件结构的好资源:http://wiki.mobileread.com/wiki/MOBI