Python:从受保护的文件夹(.htaccess)下载


Python: Downloading from a protected folder (.htaccess)

我在 Web 服务器上设置了一个受 .htaccess 保护的目录,里面有两个文件:一个文本文件和一个 rar 文件(rar 文件中包含一个 pdf 文件)。下面是一个可以直接运行的程序:登录成功后,我首先得到文本文件的内容,然后从中取出下载路径(download_path = self.url.text),接着尝试下载该文件。但这行不通。有人能帮帮我吗?

FILE_NAME = "downloader2.py"
import os
import requests
import sys
from requests.auth import HTTPBasicAuth
from PyQt4.QtCore import QThread, pyqtSignal, Qt, QSemaphore
from PyQt4.QtGui import QVBoxLayout, QPushButton, QDialog, QProgressBar, QApplication, QMessageBox   
class Download_Thread(QThread):
    finished_thread = pyqtSignal()
    error_http = pyqtSignal()
    finished_download = pyqtSignal()
    notify_progress = pyqtSignal(int)
    def __init__(self, location, link, parent=None):
        QThread.__init__(self, parent)
        self.link = link
        self.location = location
        self._run_semaphore = QSemaphore(1)

    def run(self):
        try:
            self.url= requests.get(self.link, auth=HTTPBasicAuth('user_name', 'user_password'))
            download_path = self.url.text
            print "URL PATH ", download_path
            file = requests.get(download_path, stream=True)
            status = self.url.status_code
            print "STATUS ", status
            if not status == 200:
                self.error_http.emit()
        except (requests.exceptions.URLRequired,
                requests.exceptions.ConnectionError,
                requests.exceptions.HTTPError,
                requests.exceptions.Timeout,
                requests.exceptions.ConnectTimeout,
                requests.exceptions.ReadTimeout), g:
            print 'Could not download ', g
            self.error_http.emit()
        else:
            file_size = int(requests.head(download_path).headers.get('content-length', [0]))
            print "file_size", file_size
            r = requests.head(download_path)
            print "heanders", r.headers
            print "%s Byte" %file_size
            result = 2000 / (1024*5)
            print "result", result
            chunk_size = int(result)
            print "chunk_size", chunk_size
            downloaded_bytes = 0
            with open(self.location, 'wb') as fd:
                for chunk in file.iter_content(chunk_size):
                    fd.write(chunk)
                    downloaded_bytes = fd.tell()
                    #print (float(downloaded_bytes)/file_size*100)
                    self.notify_progress.emit(float(downloaded_bytes)/file_size*100)
                    if self._run_semaphore.available() == 0:
                        self._run_semaphore.release(1)
                        break
                print "Finish"
                self.finished_download.emit()
                self.finished_thread.emit()
    def stop(self):
        print "stop"
        self._run_semaphore.acquire(1)
class MyCustomDialog(QDialog):
    def __init__(self):
        super(MyCustomDialog, self).__init__()
        layout = QVBoxLayout(self)
        #self.url = get_access_data_and_link('Sophus','danny5658')
        #print "CALLING DOWNLOAD", self.url
        self.url = 'http://xarphus.de/schutz/'
        # Create a progress bar and a button and add them to the main layout
        self.progressBarUpdate = QProgressBar(self)
        self.progressBarUpdate.setAlignment(Qt.AlignCenter)
        layout.addWidget(self.progressBarUpdate)
        pushButtonUpdate = QPushButton("Start", self)
        layout.addWidget(pushButtonUpdate)
        pushButtonCancel = QPushButton("Cancel", self)
        layout.addWidget(pushButtonCancel)
        pushButtonUpdate.clicked.connect(self.check_folder_exists)
        # Set data for download and saving in path
        self.location = os.path.abspath(os.path.join('temp', 'example-app-0.3.win32.zip'))
        #self.url = 'http://sophus.bplaced.net/download/example-app-0.3.win32.zip'
        self.download_task = Download_Thread(self.location, self.url)
        self.download_task.notify_progress.connect(self.on_progress)
        self.download_task.finished_thread.connect(self.on_finished)
        self.download_task.error_http.connect(self.on_HTTPError)
        self.download_task.finished_download.connect(self.on_finish_download)
        pushButtonCancel.clicked.connect(self.on_finished)
    def on_start(self):
        self.progressBarUpdate.setRange(0, 0)
        self.download_task.start()
    def on_finish_download(self):
        msg_box = QMessageBox()
        QMessageBox.question(msg_box, ' Message ',
                                           "The file has been fully downloaded.", msg_box.Ok)
    def on_HTTPError(self):
        reply = QMessageBox.question(self, ' Error ',
                                           "The file could not be downloaded. Will they do it again?", QMessageBox.Yes | 
            QMessageBox.No, QMessageBox.No)
        if reply == QMessageBox.Yes:
            self.on_start()
        else:
            print "Close button pressed"
            #event.ignore()
    def on_progress(self, i):
        self.progressBarUpdate.setRange(0, 100)
        self.progressBarUpdate.setValue(i)
    def check_folder_exists(self):
        location = os.path.abspath(os.path.join('temp'))
        if not os.path.exists(location):
            os.makedirs(location)
            print "Folder was created"
            self.on_start()
        else:
            print "Folder already exists"
            self.on_start()
    def on_finished(self):
        self.progressBarUpdate.setValue(0)
        self.close()
    def closeEvent(self, event):
        self.download_task.stop()
def main():
    """Create the Qt application, show the download dialog and run the event loop."""
    application = QApplication(sys.argv)
    dialog = MyCustomDialog()
    dialog.resize(600, 400)
    dialog.show()
    # Hand the event loop's return value to the interpreter as exit status.
    exit_code = application.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()

如果我运行这个程序,我在控制台上得到这些行:

Folder already exists
URL PATH  http://xarphus.de/schutz/datei.rar
STATUS  200
file_size 290
heanders {'content-length': '290', 'x-varnish': '150708046 150707392', 'content-encoding': 'gzip', 'accept-ranges': 'bytes', 'vary': 'Accept-Encoding', 'server': 'Apache', 'age': '0', 'connection': 'keep-alive', 'via': '1.1 varnish', 'date': 'Wed, 22 Jul 2015 23:20:06 GMT', 'content-type': 'text/html;charset=iso-8859-1', 'www-authenticate': 'Basic realm="Service-Bereich"'}
290 Byte
result 0
chunk_size 0
Finish
stop

我还没有找到解决方案,但我已经精简了源代码,希望这样有助于找到问题所在。

import requests
import shutil
from requests.auth import HTTPBasicAuth
def log_in(user, pwd):
    s = requests.session()
    resp = s.get('http://xarphus.de/protect_folder/', auth=HTTPBasicAuth(user, pwd))
    print "Status: ", resp.status_code
    content_txt_file = resp.text
    print "Cookies: ", requests.utils.dict_from_cookiejar(s.cookies)
    print "Content of txt file: ", content_txt_file
    print "start downloading"
    response = s.get(content_txt_file, stream=True)
    with open('test_rar.rar', 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)
    del response
if __name__ == '__main__':
    # Demo credentials for the protected test folder.
    user_name, user_password = 'test_account', 'test_user'
    log_in(user_name, user_password)

运行这个程序后,文件确实被下载了,但只有 209 字节,太小了——原始文件大约有 36 MB。

伙计,虽然没有人帮助我(不管出于什么原因),我找到了一个解决方案。下面是正确的代码:

def get_logged_in_session(user, pwd):
    url = 'http://xarphus.de/protect_folder'
    s = requests.session()
    auth = HTTPBasicAuth(user, pwd)
    resp = s.get(url=url, auth=auth)
    print "Status: ", resp.status_code
    content_txt_file = resp.text
    print "Cookies: ", s.cookies
    print "Content of txt file: ", content_txt_file
    print "init the download"
    response = s.get(url=content_txt_file, auth=auth, stream=True)
    print "start downloading"
    with open('test_rar.rar', 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)
    print "Closing response"
    response.close
    print "response is closed"
    print "Closing resp"
    resp.close
    print "resp is closed"

我已经改变了这一行

response = s.get(content_txt_file, stream=True)

改成了这一行:

response = s.get(url=content_txt_file, auth=auth, stream=True)

为什么?因为第一次请求时传入的 auth 只对那一次请求有效;要想访问受保护文件夹中的文件,下载请求也必须再次提供认证信息。