Aug-25-2018, 01:56 PM
(This post was last modified: Aug-25-2018, 01:56 PM by eddywinch82.)
Also, here is a later Python script — can it be adapted so that, instead of using the last number of planes, it works through the last pages of the 253 total? Here is the code that was used for the Project AI website .zip files:
from bs4 import BeautifulSoup
import requests
from tqdm import tqdm, trange
from itertools import islice


def all_planes():
    """Yield the Wayback-Machine URL of each plane's file-listing page.

    Scrapes the archived Project AI aircraft index (category 6) and
    yields one absolute archive URL per plane link found on the page.
    """
    url = 'http://web.archive.org/web/20031124231537/http://www.projectai.com:80/libraries/acfiles.php?cat=6'
    url_get = requests.get(url)
    soup = BeautifulSoup(url_get.content, 'lxml')
    # Each plane link sits inside a <td width="50%"> cell on the index page.
    td = soup.find_all('td', width="50%")
    plain_link = [link.find('a').get('href') for link in td]
    for ref in tqdm(plain_link):
        # The scraped hrefs are relative; rebuild them against the archive root.
        yield ('http://web.archive.org/web/20031124231537/'
               'http://www.projectai.com:80/libraries/{}'.format(ref))


def download(all_planes, start=25, stop=72):
    """Download the .zip files for a slice of the plane pages.

    Parameters
    ----------
    all_planes : callable
        Zero-argument generator factory yielding plane-page URLs
        (normally the ``all_planes`` function above).
    start, stop : int or None
        Half-open slice of plane pages to process.  The defaults
        (25, 72) reproduce the original hard-coded "last 47 planes"
        behaviour; pass ``stop=None`` to continue to the end of the
        listing, or other bounds to target a different range.
    """
    for plane_url in islice(all_planes(), start, stop):
        url_get = requests.get(plane_url)
        soup = BeautifulSoup(url_get.content, 'lxml')
        # Download links live in <td class="text" colspan="2"> cells.
        td = soup.find_all('td', class_="text", colspan="2")
        zip_url = 'http://web.archive.org/web/20031124231537/http://www.projectai.com:80/libraries/download.php?fileid={}'
        for item in tqdm(td):
            # NOTE(review): the cell text is used verbatim as the local
            # file name — assumes it never contains path separators or
            # other characters invalid for filenames; verify.
            zip_name = item.text
            # The file id is the value after the last '=' in the href.
            zip_number = item.find('a').get('href').split('=')[-1]
            with open(zip_name, 'wb') as f_out:
                down_url = requests.get(zip_url.format(zip_number))
                f_out.write(down_url.content)


if __name__ == '__main__':
    download(all_planes)

# Eddie