I have tried lots of crawlers, but none of them fulfilled all of my requirements.
So I decided to create a new crawler that finds IP addresses, fetches their home pages, and stores them in my folder.
It works fine but is a little slow: at present I have 70,000 websites in my folder after 3 days. This program looks for IP addresses all over the world.
First of all, create a folder where your program will be stored, with a sub-folder named `website` inside it for the downloaded pages. Then create a Python file (with the .py extension), copy the code below into it, and save it.
'''
This is an IP finder: it finds every IP address in the world that has a website.
'''
from socket import gethostbyaddr
import threading
import sys
import time
from itertools import product
import urllib2
from netaddr import IPRange
from colorama import Fore, Back, Style
# Maximum number of lookup threads allowed to run at the same time; main()
# uses it as the initial value of the bounded semaphore.
NB_OF_THREADS_MAX = 254
def show_name(ip):
    """Reverse-resolve ``ip`` and, when it has a host name, save its home page.

    This is the target of the worker threads started by ``main``. The caller
    acquires one slot of the global ``semaphore`` before starting the thread;
    this function always gives that slot back, whatever happens.

    Args:
        ip: Dotted-quad IP address, as a string.

    Returns:
        None. Progress is reported on stdout only.
    """
    try:
        try:
            # gethostbyaddr returns (hostname, aliases, addresses).
            host = gethostbyaddr(ip)[0]
        except Exception:
            # Best effort: an address without a reverse record is expected and
            # must not kill the thread.
            print(Fore.BLUE + "%s ------ not resolve\n" % (ip))
            print(Fore.RESET)
        else:
            # Kept out of the try body so that a download problem is never
            # reported as a failed DNS lookup.
            store_html(host, ip)
    finally:
        # Release in ``finally`` so a failing worker can never leak a slot and
        # stall the dispatch loop.
        semaphore.release()
def store_html(host, ip, timeout=10):
    """Download the home page of ``host`` and save it as ``website/<ip>``.

    Args:
        host: Host name to fetch over plain HTTP (``http://<host>``).
        ip: IP address the host name belongs to; used as the file name inside
            the ``website`` directory, which must already exist.
        timeout: Seconds a blocking network operation may take before the
            fetch is abandoned, so that an unresponsive server cannot keep the
            calling thread busy forever.

    Returns:
        True when the page was fetched and written, False on any failure.
        Failures are reported on stdout and never raised.
    """
    try:
        response = urllib2.urlopen('http://' + host, timeout=timeout)
        try:
            html = response.read()
        finally:
            # Always give the socket back, even when the read fails.
            response.close()
        # Binary mode: the body is raw bytes and must not go through newline
        # translation; ``with`` closes the file even if the write fails.
        with open("website/" + ip, "wb") as fh:
            fh.write(html)
    except Exception:
        print(Fore.RED + "%s (%s) -------- error\n" % (host, ip))
        print(Fore.RESET)
        return False
    print(Fore.GREEN + "%s (%s) -------- ok\n" % (host, ip))
    print(Fore.RESET)
    return True
def main():
    """Scan the IPv4 range 1.1.1.1 - 254.254.254.254 with a bounded thread pool.

    For every address a ``show_name`` worker thread is started. The global
    ``semaphore`` limits the number of workers alive at once to
    ``NB_OF_THREADS_MAX``. The function returns once all workers have finished.

    Side effects:
        Creates the module-level globals ``lock`` and ``semaphore`` and makes
        sure the ``website`` output directory exists.
    """
    import os  # local import: only needed here, for the output directory

    global lock
    global semaphore
    lock = threading.Lock()
    semaphore = threading.BoundedSemaphore(value=NB_OF_THREADS_MAX)

    # store_html writes to "website/<ip>"; without the directory every save
    # would fail and be reported as an error.
    if not os.path.isdir("website"):
        os.makedirs("website")

    for address in IPRange('1.1.1.1', '254.254.254.254'):
        ip_to_test = address.format()
        # Blocks while NB_OF_THREADS_MAX workers are still running.
        semaphore.acquire()
        worker = threading.Thread(target=show_name, args=(ip_to_test,))
        try:
            worker.start()
        except Exception:
            print("can't start so many threads.")
            # The worker never ran, so it will never release its slot; give it
            # back here or the pool shrinks until the loop deadlocks.
            semaphore.release()

    # Wait until only the main thread is left.
    while threading.activeCount() != 1:
        time.sleep(2)
# Script entry point: start the scan. There is no __main__ guard, so this also
# runs if the file is imported as a module.
main()
Now open a command prompt and type this command:
$python file_name.py
You will see output like the screenshot below.
Green means the IP address resolves and its website exists, red means it resolves but the website could not be fetched, and blue means the IP address does not resolve.
If you want more threads, increase the value of NB_OF_THREADS_MAX; it sets how many IP addresses are checked in parallel.
Let's celebrate: you have discovered your best cocktail.
Enjoy!