-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtwittermain.py
More file actions
144 lines (112 loc) · 3.53 KB
/
twittermain.py
File metadata and controls
144 lines (112 loc) · 3.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
from simplejson import loads
import threading
import urllib2
import requests
import MySQLdb
import time
import twitterdump
import os
import dump_zipper
import db_deletion
# Install a urllib2 opener that bypasses any environment proxy settings and
# presents a browser User-Agent so Twitter serves normal HTML.
proxy=urllib2.ProxyHandler({})
opener=urllib2.build_opener(proxy)
opener.addheaders=[('User-agent','Mozilla/5.0')]
urllib2.install_opener(opener)
from bs4 import BeautifulSoup as bs
from collections import OrderedDict
# dic maps user-id -> profile URL in discovery order; it serves as both the
# crawl frontier (indexed by `index` in the main loop) and the visited set
# (checked by id_Checker).
dic=OrderedDict()
#main_list=[]
# NOTE(review): `start` is never read in this file -- presumably the seed
# user id as a string; confirm before removing.
start="737848638"
# Set True by urlmaker on a non-200 response (rate-limit); cleared by
# comeback() after a 5-hour sleep.
block= False
# Permanent log of every discovered user (id##url##name per line).
filetracker=open("/home/sys8/twitter/twitter_track.txt","a+")
# Per-batch log of URLs; consumed by twitterdump and rotated every 10000
# users in the main loop.
temptracker=open("/home/sys8/twitter/temp_tracker.txt","a+")
# Seed entry. NOTE(review): key is an int here, but keys added later by
# urlmaker are strings from the HTML attribute -- the types never match.
dic[737848638]="https://twitter.com/legendstevejobs"
#def file_temp():
def id_Checker(user_id, known=None):
    """Return True if user_id has NOT been seen yet, False otherwise.

    user_id -- id to test (NOTE(review): scraped ids are strings while the
               seed key in `dic` is an int, so the seed can never match --
               confirm whether ids should be normalized to one type).
    known   -- optional mapping to test against; defaults to the module-level
               `dic` frontier (kept for backward compatibility).
    """
    global dic
    if known is None:
        known = dic
    # Direct membership test on the mapping; iterating .keys() was O(n).
    return user_id not in known
count=0  # total number of profiles recorded by pagedownloader (monotonic)
def comeback():
    """Wait out Twitter's rate-limit, then clear the module-level block flag.

    Called by the main loop whenever crawling cannot proceed; sleeps for
    five hours before allowing new worker threads to be spawned.
    """
    global block
    # 5 * 60 * 60 == 18000 seconds -- spelled out so the duration is obvious.
    time.sleep(5 * 60 * 60)
    block = False
def pagedownloader(user_id,user_url):
    """Fetch a profile page and append the user to both tracker files.

    user_id  -- Twitter numeric user id (int or str).
    user_url -- full profile URL, e.g. https://twitter.com/<handle>.

    Side effects: one HTTP GET via the module-level urllib2 opener, one line
    appended to each of `filetracker` and `temptracker`, and the module-level
    `count` incremented. Raises whatever urllib2 raises on network failure
    (callers wrap this in try/except).
    """
    global count
    # Fetch the page for its side effect only; the original bound the body
    # to an unused local. NOTE(review): the downloaded HTML is discarded --
    # confirm whether persisting it was intended (twitterdump may re-fetch).
    urllib2.urlopen(user_url).read()
    # Third path segment of https://twitter.com/<handle> is the handle.
    user_name=user_url.split('/')[3]
    filetracker.write(str(user_id)+"##"+str(user_url)+"##"+str(user_name)+"\n")
    temptracker.write(str(user_url)+"\n")
    count=count+1
def urlmaker(user_id,user_url):
    """Expand the crawl frontier with users related to (user_id, user_url).

    Queries Twitter's undocumented /i/related_users/<id> JSON endpoint; on a
    200 response it records this user via pagedownloader and adds every
    not-yet-seen related user to `dic`. On any other status it sets the
    module-level `block` flag so the main loop backs off.

    All errors are caught and printed (never re-raised) because this runs
    inside daemon worker threads.
    """
    try:
        global dic
        global main_list
        global count
        global block
        r=requests.get("https://twitter.com/i/related_users/"+str(user_id))
        #print r.status_code
        if r.status_code==requests.codes.ok:
            # Record this profile first, then mine the response for more.
            pagedownloader(user_id,user_url)
            print user_url
            #count=count+1
            # Response body is JSON with the related-users widget HTML
            # embedded under 'related_users_html'.
            data=(loads(r.content))
            data=data.get('related_users_html')
            soup=bs(data)
            div=soup.findAll("div",{"class":"content"})
            for i in range(len(div)):
                #print "https://twitter.com"+div[i].find("a").get("href")
                # NOTE: this rebinds the `user_id` parameter to the related
                # user's id (a string from the data-user-id attribute).
                user_id=div[i].find("a").get("data-user-id")
                check=id_Checker(user_id)
                if check:
                    #main_list.append(div[i].find("a").get("data-user-id"))
                    # New user: frontier key is the string id, value the
                    # absolute profile URL built from the relative href.
                    dic[div[i].find("a").get("data-user-id")]=str("https://twitter.com"+div[i].find("a").get("href"))
        else :
            # Non-200 (presumably rate-limited): flag the main loop to back off.
            print r.status_code
            block=True
        #print dic
        #print main_list
    except Exception as exp:
        print "******************EXCEPTION********************************"
        print exp
# Seed the crawl synchronously with the initial profile so dic has entries
# for the worker loop below to index into.
urlmaker(737848638,"https://twitter.com/legendstevejobs")
index=1          # next position in dic to hand to a worker (0 was the seed)
zipper_index=1   # batch suffix passed to dump_zipper after each 10000 users
# Main crawl loop: keep up to 4 urlmaker worker threads running; every
# 10000 users, rotate the temp tracker, dump/zip the collected data and
# prune the database. Runs forever; all errors are printed and ignored.
while True:
    try :
        if threading.activeCount()<5 and block == False:
            # NOTE(review): dic.keys()[index] / dic.values()[index] only
            # works on Python 2 (where keys()/values() return lists) and is
            # O(n) per iteration; dic also grows concurrently from worker
            # threads -- confirm ordering assumptions before porting.
            t=threading.Thread(target=urlmaker,args=(dic.keys()[index],dic.values()[index],))
            t.start()
            index=index+1
            print "count is :: "+str(index)
            if index%10000==0:
                print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
                print "                             SLEEPING                             "
                print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
                # Batch checkpoint: close the temp tracker so twitterdump
                # sees a complete file, dump, then recreate it empty.
                temptracker.close()
                #time.sleep(100)
                twitterdump.dumper()
                print "Removing file : "
                os.remove("/home/sys8/twitter/temp_tracker.txt")
                print "File removed "
                temptracker=open("/home/sys8/twitter/temp_tracker.txt","a+")
                print "File Created Again "
                print "Zipper Started"
                # Archive this batch's dump, then free DB space.
                dump_zipper.zipper_main(zipper_index)
                zipper_index+=1
                print "Db deletion starting..."
                db_deletion.db_deleter_main()
                print "indexing started again"
                #temptracker=open("/home/sys8/twitter/temp_tracker.txt","a+")
                print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
                print "                   WAKE UP                   "
                print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
            else:
                # Redundant: the loop would continue anyway.
                continue
        else:
            # Reached either because 4 workers are already running or
            # because `block` is set. NOTE(review): comeback() sleeps 5
            # hours in BOTH cases -- when merely thread-limited this stalls
            # the whole crawl; confirm whether a short sleep was intended.
            comeback()
    except Exception as exp:
        print "@@@@@@@@@@@@@@@@@@@@@@@EXCEPTION@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
        print exp