-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgithubSearch.py
More file actions
executable file
·76 lines (70 loc) · 3.11 KB
/
githubSearch.py
File metadata and controls
executable file
·76 lines (70 loc) · 3.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/local/bin/python3
import subprocess
import json
import time
import urllib
import requests
from bs4 import BeautifulSoup
def checkProgramOfInterest(rawLinkString, programOfInterest):
    """Echo a source file and report whether it mentions both names of interest.

    Every line of ``programOfInterest`` is printed to stdout. When at least
    one line contains ``AsyncTask`` and at least one line contains
    ``getResources`` (they need not be on the same line), a banner naming
    ``rawLinkString`` is printed as well.

    Args:
        rawLinkString: URL the text was downloaded from; only used in the
            report message.
        programOfInterest: Full text of the source file to scan.

    Returns:
        True if both substrings occur somewhere in the text, otherwise False.
        Callers that ignore the return value see the same printed output as
        before.
    """
    containsAsyncTask = False
    containsGetResources = False
    print('\n\n')
    for line in programOfInterest.splitlines():
        print(line)
        if 'AsyncTask' in line:
            containsAsyncTask = True
        if 'getResources' in line:
            containsGetResources = True

    containsBoth = containsAsyncTask and containsGetResources
    if containsBoth:
        print('!!!!!!!!!!!!!!!!!!!!!')
        print('{0} contains types of interest'.format(rawLinkString))
    return containsBoth
def main():
    """Search GitHub for Java files and scan each hit for the names of interest.

    Runs one GitHub code-search query through ``curl -n``, then for every
    result whose ``html_url`` ends in ``.java`` downloads the HTML page,
    locates the element with id ``raw-url``, downloads the raw file it points
    to and passes the text to ``checkProgramOfInterest``.

    Raises:
        subprocess.CalledProcessError: if ``curl`` exits with a non-zero status.
        json.JSONDecodeError: if the search response is not valid JSON.
        urllib.error.URLError: if one of the page downloads fails.
    """
    # Imported here so the submodules are guaranteed to be loaded: a bare
    # ``import urllib`` does not import ``urllib.request``.
    import urllib.parse
    import urllib.request

    pageNumber = 1
    # Query parameters are separated with '&'; a second '?' would make
    # "page=..." part of the search text instead of a parameter.
    searchUrl = (
        'https://api.github.com/search/code'
        '?q=getResources+AsyncTask+in:file+language:java'
        '&page={0}&per_page=100&sort=stars&order=desc'
    ).format(pageNumber)
    commandList = ['curl', '-n', searchUrl]
    commandOutput = subprocess.run(
        commandList, check=True, stdout=subprocess.PIPE
    ).stdout.decode('utf-8')
    searchResult = json.loads(commandOutput)

    # An error response (e.g. rate limiting) carries no 'items' key, and a
    # result page may hold fewer entries than requested, so iterate over
    # whatever was actually returned instead of a fixed index range.
    items = searchResult.get('items', [])
    if not items:
        print('no search results returned: {0}'.format(
            searchResult.get('message', '')))
        return

    for currentCount, item in enumerate(items):
        urlToSearch = item['html_url']
        if not urlToSearch.endswith('.java'):
            continue
        print('{0}'.format(currentCount))

        # Pause between requests to stay gentle on the server.
        time.sleep(1)
        with urllib.request.urlopen(urlToSearch) as pageRequest:
            # The response body can only be read once, so keep the result.
            pageResult = pageRequest.read()
        soup2 = BeautifulSoup(pageResult, 'html.parser')
        rawLink = soup2.find(id='raw-url')
        if rawLink is None:
            print('no raw link found on {0}, skipping'.format(urlToSearch))
            continue

        time.sleep(1)
        # urljoin copes with both root-relative and absolute hrefs and does
        # not produce a doubled slash after the host name.
        rawLinkString = urllib.parse.urljoin(
            'https://github.com/', rawLink['href'])
        with urllib.request.urlopen(rawLinkString) as finalResults:
            programOfInterest = finalResults.read().decode(
                'utf-8', errors="ignore")
        checkProgramOfInterest(rawLinkString, programOfInterest)
#pageNumber = 1
#notDone = True
#changeSet = set()
#while notDone:
# saveFileName = 'savedGitHubSearches/savedSearch{0}.json'.format(pageNumber)
# if os.path.isfile(saveFileName):
# with open(saveFileName,'r') as fin:
# searchResult = json.loads(fin.read())
# else:
# #command = 'curl -n https://api.github.com/search/code?q=onCreate+Fragment+onCreateOptionsMenu+in:file+language:java?page={0}&per_page=100&sort=stars&order=desc'.format(pageNumber)
# command = 'curl -n https://api.github.com/search/code?q=onCreateView+Fragment+in:file+language:java?page={0}&per_page=100&sort=stars&order=desc'.format(pageNumber)
# commandList = command.split(" ")
# commandOutput = subprocess.run(commandList, check=True, stdout=subprocess.PIPE).stdout.decode('utf-8')
# searchResult = json.loads(commandOutput)
# with open(saveFileName,'w') as fout:
# json.dump(searchResult,fout)
# Run the search only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()