-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Automation is working, but some code still needs try/except handling
- Loading branch information
1 parent
94f3e05
commit f79d10f
Showing
14 changed files
with
321 additions
and
70 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,4 @@ | ||
.env | ||
follow_users.log | ||
/scraper_usernames/.env | ||
fething_new_users.ipynb | ||
check_pages.ipynb | ||
fething_new_users.ipynb |
Binary file not shown.
Binary file not shown.
This file was deleted.
Oops, something went wrong.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,38 +1,47 @@ | ||
# fetching_new_users.py = only getting 100 new users for each iteration | ||
|
||
import os | ||
import requests | ||
import dotenv | ||
|
||
from typing import List | ||
from state_manager import load_state, save_state | ||
|
||
dotenv.load_dotenv() | ||
github_token = os.getenv("GITHUB_TOKEN")# your github token | ||
github_token = os.getenv("GITHUB_TOKEN") # your github token | ||
|
||
|
||
def fetching_users_from_github(users_to_fetch=100, token=None) -> List[str]:
    """Fetch one page of GitHub user logins and advance the saved page cursor.

    Queries the GitHub user-search endpoint with a fixed query string and
    requests the page number stored under ``current_page`` in the persisted
    state. After a successful response the page number is incremented and
    saved, so the next call asks for the following page.

    Args:
        users_to_fetch: Number of results requested per page (``per_page``).
        token: GitHub token. Sent as ``Authorization: token <value>``; a value
            that already contains a scheme (i.e. contains a space) is sent
            unchanged. When falsy, no Authorization header is sent.

    Returns:
        The ``login`` of every user on the fetched page, or an empty list
        when the request fails.
    """
    # Bound up front so the final return is valid even when the request fails.
    fetched_users: List[str] = []

    state = load_state()
    current_page = state.get('current_page', 0)

    querry = 'language:python repos:>5 followers:>10'
    url = "https://api.github.com/search/users"
    params = {
        'per_page': users_to_fetch,
        'page': current_page,
        'q': querry,
    }
    headers = {
        'Accept': 'application/vnd.github.v3+json',
        'User-Agent': 'Github Follow Script',
    }
    if token:
        # Use the same "token <value>" form as the follow request, without
        # double-prefixing a value that already carries a scheme.
        headers['Authorization'] = token if ' ' in token else f'token {token}'

    try:
        # A timeout keeps a stalled connection from hanging the script; the
        # resulting Timeout is a RequestException and is handled below.
        response = requests.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        fetched_users_api = data.get('items', [])
        fetched_users = [user['login'] for user in fetched_users_api]
        # Only move to the next page once this one was fetched successfully.
        state['current_page'] = current_page + 1
        save_state(state)

    except requests.exceptions.HTTPError as e:
        print(f"Error: {e}")
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")

    return fetched_users
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
3b1b |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,39 +1,98 @@ | ||
import os | ||
from datetime import time | ||
|
||
from time import sleep | ||
import requests | ||
from fetching_new_users import fething_users_from_github | ||
from fetching_new_users import fetching_users_from_github | ||
import logging | ||
import dotenv | ||
import json | ||
from state_manager import load_state, save_state | ||
|
||
|
||
dotenv.load_dotenv() | ||
USERNAMES_FILE = 'usernames.txt' | ||
LAST_LINE_FILE = 'last_line.txt' | ||
github_token = os.getenv("GITHUB_TOKEN")# your github token | ||
|
||
### fetch 100 users from github | ||
|
||
users = fething_users_from_github(100, github_token) | ||
### read the users from the file | ||
def read_users_from_file():
    """Return the usernames stored in ``USERNAMES_FILE``, one per line.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped. A missing file is treated as "no users yet".

    Returns:
        A list of usernames in file order; empty if the file does not exist.
    """
    try:
        with open(USERNAMES_FILE, 'r') as file:
            stripped = (line.strip() for line in file)
            return [user for user in stripped if user]
    except FileNotFoundError:
        # First run: nothing has been written yet.
        return []
|
||
|
||
### write the users to a file | ||
def write_users_to_file(users):
    """Append to ``USERNAMES_FILE`` every user that is not already listed.

    Args:
        users: Iterable of usernames to add. Names already present in the
            file, and repeats within *users* itself, are written only once.
    """
    with open(USERNAMES_FILE, 'a') as file:
        # The file is read back only after it was opened in append mode, so
        # it is guaranteed to exist at this point.
        known_users = set(read_users_from_file())
        for user in users:
            if user not in known_users:
                file.write(f"{user}\n")
                # Remember it so a repeat later in this batch is skipped too.
                known_users.add(user)
|
||
|
||
### keep track of last followed user | ||
def read_last_followed_user():
    """Return the ``last_followed_user`` entry of the saved state, or None if absent."""
    return load_state().get('last_followed_user')
|
||
|
||
### write the last followed user to a file | ||
def write_last_followed_user(user):
    """Store *user* under ``last_followed_user`` in the saved state."""
    current_state = load_state()
    current_state.update({'last_followed_user': user})
    save_state(current_state)
|
||
|
||
def simple_counter():
    """Increment ``how_many_bot_followed_so_far_counter`` in the saved state by one.

    A missing counter is treated as 0 before incrementing.
    """
    counter_key = 'how_many_bot_followed_so_far_counter'
    current_state = load_state()
    previous_count = current_state.get(counter_key, 0)
    current_state[counter_key] = previous_count + 1
    save_state(current_state)
|
||
### read the users from the file | ||
|
||
### follow the users | ||
def follow_users(users):
    """Follow each user in *users* through the GitHub API, pausing between calls.

    For every username a ``PUT`` is sent to the ``/user/following/{user}``
    endpoint, authenticated with the module-level ``github_token``. Progress
    (last followed user and the follow counter) is recorded only when the
    API answers 204, so failed attempts are not counted as follows.

    Args:
        users: Iterable of GitHub usernames to follow, in order.
    """
    headers = {
        'Authorization': f'token {github_token}',
        'Accept': 'application/vnd.github.v3+json',
        'User-Agent': 'Github Follow Script',
    }

    for user in users:
        url = f'https://api.github.com/user/following/{user}'
        try:
            # The timeout keeps one stalled request from blocking the whole
            # run; a Timeout is a RequestException and is reported below.
            response = requests.put(url, headers=headers, timeout=30)
        except requests.exceptions.RequestException as e:
            print(f"Error occurred while following {user}: {e}")
            continue

        print(f"Response status code for {user}: {response.status_code}")
        if response.status_code == 204:
            # Record progress only for a confirmed follow.
            write_last_followed_user(user)
            simple_counter()
            print(f"Successfully followed {user}")

        # Fixed pause between requests; the message now matches the delay.
        print("sleeping for 2 seconds")
        sleep(2)
|
||
### repeat the process - main loop | ||
|
||
def main():
    """Fetch a page of users, store them, and follow those not yet followed.

    Steps:
      1. Fetch up to 100 users from GitHub and append the new ones to the
         usernames file.
      2. Read the full list back from the file.
      3. Find the last followed user in that list and follow everyone after
         it. If there is no last followed user yet, or it is not in the
         file, start from the beginning of the list.
    """
    ### fetch 100 users from GitHub
    fetched_users = fetching_users_from_github(100, github_token)
    print(fetched_users)
    write_users_to_file(fetched_users)
    print("Users written to file")

    users = read_users_from_file()
    print("Users read from file")
    print(users)

    last_user = read_last_followed_user()
    print(f"Last followed user: {last_user}")
    try:
        last_user_index = users.index(last_user)
    except ValueError:
        # First run (no last user) or the user is not in the file:
        # -1 makes the slice below start at the first entry.
        last_user_index = -1
    print(f"Last user index: {last_user_index}")

    users_to_follow = users[last_user_index + 1:]
    print(f"Users to follow: {users_to_follow}")
    follow_users(users_to_follow)
    print("Users followed")
|
||
|
||
if __name__ == '__main__': | ||
main() | ||
print("Done") | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,3 @@ | ||
requests | ||
aiohttp | ||
python-dotenv | ||
aiolimiter | ||
ratelimit |
Binary file added
BIN
+4.01 KB
scraper_usernames/__pycache__/scrape_it_from_github_user.cpython-311.pyc
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"last_followed_user": "yunjey", | ||
"how_many_bot_followed_so_far_counter": 205, | ||
"current_page": 3 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# state_manager.py - json operations with main and fetching_new_users | ||
|
||
import json | ||
|
||
STATE_FILE = 'state.json' | ||
|
||
def load_state():
    """Load the persisted state from ``STATE_FILE``.

    Returns:
        The parsed JSON content of the state file, or an empty dict when the
        file does not exist yet (e.g. on the very first run).

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid
            JSON. This is deliberately not swallowed, so a corrupt state
            file is noticed instead of silently resetting progress.
    """
    try:
        with open(STATE_FILE, 'r') as file:
            return json.load(file)
    except FileNotFoundError:
        return {}
|
||
def save_state(state):
    """Write *state* to ``STATE_FILE`` as indented JSON.

    The state is serialized before the file is opened, so a value that
    cannot be serialized leaves the existing state file untouched instead of
    truncating it. Failures are reported on stdout rather than raised,
    keeping the best-effort behaviour callers rely on.

    Args:
        state: JSON-serializable object to persist.
    """
    try:
        payload = json.dumps(state, indent=4)
    except (TypeError, ValueError) as e:
        print(f"Error occurred while saving state: {e}")
        return

    with open(STATE_FILE, 'w') as file:
        try:
            file.write(payload)
        except OSError as e:
            print(f"Error occurred while saving state: {e}")
Oops, something went wrong.