diff --git a/.gitignore b/.gitignore index 278da0a..5c83b2b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ .env follow_users.log /scraper_usernames/.env -fething_new_users.ipynb -check_pages.ipynb \ No newline at end of file +fething_new_users.ipynb \ No newline at end of file diff --git a/__pycache__/fetching_new_users.cpython-311.pyc b/__pycache__/fetching_new_users.cpython-311.pyc index d4c12bf..64bf2ab 100644 Binary files a/__pycache__/fetching_new_users.cpython-311.pyc and b/__pycache__/fetching_new_users.cpython-311.pyc differ diff --git a/__pycache__/state_manager.cpython-311.pyc b/__pycache__/state_manager.cpython-311.pyc new file mode 100644 index 0000000..75f95d6 Binary files /dev/null and b/__pycache__/state_manager.cpython-311.pyc differ diff --git a/check_pages.ipynb b/check_pages.ipynb deleted file mode 100644 index 54f657b..0000000 --- a/check_pages.ipynb +++ /dev/null @@ -1,37 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "initial_id", - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/counter.txt b/counter.txt deleted file mode 100644 index e69de29..0000000 diff --git a/fetching_new_users.py b/fetching_new_users.py index 450ab3e..7a569fe 100644 --- a/fetching_new_users.py +++ b/fetching_new_users.py @@ -1,38 +1,47 @@ +# fetching_new_users.py = only getting 100 new users for each iteration + import os import requests import dotenv - +from typing import List +from state_manager import load_state, save_state dotenv.load_dotenv() -github_token = os.getenv("GITHUB_TOKEN")# your github token +github_token = os.getenv("GITHUB_TOKEN") # your github token + +def fetching_users_from_github(users_to_fetch=100, token=None) -> List[str]: -def fething_users_from_github(users_to_fetch=1, token=None) -> list: - scraped_users = [] + state = load_state() + current_page = state.get('current_page', 0) querry = 'language:python repos:>5 followers:>10' url = "https://api.github.com/search/users" params = { 'per_page': users_to_fetch, - 'since': 0, + 'page': current_page, 'q': querry } headers = { - 'Authorization': token + 'Authorization': token, + 'Accept': 'application/vnd.github.v3+json', + 'User-Agent': 'Github Follow Script' } try: response = requests.get(url, params=params, headers=headers) response.raise_for_status() - users = response.json().get('items', []) + data = response.json() + fetched_users_api = data.get('items', []) + fetched_users = [user['login'] for user in fetched_users_api] + state['current_page'] = current_page + 1 + save_state(state) - for user in users: - scraped_users.append(user['login']) except requests.exceptions.HTTPError as e: print(f"Error: {e}") except requests.exceptions.RequestException as e: print(f"Error: {e}") - return scraped_users + return fetched_users diff --git a/last_followed_user.txt b/last_followed_user.txt index e69de29..4781be6 100644 --- a/last_followed_user.txt +++ b/last_followed_user.txt @@ -0,0 +1 @@ +3b1b \ No newline at end of file diff --git a/main.py b/main.py index 6776ca1..0bbe74e 100644 --- a/main.py +++ b/main.py @@ -1,39 +1,98 @@ import os from datetime import time - +from time import sleep import requests -from fetching_new_users import fething_users_from_github +from fetching_new_users import fetching_users_from_github import logging import dotenv +import json +from state_manager import load_state, save_state dotenv.load_dotenv() USERNAMES_FILE = 'usernames.txt' -LAST_LINE_FILE = 'last_line.txt' github_token = os.getenv("GITHUB_TOKEN")# your github token -### fetch 100 users from github -users = fething_users_from_github(100, github_token) +### read the users from the file +def read_users_from_file(): + with open(USERNAMES_FILE, 'r') as file: + users = file.readlines() + return [user.strip() for user in users] + ### write the users to a file def write_users_to_file(users): - with open(USERNAMES_FILE, 'w') as file: + with open(USERNAMES_FILE, 'a') as file: + existing_users = read_users_from_file() for user in users: - file.write(f"{user}\n") + if user not in existing_users: + file.write(f"{user}\n") + + +### keep track of last followed user +def read_last_followed_user(): + state = load_state() + return state.get('last_followed_user', None) + + +### write the last followed user to a file +def write_last_followed_user(user): + state = load_state() + state['last_followed_user'] = user + save_state(state) + + +def simple_counter(): + state = load_state() + state['how_many_bot_followed_so_far_counter'] = state.get('how_many_bot_followed_so_far_counter', 0) + 1 + save_state(state) -### read the users from the file ### follow the users +def follow_users(users): + + headers = { + 'Authorization': f'token {github_token}', + 'Accept': 'application/vnd.github.v3+json', + 'User-Agent': 'Github Follow Script' + } -### mark the last user followed + for user in users: + url = f'https://api.github.com/user/following/{user}' + try: + response = requests.put(url, headers=headers) + print(f"Response status code for {user}: {response.status_code}") + write_last_followed_user(user) + simple_counter() + print("sleeping for 3 second") + sleep(2) + if response.status_code == 204: + print(f"Successfully followed {user}") + except requests.exceptions.RequestException as e: + print(f"Error occurred while following {user}: {e}") -### repeat the process - main loop def main(): - while True: - users = fething_users_from_github(100, github_token) - write_users_to_file(users) - logging.info(f"Following {len(users)} users.") - logging.info(f"Waiting for 10 minutes...") - time.sleep(600) \ No newline at end of file + ### fetch 100 users from GitHub + fetched_users = fetching_users_from_github(100, github_token) + print(fetched_users) + write_users_to_file(fetched_users) + print("Users written to file") + users = read_users_from_file() + print("Users read from file") + print(users) + last_user = read_last_followed_user() + print(f"Last followed user: {last_user}") + last_user_index = users.index(last_user) + print(f"Last user index: {last_user_index}") + users_to_follow = users[last_user_index + 1:] + print(f"Users to follow: {users_to_follow}") + follow_users(users_to_follow) + print("Users followed") + + +if __name__ == '__main__': + main() + print("Done") + diff --git a/requirements.txt b/requirements.txt index 06add41..97a54f6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ requests -aiohttp python-dotenv -aiolimiter \ No newline at end of file +ratelimit \ No newline at end of file diff --git a/scraper_usernames/__pycache__/scrape_it_from_github_user.cpython-311.pyc b/scraper_usernames/__pycache__/scrape_it_from_github_user.cpython-311.pyc new file mode 100644 index 0000000..351eaaa Binary files /dev/null and b/scraper_usernames/__pycache__/scrape_it_from_github_user.cpython-311.pyc differ diff --git a/scraper_usernames/scrape_it_from_github_user.py b/scraper_usernames/scrape_it_from_github_user.py index 47d810a..07a86d2 100644 --- a/scraper_usernames/scrape_it_from_github_user.py +++ b/scraper_usernames/scrape_it_from_github_user.py @@ -6,10 +6,9 @@ from dotenv import load_dotenv load_dotenv() - # Replace with your GitHub username and token -username = dotenv.get("USERNAME") # -token = dotenv.get("GITHUB_TOKEN") # +username = os.getenv("USERNAME") # +token = os.getenv("GITHUB_TOKEN") # # GitHub API endpoint and headers api_url = f"https://api.github.com/users/{username}/following" diff --git a/state.json b/state.json new file mode 100644 index 0000000..12edace --- /dev/null +++ b/state.json @@ -0,0 +1,5 @@ +{ + "last_followed_user": "yunjey", + "how_many_bot_followed_so_far_counter": 205, + "current_page": 3 +} \ No newline at end of file diff --git a/state_manager.py b/state_manager.py new file mode 100644 index 0000000..7a80c6a --- /dev/null +++ b/state_manager.py @@ -0,0 +1,17 @@ +# state_manager.py - json operations with main and fetching_new_users + +import json + +STATE_FILE = 'state.json' + +def load_state(): + with open(STATE_FILE, 'r') as file: + state = json.load(file) + return state + +def save_state(state): + with open(STATE_FILE, 'w') as file: + try: + json.dump(state, file, indent=4) + except Exception as e: + print(f"Error occurred while saving state: {e}") diff --git a/usernames.txt b/usernames.txt index e69de29..b005dc6 100644 --- a/usernames.txt +++ b/usernames.txt @@ -0,0 +1,200 @@ +karpathy +openai +rafaballerini +google +geohot +huggingface +michaelliao +llSourcell +taylorotwell +3b1b +ry +krishnaik06 +kennethreitz +buckyroberts +tiangolo +facebookresearch +rasbt +jwasham +gvanrossum +python +techwithtim +mitsuhiko +MorvanZhou +donnemartin +elyxdev +Visualize-ML +BEPb +jakevdp +liyupi +fchollet +tensorflow +iam-veeramalla +chiphuyen +wesm +ageron +lllyasviel +goodfeli +fengdu78 +breakwa11 +angusshire +miguelgrinberg +leerob +aws-samples +Stability-AI +JohnHammond +GoogleCloudPlatform +jrohitofficial +amueller +htr-tech +eust-w +mnielsen +t3dotgg +flipperdevices +bojone +jhaddix +google-research +home-assistant +jvns +alex +Sentdex +AllenDowney +replicate +pjreddie +noob-hackers +pytorch +george0st +Jack-Cherish +colah +abhishekkrthakur +jcjohnson +THUDM +ChristianLempa +521xueweihan +CleverProgrammer +s0md3v +carpedm20 +NeuralNine +lepture +evilsocket +swisskyrepo +lilianweng +EbookFoundation +vinta +django +Germey +drkostas +UberGuidoZ +davidbombal +awslabs +ip681 +rougier +phith0n +simonw +byt3bl33d3r +divanov11 +ethanflower1903 +rwightman +Rafaelmdcarneiro +labuladong +rhiever +giswqs +Lightning-AI +Fernanda-Kipper +LingDong- +qiwsir +ultralytics +PySimpleGUI +cmlsharp +yihong0618 +ashishps1 +daveshap +Neo23x0 +eliben +avelino +fogleman +sebastianruder +sobolevn +asottile +lmstudio-ai +hak5darren +lidangzzz +vakila +KevinHock +aymericdamien +murtazahassan +dabeaz +huangzworks +SkalskiP +alanhamlett +jindongwang +Uniswap +rfyiamcool +samuelcolvin +NationalSecurityAgency +wepe +hardmaru +asweigart +ogrisel +meta-llama +joelgrus +mistralai +CamDavidsonPilon +gakonst +acantril +marcan +Ignitetechnologies +oobabooga +arpitbbhayani +pandas-dev +odoo +inconvergent +lazyprogrammer +hmason +rochacbruno +iamtrask +cclauss +yeasy +kovidgoyal +orangetw +pydanny +AtsushiSakai +scikit-learn +OpenBMB +ageitgey +karan +jlevy +lodhik9 +ring04h +ramalho +anyproto +josephmisiti +HackerPoet +blackmatrix7 +hunkim +bregman-arie +developerrahulofficial +willmcgugan +dunossauro +zedshaw +LAION-AI +cypherm0nk +0dayCTF +rhettinger +progrium +tomchristie +Borda +adamchainz +wangshub +Significant-Gravitas +lijiejie +crowsonkb +jalammar +ymcui +Akkariiin +x0rz +formidablae +xinntao +brightmart +omarsar +yunjey