Skip to content

Commit

Permalink
refactor start to full automation
Browse files Browse the repository at this point in the history
  • Loading branch information
OfficialCodeVoyage committed Oct 17, 2024
1 parent 55555bd commit f4cdec0
Show file tree
Hide file tree
Showing 10 changed files with 571,972 additions and 285,950 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.env
follow_users.log
/scraper_usernames/.env
fething_new_users.ipynb
Binary file added __pycache__/fetching_new_users.cpython-311.pyc
Binary file not shown.
38 changes: 38 additions & 0 deletions fetching_new_users.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os
import requests
import dotenv


dotenv.load_dotenv()
github_token = os.getenv("GITHUB_TOKEN")# your github token


def fething_users_from_github(users_to_fetch=1, token=None) -> list:
    """Fetch GitHub user logins via the search API.

    Searches for Python users with more than 5 repos and more than 10
    followers and returns their login names.

    Args:
        users_to_fetch: number of results to request (GitHub caps
            ``per_page`` at 100).
        token: GitHub personal access token, or None for an
            unauthenticated (heavily rate-limited) request.

    Returns:
        List of login strings; empty list on any request failure.
    """
    scraped_users = []

    querry = 'language:python repos:>5 followers:>10'
    url = "https://api.github.com/search/users"
    # NOTE: the original also sent 'since', which is not a documented
    # parameter of the search endpoint (GitHub ignores it) — dropped.
    params = {
        'per_page': users_to_fetch,
        'q': querry,
    }
    # GitHub requires "token <PAT>" (or "Bearer <PAT>") in Authorization;
    # a bare token yields 401 Bad credentials. Omit the header entirely
    # when no token was supplied.
    headers = {'Authorization': f'token {token}'} if token else {}

    try:
        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status()
        users = response.json().get('items', [])
        scraped_users = [user['login'] for user in users]

    # RequestException already covers HTTPError and every other
    # requests failure mode, so a single handler suffices.
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")

    return scraped_users
112 changes: 112 additions & 0 deletions fething_new_users.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-16T23:58:18.707985Z",
"start_time": "2024-10-16T23:58:18.703958Z"
}
},
"cell_type": "code",
"source": [
"import os\n",
"import requests\n",
"\n",
"github_token = \"#\"## your github token"
],
"id": "b91c72b336bd44c8",
"outputs": [],
"execution_count": 48
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-17T01:40:59.610303Z",
"start_time": "2024-10-17T01:40:59.604896Z"
}
},
"cell_type": "code",
"source": [
"def fething_users_from_github(users_to_fetch = 1, token = None) -> list:\n",
" scraped_users = []\n",
" \n",
" querry = 'language:python repos:>5 followers:>10'\n",
" url = \"https://api.github.com/search/users\"\n",
" params = {\n",
" 'per_page' : users_to_fetch,\n",
" 'since' : 0,\n",
" 'q' : querry\n",
" \n",
" }\n",
" headers = {\n",
" 'Authorization' : token\n",
" }\n",
" \n",
" try:\n",
" response = requests.get(url, params=params, headers=headers)\n",
" response.raise_for_status()\n",
" users = response.json().get('items', [])\n",
" \n",
" for user in users:\n",
" scraped_users.append(user['login'])\n",
"\n",
" except requests.exceptions.HTTPError as e:\n",
" print(f\"Error: {e}\")\n",
" except requests.exceptions.RequestException as e:\n",
" print(f\"Error: {e}\")\n",
"\n",
" return scraped_users"
],
"id": "a4f14605767e6f53",
"outputs": [],
"execution_count": 77
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-17T01:41:01.602561Z",
"start_time": "2024-10-17T01:41:00.955263Z"
}
},
"cell_type": "code",
"source": [
"users = fething_users_from_github(50)\n",
"print(users)\n",
"print(len(users))"
],
"id": "e781948fa8616f68",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['karpathy', 'openai', 'rafaballerini', 'google', 'geohot', 'huggingface', 'michaelliao', 'llSourcell', 'taylorotwell', '3b1b', 'ry', 'krishnaik06', 'kennethreitz', 'buckyroberts', 'tiangolo', 'facebookresearch', 'rasbt', 'jwasham', 'gvanrossum', 'python', 'techwithtim', 'mitsuhiko', 'MorvanZhou', 'donnemartin', 'elyxdev', 'Visualize-ML', 'BEPb', 'jakevdp', 'liyupi', 'fchollet', 'tensorflow', 'iam-veeramalla', 'chiphuyen', 'wesm', 'ageron', 'lllyasviel', 'goodfeli', 'fengdu78', 'breakwa11', 'angusshire', 'miguelgrinberg', 'leerob', 'aws-samples', 'Stability-AI', 'JohnHammond', 'GoogleCloudPlatform', 'jrohitofficial', 'amueller', 'htr-tech', 'mnielsen']\n",
"50\n"
]
}
],
"execution_count": 78
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
143 changes: 27 additions & 116 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,128 +1,39 @@
import asyncio
import aiohttp
import time
import os
import logging
from typing import List
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Configure logging
logging.basicConfig(
level=logging.INFO, # Set to INFO for general logs; use DEBUG for more verbosity
format='%(asctime)s [%(levelname)s] %(message)s',
handlers=[
logging.FileHandler("follow_users.log"),
logging.StreamHandler()
]
)

# Constants
USERNAMES_FILE = os.getenv('USERNAMES_FILE', 'usernames.txt')
LAST_LINE_FILE = os.getenv('LAST_LINE_FILE', 'last_line.txt')
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')

if not GITHUB_TOKEN:
logging.error("GitHub token not found. Please set GITHUB_TOKEN in the environment variables.")
exit(1)
from datetime import time

# Semaphore to limit concurrent requests (set to 1 for sequential processing)
SEM = asyncio.Semaphore(1)

# Function to read usernames from a file
def read_usernames(file_path: str) -> List[str]:
    """Load usernames from *file_path*, one per line, skipping blanks.

    Exits the process if the file is missing or unreadable.
    """
    try:
        names: List[str] = []
        with open(file_path, 'r') as handle:
            for raw_line in handle:
                cleaned = raw_line.strip()
                if cleaned:
                    names.append(cleaned)
        logging.info(f"Loaded {len(names)} usernames from '{file_path}'.")
        return names
    except FileNotFoundError:
        logging.error(f"Usernames file '{file_path}' not found.")
        exit(1)
    except Exception as e:
        logging.exception(f"An error occurred while reading '{file_path}': {e}")
        exit(1)

# Function to read the last processed line number
def read_last_line(file_path: str) -> int:
    """Return the saved resume position from *file_path*, or 0.

    Any missing file, unparsable content, or read error falls back to 0
    (start from the beginning).
    """
    # Guard clause: no checkpoint file means a fresh start.
    if not os.path.exists(file_path):
        logging.info(f"No last line file found. Starting from the beginning.")
        return 0
    try:
        with open(file_path, 'r') as handle:
            last_line = int(handle.read().strip())
        logging.info(f"Resuming from line {last_line + 1}.")
        return last_line
    except ValueError:
        logging.warning(f"Invalid content in '{file_path}'. Starting from the beginning.")
        return 0
    except Exception as e:
        logging.exception(f"An error occurred while reading '{file_path}': {e}")
        return 0

# Function to write the last processed line number
def write_last_line(file_path: str, line_number: int) -> None:
    """Persist *line_number* to *file_path* so a later run can resume.

    Errors are logged, never raised — checkpointing is best-effort.
    """
    try:
        with open(file_path, 'w') as handle:
            handle.write(f"{line_number}")
        logging.debug(f"Updated last line to {line_number} in '{file_path}'.")
    except Exception as e:
        logging.exception(f"An error occurred while writing to '{file_path}': {e}")
import requests
from fetching_new_users import fething_users_from_github
import logging
import dotenv

# Asynchronous function to follow a user on GitHub
async def follow_user(session: aiohttp.ClientSession, username: str, line_number: int) -> None:
url = f'https://api.github.com/user/following/{username}'
async with SEM: # Ensure sequential processing
try:
async with session.put(url) as response:
status = response.status
text = await response.text()

if status == 204:
logging.info(f"Line {line_number + 1}: Successfully followed '{username}'.")
elif status == 404:
logging.warning(f"Line {line_number + 1}: User '{username}' not found.")
elif status == 403 or status == 429:
logging.error(f"Line {line_number + 1}: Rate limit exceeded or forbidden access.")
else:
logging.error(f"Line {line_number + 1}: Failed to follow '{username}': {status}, {text}")
dotenv.load_dotenv()
USERNAMES_FILE = 'usernames.txt'
LAST_LINE_FILE = 'last_line.txt'
github_token = os.getenv("GITHUB_TOKEN")# your github token

except Exception as e:
logging.exception(f"Line {line_number + 1}: Error following user '{username}': {e}")
### fetch 100 users from github

# Main asynchronous function
async def main():
usernames = read_usernames(USERNAMES_FILE)
last_line = read_last_line(LAST_LINE_FILE)
total_usernames = len(usernames)
logging.info(f"Starting to follow users from line {last_line + 1} to {total_usernames}.")
users = fething_users_from_github(100, github_token)

headers = {
'Authorization': f'token {GITHUB_TOKEN}',
'Accept': 'application/vnd.github.v3+json',
'User-Agent': 'GitHub-Follow-Script' # Replace with your application's name
}
### write the users to a file
def write_users_to_file(users):
    """Overwrite USERNAMES_FILE with one username per line."""
    with open(USERNAMES_FILE, 'w') as out:
        out.writelines(f"{name}\n" for name in users)

async with aiohttp.ClientSession(headers=headers) as session:
for i, username in enumerate(usernames[last_line:], start=last_line):
await follow_user(session, username, i)
### read the users from the file

# Wait for 10 seconds before processing the next user
if i < total_usernames - 1:
#logging.info("Waiting for 10 seconds before following the next user...")
await asyncio.sleep(10)
### follow the users

# Update the last processed line
write_last_line(LAST_LINE_FILE, i + 1)
### mark the last user followed

logging.info("Finished processing all usernames.")
### repeat the process - main loop

if __name__ == "__main__":
try:
asyncio.run(main())
except KeyboardInterrupt:
logging.info("Script interrupted by user.")
except Exception as e:
logging.exception(f"An unexpected error occurred: {e}")
def main():
    """Main loop: fetch a fresh batch of users and persist it every 10 minutes.

    Runs forever; each cycle fetches up to 100 users matching the search
    criteria in fetching_new_users and overwrites USERNAMES_FILE with them.
    """
    # BUG FIX: the module imports `from datetime import time`, which shadows
    # the stdlib `time` module — `time.sleep(600)` would raise
    # AttributeError. A function-local import restores the real module
    # without touching the module-level names callers may rely on.
    import time

    while True:
        users = fething_users_from_github(100, github_token)
        write_users_to_file(users)
        logging.info(f"Following {len(users)} users.")
        logging.info("Waiting for 10 minutes...")
        time.sleep(600)
Loading

0 comments on commit f4cdec0

Please sign in to comment.