From abeb5e3994642b64854b74c11c70bdbaf90f0753 Mon Sep 17 00:00:00 2001 From: rhysd Date: Tue, 12 Dec 2017 18:18:22 +0900 Subject: [PATCH 1/7] reconstruct options of command - query is now positional and required - lang is removed - sensible default query is removed --- README.md | 36 ++++++++++++++++++++++++++---------- ghca/cli.go | 15 ++++++++++----- ghca/cli_test.go | 29 ++++++++++++++++++----------- main.go | 45 ++++++++++++++++++++++++++++----------------- 4 files changed, 82 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index ff936e3..bd15e49 100644 --- a/README.md +++ b/README.md @@ -4,20 +4,22 @@ Clone matching repos on GitHub [![Windows Build Status][]][Appveyor] [![Coverage Status][]][Codecov] +``` +$ github-clone-all [flags] {query} +``` + `github-clone-all` is a small command to clone all repositories matching to given query and -language via [GitHub Search API][]. +language via [GitHub Search API][]. Query must not be empty. It clones many repositories in parallel. Please see `-help` option to know all flags. -Query is the same as GitHub search syntax. And 'stars>1 fork:false' is added by default for -sensible search results. - Repository is cloned to 'dest' directory. It is `$cwd/repos` by default and can be specified with `-dest` flag. And in order to reduce size of cloned repositories, `-extract` option is available. `-extract` only leaves files matching to given regular expression. Because of restriction of GitHub search API, max number of results is 1000. And you need to -gain GitHub API token in advance. `github-clone-all` will refer the token via `-token` flag or -`$GITHUB_TOKEN` environment variable. +gain GitHub API token in advance to avoid API rate limit. `github-clone-all` will refer the token +via `-token` flag or `$GITHUB_TOKEN` environment variable. 
+ ## Installation @@ -27,14 +29,26 @@ Use `go get` or [released binaries](https://github.com/rhysd/github-clone-all/re $ go get github.com/rhysd/github-clone-all ``` + ## Example ``` -$ github-clone-all -token $GITHUB_TOKEN -lang vim -extract '(\.vim|vimrc)$' +$ github-clone-all -token xxxxxxxx -extract '(\.vim|vimrc)$' language:vim fork:false stars>1 +``` + +It clones first 1000 repositories into 'repos' directory in the current working directory. + +Query condition: +- language is 'vim' +- not a fork repo +- stars of repo is more than 1 + +If the token is set to `$GITHUB_TOKEN` environment variable, following should also work fine. + +``` +$ github-clone-all -extract '(\.vim|vimrc)$' language:vim fork:false stars>1 ``` -It clones first 1000 repositories whose language is 'vim' into 'repos' directory in the current -working directory. ## How to get GitHub API token @@ -42,7 +56,8 @@ working directory. 2. Click 'Generate new token' 3. Add token description 4. Without checking any checkbox, click 'Generate token' -5. Key is shown in your tokens list +5. Generated token is shown at the top of your tokens list + ## Use github-clone-all programmatically @@ -53,6 +68,7 @@ functions of the tool. import "github.com/rhysd/github-clone-all/ghca" ``` + ## License [MIT license](LICENSE) diff --git a/ghca/cli.go b/ghca/cli.go index f5f405d..5c8b5ac 100644 --- a/ghca/cli.go +++ b/ghca/cli.go @@ -1,6 +1,7 @@ package ghca import ( + "errors" "fmt" "os" "path/filepath" @@ -34,14 +35,14 @@ func (c *CLI) Run() (err error) { return } -func NewCLI(t, q, l, d, e string) (*CLI, error) { +func NewCLI(t, q, d, e string) (*CLI, error) { var err error - if env := os.Getenv("GITHUB_TOKEN"); env != "" && t == "" { - t = env + if t == "" { + t = os.Getenv("GITHUB_TOKEN") } - if t == "" || l == "" { + if t == "" { return nil, fmt.Errorf("API token and language must be set. 
Please see -help for more detail") } @@ -61,6 +62,10 @@ func NewCLI(t, q, l, d, e string) (*CLI, error) { } } - q = fmt.Sprintf("%s language:%s fork:false", q, l) + q = strings.TrimSpace(q) + if q == "" { + return nil, errors.New("Query cannot be empty") + } + return &CLI{t, q, d, r}, nil } diff --git a/ghca/cli_test.go b/ghca/cli_test.go index c032fe4..ed26d55 100644 --- a/ghca/cli_test.go +++ b/ghca/cli_test.go @@ -7,7 +7,7 @@ import ( ) func TestNewCLI(t *testing.T) { - cli, err := NewCLI("token", "foo stars>1", "lang", "dest", "") + cli, err := NewCLI("token", "foo stars>1", "dest", "") if err != nil { t.Fatal(err) } @@ -26,7 +26,7 @@ func TestNewCLI(t *testing.T) { } func TestEmptyDest(t *testing.T) { - cli, err := NewCLI("token", "query", "lang", "", "") + cli, err := NewCLI("token", "query", "", "") if err != nil { t.Fatal(err) } @@ -37,23 +37,31 @@ func TestEmptyDest(t *testing.T) { } } -func TestEmptyTokenOrLang(t *testing.T) { +func TestEmptyToken(t *testing.T) { token := os.Getenv("GITHUB_TOKEN") os.Setenv("GITHUB_TOKEN", "") - if _, err := NewCLI("", "", "vim", "", ""); err == nil { + if _, err := NewCLI("", "query", "", ""); err == nil { t.Error("Empty token should raise an error") } + os.Setenv("GITHUB_TOKEN", token) +} - if _, err := NewCLI("", "foobar", "", "", ""); err == nil { - t.Error("Empty lang should raise an error") +func TestEmptyQuery(t *testing.T) { + for _, q := range []string{ + "", + " ", + " ", + } { + if _, err := NewCLI("token", q, "", ""); err == nil { + t.Errorf("Empty query should raise an error: '%s'", q) + } } - os.Setenv("GITHUB_TOKEN", token) } func TestGitHubTokenEnv(t *testing.T) { token := os.Getenv("GITHUB_TOKEN") os.Setenv("GITHUB_TOKEN", "foobar") - cli, err := NewCLI("", "", "vim", "", "") + cli, err := NewCLI("", "query", "", "") if err != nil { t.Error(err) } @@ -64,16 +72,15 @@ func TestGitHubTokenEnv(t *testing.T) { } func TestInvalidRegexp(t *testing.T) { - if _, err := NewCLI("token", "", "vim", "", "(foo"); err 
== nil { + if _, err := NewCLI("token", "query", "", "(foo"); err == nil { t.Error("Broken regexp must raise an error") } - } func TestMakeDest(t *testing.T) { defer os.Remove("repos") - cli, err := NewCLI("token", "", "lang", "", "") + cli, err := NewCLI("token", "query", "", "") if err != nil { t.Fatal(err) } diff --git a/main.go b/main.go index 8c3c0a9..e50acfb 100644 --- a/main.go +++ b/main.go @@ -7,23 +7,22 @@ import ( "io/ioutil" "log" "os" + "strings" ) -const usageHeader = `Usage: github-clone-all {Flags} +const usageHeader = `Usage: github-clone-all [Flags] {Query} github-clone-all is a command to clone all repositories matching to given - query and language via GitHub Search API. + query via GitHub Search API. Query must not be empty. It clones many repositories in parallel. - Query is the same as GitHub search syntax. And 'stars>1 fork:false' is - added by default for sensible search results. - Repository is cloned to 'dest' directory. It is $cwd/repos by default and can be specified with -dest flag. Because of restriction of GitHub search API, max number of results is 1000. - And you need to gain GitHub API token in advance. You can get the token as - following: + And you need to gain GitHub API token in advance to avoid API rate limit. + + You can get the token as following: 1. Visit https://github.com/settings/tokens in a browser 2. Click 'Generate new token' @@ -33,12 +32,24 @@ const usageHeader = `Usage: github-clone-all {Flags} ref: https://developer.github.com/v3/search/ + Example: - $ github-clone-all -token $GITHUB_TOKEN -lang vim -extract '(\.vim|vimrc)$' + $ github-clone-all -token xxxxxxxx -extract '(\.vim|vimrc)$' language:vim fork:false stars>1 + + It clones first 1000 repositories into 'repos' directory in the current + working directory. + + Query condition: + - language is 'vim' + - not a fork repo + - stars of repo is more than 1 + + If the token is set to $GITHUB_TOKEN environment variable, following should work + fine. 
+ + $ github-clone-all -extract '(\.vim|vimrc)$' language:vim fork:false stars>1 - It clones first 1000 repositories whose language is 'vim' into 'repos' - directory in the current working directory. Flags:` @@ -50,12 +61,10 @@ func usage() { func main() { help := flag.Bool("help", false, "Show this help") h := flag.Bool("h", false, "Show this help") - token := flag.String("token", "", "GitHub token to call GitHub API. $GITHUB_TOKEN environment variable is also referred (required)") - query := flag.String("query", "", "Additional query string to search (optional)") - lang := flag.String("lang", "", "Language name to search repos (required)") - dest := flag.String("dest", "", "Directory to store the downloaded files. By default 'repos' in current working directory (optional)") - extract := flag.String("extract", "", "Regular expression to extract files in each cloned repo (optional)") - quiet := flag.Bool("quiet", false, "Run quietly. Exit status is non-zero, it means error occurred (optional)") + token := flag.String("token", "", "GitHub token to call GitHub API. If this option is not specified, $GITHUB_TOKEN environment variable needs to be set") + dest := flag.String("dest", "", "Directory to store the downloaded files. By default 'repos' in current working directory") + extract := flag.String("extract", "", "Regular expression to extract files in each cloned repo") + quiet := flag.Bool("quiet", false, "Run quietly. 
Exit status is non-zero, it means error occurred") flag.Usage = usage flag.Parse() @@ -69,7 +78,9 @@ func main() { log.SetOutput(ioutil.Discard) } - cli, err := ghca.NewCLI(*token, *query, *lang, *dest, *extract) + query := strings.Join(flag.Args(), " ") + + cli, err := ghca.NewCLI(*token, query, *dest, *extract) if err != nil { fmt.Fprintln(os.Stderr, err) os.Exit(3) From cebafb202d7bb196c790937eb98d6a6e9287719f Mon Sep 17 00:00:00 2001 From: rhysd Date: Wed, 13 Dec 2017 04:48:18 +0900 Subject: [PATCH 2/7] fix missing import --- ghca/cli.go | 1 + 1 file changed, 1 insertion(+) diff --git a/ghca/cli.go b/ghca/cli.go index 5c8b5ac..64b203b 100644 --- a/ghca/cli.go +++ b/ghca/cli.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "regexp" + "strings" ) type CLI struct { From 7765f82ca2ad0c9aa7761dae645906244b1e6531 Mon Sep 17 00:00:00 2001 From: rhysd Date: Wed, 13 Dec 2017 04:48:31 +0900 Subject: [PATCH 3/7] remove symlinks in repos --- Guardfile | 2 +- ghca/clone.go | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Guardfile b/Guardfile index cb6cd76..ced6f53 100644 --- a/Guardfile +++ b/Guardfile @@ -5,7 +5,7 @@ guard :shell do when /_test\.go$/ parent = File.dirname m[0] sources = Dir["#{parent}/*.go"].reject{|p| p.end_with? 
'_test.go'}.join(' ') - system "go test -v #{m[0]} #{sources}" + system "go test -v -short #{m[0]} #{sources}" else system 'go build' end diff --git a/ghca/clone.go b/ghca/clone.go index 6c7b8e4..4e8ec31 100644 --- a/ghca/clone.go +++ b/ghca/clone.go @@ -84,10 +84,8 @@ func (cl *Cloner) newWorker() { if info.IsDir() { return nil } - if !extract.MatchString(path) { - if err := os.Remove(path); err != nil { - return err - } + if (info.Mode()&os.ModeSymlink != 0) || !extract.MatchString(path) { + return os.Remove(path) } return nil }); err != nil { From a2af1b49a4412cadf3df1f5a252bbce6ae4616ee Mon Sep 17 00:00:00 2001 From: rhysd Date: Wed, 13 Dec 2017 04:55:04 +0900 Subject: [PATCH 4/7] introduce short test and fix some test cases --- ghca/cli_test.go | 4 ++-- ghca/clone_test.go | 4 ++++ ghca/collect_test.go | 8 ++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/ghca/cli_test.go b/ghca/cli_test.go index ed26d55..6d353a4 100644 --- a/ghca/cli_test.go +++ b/ghca/cli_test.go @@ -14,7 +14,7 @@ func TestNewCLI(t *testing.T) { if cli.token != "token" { t.Error("Unexpected token", cli.token) } - if cli.query != "foo stars>1 language:lang fork:false" { + if cli.query != "foo stars>1" { t.Error("Unexpected query", cli.query) } if cli.dest != "dest" { @@ -111,7 +111,7 @@ func TestDestAlreadyExistAsFile(t *testing.T) { if err := f.Close(); err != nil { t.Fatal(err) } - cli, err := NewCLI("token", "", "lang", "", "") + cli, err := NewCLI("token", "query", "", "") if err != nil { t.Fatal(err) } diff --git a/ghca/clone_test.go b/ghca/clone_test.go index 07b8132..983bfc9 100644 --- a/ghca/clone_test.go +++ b/ghca/clone_test.go @@ -26,6 +26,10 @@ func TestNewCloner(t *testing.T) { } func testRepos(repos []string, t *testing.T) { + if testing.Short() { + t.Skip("Skipping test in short mode.") + } + c := NewCloner("test", nil) defer func() { os.RemoveAll("test") diff --git a/ghca/collect_test.go b/ghca/collect_test.go index 8d22e8a..469356b 100644 --- 
a/ghca/collect_test.go +++ b/ghca/collect_test.go @@ -42,6 +42,10 @@ func TestNewCollectorWithConfig(t *testing.T) { } func TestCollectReposTotalIsAFew(t *testing.T) { + if testing.Short() { + t.Skip("Skipping test in short mode") + } + token := os.Getenv("GITHUB_TOKEN") if token == "" { t.Skip("Skipping because API token not found") @@ -72,6 +76,10 @@ func TestCollectReposTotalIsAFew(t *testing.T) { } func TestCollectReposTotalIsLarge(t *testing.T) { + if testing.Short() { + t.Skip("Skipping test in short mode") + } + token := os.Getenv("GITHUB_TOKEN") if token == "" { t.Skip("Skipping because API token not found") From 051b27a58fbc99a7c9ad83c67e06f752325a817b Mon Sep 17 00:00:00 2001 From: rhysd Date: Wed, 13 Dec 2017 05:33:48 +0900 Subject: [PATCH 5/7] fix doc and add notice for quoting --- README.md | 8 ++++++-- main.go | 18 ++++++++++++++---- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index bd15e49..bef2856 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,10 @@ Because of restriction of GitHub search API, max number of results is 1000. And gain GitHub API token in advance to avoid API rate limit. `github-clone-all` will refer the token via `-token` flag or `$GITHUB_TOKEN` environment variable. +All arguments in {Query} are regarded as query. For example, `github-clone-all foo bar` will search +`foo bar`. But quoting the query is recommended to avoid conflicting with shell special characters +as `github-clone-all 'foo bar'`. + ## Installation @@ -33,7 +37,7 @@ $ go get github.com/rhysd/github-clone-all ## Example ``` -$ github-clone-all -token xxxxxxxx -extract '(\.vim|vimrc)$' language:vim fork:false stars>1 +$ github-clone-all -token xxxxxxxx -extract '(\.vim|vimrc)$' 'language:vim fork:false stars:>1' ``` It clones first 1000 repositories into 'repos' directory in the current working directory. 
@@ -46,7 +50,7 @@ Query condition: If the token is set to `$GITHUB_TOKEN` environment variable, following should also work fine. ``` -$ github-clone-all -extract '(\.vim|vimrc)$' language:vim fork:false stars>1 +$ github-clone-all -extract '(\.vim|vimrc)$' 'language:vim fork:false stars:>1' ``` diff --git a/main.go b/main.go index e50acfb..9589487 100644 --- a/main.go +++ b/main.go @@ -22,6 +22,16 @@ const usageHeader = `Usage: github-clone-all [Flags] {Query} Because of restriction of GitHub search API, max number of results is 1000. And you need to gain GitHub API token in advance to avoid API rate limit. + All arguments in {Query} are regarded as query. + For example, + + $ github-clone-all foo bar + + will search 'foo bar'. But quoting the query is recommended to avoid + conflicting with shell special characters as following: + + $ github-clone-all 'foo bar' + You can get the token as following: 1. Visit https://github.com/settings/tokens in a browser @@ -35,7 +45,7 @@ const usageHeader = `Usage: github-clone-all [Flags] {Query} Example: - $ github-clone-all -token xxxxxxxx -extract '(\.vim|vimrc)$' language:vim fork:false stars>1 + $ github-clone-all -token xxxxxxxx -extract '(\.vim|vimrc)$' 'language:vim fork:false stars:>1' It clones first 1000 repositories into 'repos' directory in the current working directory. @@ -45,10 +55,10 @@ Example: - not a fork repo - stars of repo is more than 1 - If the token is set to $GITHUB_TOKEN environment variable, following should work - fine. + If the token is set to $GITHUB_TOKEN environment variable, following should + also work fine. 
- $ github-clone-all -extract '(\.vim|vimrc)$' language:vim fork:false stars>1 + $ github-clone-all -extract '(\.vim|vimrc)$' 'language:vim fork:false stars:>1' Flags:` From c52b4e14e780c6fde8ab9a43c582e9b9e5a0183a Mon Sep 17 00:00:00 2001 From: rhysd Date: Wed, 13 Dec 2017 06:13:15 +0900 Subject: [PATCH 6/7] refer number of cores to determine number of workers --- ghca/clone.go | 5 ++++- ghca/collect.go | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ghca/clone.go b/ghca/clone.go index 4e8ec31..9ee49c1 100644 --- a/ghca/clone.go +++ b/ghca/clone.go @@ -7,6 +7,7 @@ import ( "os/exec" "path/filepath" "regexp" + "runtime" "sync" ) @@ -103,7 +104,9 @@ func (cl *Cloner) newWorker() { } func (cl *Cloner) Start() { - for i := 0; i < maxConcurrency; i++ { + para := runtime.NumCPU() - 1 + log.Println("Start to clone with", para, "workers") + for i := 0; i < para; i++ { cl.newWorker() } } diff --git a/ghca/collect.go b/ghca/collect.go index 1644728..e9b404a 100644 --- a/ghca/collect.go +++ b/ghca/collect.go @@ -38,6 +38,7 @@ func (col *Collector) searchRepos() (*github.RepositoriesSearchResult, error) { func (col *Collector) Collect() (int, int, error) { log.Println("Searching GitHub repositories with query:", col.Query) + start := time.Now() cloner := NewCloner(col.Dest, col.Extract) cloner.Start() @@ -74,7 +75,7 @@ func (col *Collector) Collect() (int, int, error) { cloner.Shutdown() - log.Println(count, "repositories were cloned into", col.Dest, "for total", total, "search results") + log.Printf("%d repositories were cloned into '%s' for total %d search results (%f seconds)\n", count, col.Dest, total, time.Now().Sub(start).Seconds()) return count, total, nil } From da12798d134c788cf17879842cd777f9d9a032c4 Mon Sep 17 00:00:00 2001 From: rhysd Date: Wed, 13 Dec 2017 06:15:28 +0900 Subject: [PATCH 7/7] tweak help docs --- README.md | 2 +- main.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 
bef2856..42477fa 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Because of restriction of GitHub search API, max number of results is 1000. And gain GitHub API token in advance to avoid API rate limit. `github-clone-all` will refer the token via `-token` flag or `$GITHUB_TOKEN` environment variable. -All arguments in {Query} are regarded as query. For example, `github-clone-all foo bar` will search +All arguments in {query} are regarded as query. For example, `github-clone-all foo bar` will search `foo bar`. But quoting the query is recommended to avoid conflicting with shell special characters as `github-clone-all 'foo bar'`. diff --git a/main.go b/main.go index 9589487..c844af0 100644 --- a/main.go +++ b/main.go @@ -10,7 +10,7 @@ import ( "strings" ) -const usageHeader = `Usage: github-clone-all [Flags] {Query} +const usageHeader = `USAGE: github-clone-all [FLAGS] {query} github-clone-all is a command to clone all repositories matching to given query via GitHub Search API. Query must not be empty. @@ -22,7 +22,7 @@ const usageHeader = `Usage: github-clone-all [Flags] {Query} Because of restriction of GitHub search API, max number of results is 1000. And you need to gain GitHub API token in advance to avoid API rate limit. - All arguments in {Query} are regarded as query. + All arguments in {query} are regarded as query. For example, $ github-clone-all foo bar @@ -43,7 +43,7 @@ const usageHeader = `Usage: github-clone-all [Flags] {Query} ref: https://developer.github.com/v3/search/ -Example: +EXAMPLE: $ github-clone-all -token xxxxxxxx -extract '(\.vim|vimrc)$' 'language:vim fork:false stars:>1' @@ -61,7 +61,7 @@ Example: $ github-clone-all -extract '(\.vim|vimrc)$' 'language:vim fork:false stars:>1' -Flags:` +FLAGS:` func usage() { fmt.Fprintln(os.Stderr, usageHeader)