Skip to content

Commit

Permalink
Merge branch 'v2'
Browse files Browse the repository at this point in the history
  • Loading branch information
rhysd committed Dec 12, 2017
2 parents 021af7b + da12798 commit 92b70bb
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 54 deletions.
2 changes: 1 addition & 1 deletion Guardfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ guard :shell do
when /_test\.go$/
parent = File.dirname m[0]
sources = Dir["#{parent}/*.go"].reject{|p| p.end_with? '_test.go'}.join(' ')
system "go test -v #{m[0]} #{sources}"
system "go test -v -short #{m[0]} #{sources}"
else
system 'go build'
end
Expand Down
40 changes: 30 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,26 @@ Clone matching repos on GitHub
[![Windows Build Status][]][Appveyor]
[![Coverage Status][]][Codecov]

```
$ github-clone-all [flags] {query}
```

`github-clone-all` is a small command to clone all repositories matching the given query and
language via [GitHub Search API][].
language via [GitHub Search API][]. Query must not be empty.
It clones many repositories in parallel. Please see `-help` option to know all flags.

Query is the same as GitHub search syntax. And 'stars>1 fork:false' is added by default for
sensible search results.

Repositories are cloned into the 'dest' directory. It is `$cwd/repos` by default and can be specified with
the `-dest` flag. To reduce the size of cloned repositories, the `-extract` option is available:
`-extract` keeps only the files matching the given regular expression.

Because of restriction of GitHub search API, max number of results is 1000. And you need to
gain GitHub API token in advance. `github-clone-all` will refer the token via `-token` flag or
`$GITHUB_TOKEN` environment variable.
obtain a GitHub API token in advance to avoid the API rate limit. `github-clone-all` will read the token
from the `-token` flag or the `$GITHUB_TOKEN` environment variable.

All arguments in {query} are treated as the query. For example, `github-clone-all foo bar` will search
for `foo bar`. However, quoting the query is recommended so that the shell does not interpret special
characters in it, for example `github-clone-all 'foo bar'`.


## Installation

Expand All @@ -27,22 +33,35 @@ Use `go get` or [released binaries](https://github.com/rhysd/github-clone-all/re
$ go get github.com/rhysd/github-clone-all
```


## Example

```
$ github-clone-all -token $GITHUB_TOKEN -lang vim -extract '(\.vim|vimrc)$'
$ github-clone-all -token xxxxxxxx -extract '(\.vim|vimrc)$' 'language:vim fork:false stars:>1'
```

It clones the first 1000 repositories into the 'repos' directory in the current working directory.

Query condition:
- language is 'vim'
- not a fork repo
- the repo has more than 1 star

If the token is set in the `$GITHUB_TOKEN` environment variable, the following should also work fine.

```
$ github-clone-all -extract '(\.vim|vimrc)$' 'language:vim fork:false stars:>1'
```

It clones first 1000 repositories whose language is 'vim' into 'repos' directory in the current
working directory.

## How to get GitHub API token

1. Visit https://github.com/settings/tokens in a browser
2. Click 'Generate new token'
3. Add token description
4. Without checking any checkbox, click 'Generate token'
5. Key is shown in your tokens list
5. The generated token is shown at the top of your tokens list


## Use github-clone-all programmatically

Expand All @@ -53,6 +72,7 @@ functions of the tool.
import "github.com/rhysd/github-clone-all/ghca"
```


## License

[MIT license](LICENSE)
Expand Down
16 changes: 11 additions & 5 deletions ghca/cli.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package ghca

import (
"errors"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
)

type CLI struct {
Expand Down Expand Up @@ -34,14 +36,14 @@ func (c *CLI) Run() (err error) {
return
}

func NewCLI(t, q, l, d, e string) (*CLI, error) {
func NewCLI(t, q, d, e string) (*CLI, error) {
var err error

if env := os.Getenv("GITHUB_TOKEN"); env != "" && t == "" {
t = env
if t == "" {
t = os.Getenv("GITHUB_TOKEN")
}

if t == "" || l == "" {
if t == "" {
return nil, fmt.Errorf("API token and language must be set. Please see -help for more detail")
}

Expand All @@ -61,6 +63,10 @@ func NewCLI(t, q, l, d, e string) (*CLI, error) {
}
}

q = fmt.Sprintf("%s language:%s fork:false", q, l)
q = strings.TrimSpace(q)
if q == "" {
return nil, errors.New("Query cannot be empty")
}

return &CLI{t, q, d, r}, nil
}
33 changes: 20 additions & 13 deletions ghca/cli_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ import (
)

func TestNewCLI(t *testing.T) {
cli, err := NewCLI("token", "foo stars>1", "lang", "dest", "")
cli, err := NewCLI("token", "foo stars>1", "dest", "")
if err != nil {
t.Fatal(err)
}
if cli.token != "token" {
t.Error("Unexpected token", cli.token)
}
if cli.query != "foo stars>1 language:lang fork:false" {
if cli.query != "foo stars>1" {
t.Error("Unexpected query", cli.query)
}
if cli.dest != "dest" {
Expand All @@ -26,7 +26,7 @@ func TestNewCLI(t *testing.T) {
}

func TestEmptyDest(t *testing.T) {
cli, err := NewCLI("token", "query", "lang", "", "")
cli, err := NewCLI("token", "query", "", "")
if err != nil {
t.Fatal(err)
}
Expand All @@ -37,23 +37,31 @@ func TestEmptyDest(t *testing.T) {
}
}

func TestEmptyTokenOrLang(t *testing.T) {
func TestEmptyToken(t *testing.T) {
token := os.Getenv("GITHUB_TOKEN")
os.Setenv("GITHUB_TOKEN", "")
if _, err := NewCLI("", "", "vim", "", ""); err == nil {
if _, err := NewCLI("", "query", "", ""); err == nil {
t.Error("Empty token should raise an error")
}
os.Setenv("GITHUB_TOKEN", token)
}

if _, err := NewCLI("", "foobar", "", "", ""); err == nil {
t.Error("Empty lang should raise an error")
func TestEmptyQuery(t *testing.T) {
for _, q := range []string{
"",
" ",
" ",
} {
if _, err := NewCLI("token", q, "", ""); err == nil {
t.Errorf("Empty query should raise an error: '%s'", q)
}
}
os.Setenv("GITHUB_TOKEN", token)
}

func TestGitHubTokenEnv(t *testing.T) {
token := os.Getenv("GITHUB_TOKEN")
os.Setenv("GITHUB_TOKEN", "foobar")
cli, err := NewCLI("", "", "vim", "", "")
cli, err := NewCLI("", "query", "", "")
if err != nil {
t.Error(err)
}
Expand All @@ -64,16 +72,15 @@ func TestGitHubTokenEnv(t *testing.T) {
}

func TestInvalidRegexp(t *testing.T) {
if _, err := NewCLI("token", "", "vim", "", "(foo"); err == nil {
if _, err := NewCLI("token", "query", "", "(foo"); err == nil {
t.Error("Broken regexp must raise an error")
}

}

func TestMakeDest(t *testing.T) {
defer os.Remove("repos")

cli, err := NewCLI("token", "", "lang", "", "")
cli, err := NewCLI("token", "query", "", "")
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -104,7 +111,7 @@ func TestDestAlreadyExistAsFile(t *testing.T) {
if err := f.Close(); err != nil {
t.Fatal(err)
}
cli, err := NewCLI("token", "", "lang", "", "")
cli, err := NewCLI("token", "query", "", "")
if err != nil {
t.Fatal(err)
}
Expand Down
11 changes: 6 additions & 5 deletions ghca/clone.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"os/exec"
"path/filepath"
"regexp"
"runtime"
"sync"
)

Expand Down Expand Up @@ -84,10 +85,8 @@ func (cl *Cloner) newWorker() {
if info.IsDir() {
return nil
}
if !extract.MatchString(path) {
if err := os.Remove(path); err != nil {
return err
}
if (info.Mode()&os.ModeSymlink != 0) || !extract.MatchString(path) {
return os.Remove(path)
}
return nil
}); err != nil {
Expand All @@ -105,7 +104,9 @@ func (cl *Cloner) newWorker() {
}

func (cl *Cloner) Start() {
for i := 0; i < maxConcurrency; i++ {
para := runtime.NumCPU() - 1
log.Println("Start to clone with", para, "workers")
for i := 0; i < para; i++ {
cl.newWorker()
}
}
Expand Down
4 changes: 4 additions & 0 deletions ghca/clone_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ func TestNewCloner(t *testing.T) {
}

func testRepos(repos []string, t *testing.T) {
if testing.Short() {
t.Skip("Skipping test in short mode.")
}

c := NewCloner("test", nil)
defer func() {
os.RemoveAll("test")
Expand Down
3 changes: 2 additions & 1 deletion ghca/collect.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func (col *Collector) searchRepos() (*github.RepositoriesSearchResult, error) {

func (col *Collector) Collect() (int, int, error) {
log.Println("Searching GitHub repositories with query:", col.Query)
start := time.Now()
cloner := NewCloner(col.Dest, col.Extract)
cloner.Start()

Expand Down Expand Up @@ -74,7 +75,7 @@ func (col *Collector) Collect() (int, int, error) {

cloner.Shutdown()

log.Println(count, "repositories were cloned into", col.Dest, "for total", total, "search results")
log.Printf("%d repositories were cloned into '%s' for total %d search results (%f seconds)\n", count, col.Dest, total, time.Now().Sub(start).Seconds())

return count, total, nil
}
Expand Down
8 changes: 8 additions & 0 deletions ghca/collect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ func TestNewCollectorWithConfig(t *testing.T) {
}

func TestCollectReposTotalIsAFew(t *testing.T) {
if testing.Short() {
t.Skip("Skipping test in short mode")
}

token := os.Getenv("GITHUB_TOKEN")
if token == "" {
t.Skip("Skipping because API token not found")
Expand Down Expand Up @@ -72,6 +76,10 @@ func TestCollectReposTotalIsAFew(t *testing.T) {
}

func TestCollectReposTotalIsLarge(t *testing.T) {
if testing.Short() {
t.Skip("Skipping test in short mode")
}

token := os.Getenv("GITHUB_TOKEN")
if token == "" {
t.Skip("Skipping because API token not found")
Expand Down
Loading

0 comments on commit 92b70bb

Please sign in to comment.