Skip to content

Commit

Permalink
Merge pull request #15 from coreweave/rwang.fixs3chan.10222023
Browse files (browse the repository at this point in the history)
fix minor file saving issue
  • Loading branch information
wbrown authored Oct 30, 2023
2 parents eb30a00 + 263c230 commit e7f1dea
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions cmd/dataset_tokenizer/dataset_tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -1030,6 +1030,10 @@ func (tt TextsTokenizer) TokenizeTextsToContexts(
func WriteContexts(outPath string, contexts chan gpt_bpe.Tokens,
encoder *gpt_bpe.GPTEncoder, sampling int, shuffle bool) (int, error) {
totalTokens := 0
// Create the parent directory of outPath (and any missing ancestors) if it does not exist; the file itself is created by OpenFile below.
if err := os.MkdirAll(filepath.Dir(outPath), os.ModePerm); err != nil {
return 0, err
}
outFile, err := os.OpenFile(outPath, os.O_TRUNC|os.O_RDWR|os.O_CREATE,
0755)
if err != nil {
Expand Down

0 comments on commit e7f1dea

Please sign in to comment.