Skip to content

Commit

Permalink
Adding series_first_word and cut off length to book title and file na…
Browse files Browse the repository at this point in the history
…me key maps
  • Loading branch information
rupor-github committed Feb 11, 2024
1 parent b7b8cef commit 5feb68b
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 42 deletions.
16 changes: 15 additions & 1 deletion Taskfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,23 @@ tasks:
- task: go-build
vars: { FLAGS: 'debug', PACKAGE: './cmd/fb2c', TARGET: '{{joinPath .DEV_BUILD_DIR "fb2c"}}{{exeExt}}' }

test:
desc: Runs all available tests
deps: [test-hyphenator, test-processor]

test-processor:
desc: Runs tests on processor package
cmds:
- go test -v -mod=mod -gcflags 'all=-N -l' -coverprofile={{joinPath .DEV_BUILD_DIR "test_processor.out"}} ./processor

test-hyphenator:
desc: Runs tests on hyphenator package
cmds:
- go test -v -mod=mod -gcflags 'all=-N -l' -coverprofile={{joinPath .DEV_BUILD_DIR "test_hyphenator.out"}} ./hyphenator

release:
desc: Cross-builds release for all supported platforms
deps: [ get-dictionaries, get-sentences ]
deps: [get-dictionaries, get-sentences]
cmds:
- mkdir -p '{{.REL_BUILD_DIR}}'
- for: [linux-amd64, linux-arm64, linux-386, darwin-amd64, darwin-arm64, windows-amd64-.exe, windows-arm64-.exe, windows-386-.exe]
Expand Down
2 changes: 2 additions & 0 deletions config/cfg.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ type Doc struct {
ChapterPerFile bool `json:"chapter_per_file"`
ChapterLevel int `json:"chapter_level"`
SeqNumPos int `json:"series_number_positions"`
SeqFirstWordLen int `json:"series_first_word_length"`
RemovePNGTransparency bool `json:"remove_png_transparency"`
OptimizeImages bool `json:"optimize_images"`
JPEGQuality int `json:"jpeq_quality_level"`
Expand Down Expand Up @@ -221,6 +222,7 @@ var defaultConfig = []byte(`{
"chapter_per_file": true,
"chapter_level": 2147483647,
"series_number_positions": 2,
"series_first_word_length": 4,
"characters_per_page": 2300,
"pages_per_file": 2147483647,
"fix_zip_format": true,
Expand Down
2 changes: 1 addition & 1 deletion processor/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ func (p *Processor) generateOPF() error {

var title string
if len(p.env.Cfg.Doc.TitleFormat) > 0 {
title = ReplaceKeywords(p.env.Cfg.Doc.TitleFormat, CreateTitleKeywordsMap(p.Book, p.env.Cfg.Doc.SeqNumPos, p.src))
title = ReplaceKeywords(p.env.Cfg.Doc.TitleFormat, CreateTitleKeywordsMap(p.Book, p.env.Cfg.Doc.SeqNumPos, p.env.Cfg.Doc.SeqFirstWordLen, p.src))
}
if len(title) == 0 {
title = p.Book.Title
Expand Down
4 changes: 3 additions & 1 deletion processor/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,9 @@ func (p *Processor) prepareOutputName() string {
return dirs
}

name = filepath.FromSlash(ReplaceKeywords(p.env.Cfg.Doc.FileNameFormat, CreateFileNameKeywordsMap(p.Book, p.env.Cfg.Doc.AuthorFormatFileName, p.env.Cfg.Doc.SeqNumPos)))
name = filepath.FromSlash(
ReplaceKeywords(p.env.Cfg.Doc.FileNameFormat,
CreateFileNameKeywordsMap(p.Book, p.env.Cfg.Doc.AuthorFormatFileName, p.env.Cfg.Doc.SeqNumPos, p.env.Cfg.Doc.SeqFirstWordLen)))
if len(name) > 0 {
first := true
dirs := make([]string, 0, 16)
Expand Down
55 changes: 39 additions & 16 deletions processor/textutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,43 @@ func CreateAuthorKeywordsMap(an *config.AuthorName) map[string]string {
return rd
}

// firstWordSeq returns the first white-space delimited word of seq, truncated
// to at most l runes. Leading white space is skipped. When l is zero or
// negative the limit becomes the rune count of seq, so the whole first word
// is returned.
func firstWordSeq(seq string, l int) (word string) {
	limit := l
	if limit <= 0 {
		limit = utf8.RuneCountInString(seq)
	}

	// Collected runes of the word; its length doubles as the non-space counter.
	var picked []rune
	for _, r := range seq {
		switch {
		case len(picked) >= limit:
			// Cut-off length reached.
			return string(picked)
		case !unicode.IsSpace(r):
			picked = append(picked, r)
		case len(picked) > 0:
			// White space after at least one collected rune ends the word.
			return string(picked)
		}
		// Otherwise: leading white space, keep scanning.
	}
	return string(picked)
}

// abbrSeq builds an abbreviation of seq by concatenating the first letter of
// every white-space separated word. Characters that are not letters (digits,
// punctuation, undecodable bytes) are skipped when looking for a word's
// initial; a word without any letter contributes nothing.
func abbrSeq(seq string) (abbr string) {
	var initials strings.Builder
	for _, token := range strings.Fields(seq) {
		for _, r := range token {
			if r == utf8.RuneError || !unicode.IsLetter(r) {
				continue
			}
			// First letter found - this word is done.
			initials.WriteRune(r)
			break
		}
	}
	return initials.String()
}

// CreateTitleKeywordsMap prepares keywords map for replacement.
func CreateTitleKeywordsMap(b *Book, pos int, src string) map[string]string {
func CreateTitleKeywordsMap(b *Book, pos, wlen int, src string) map[string]string {
rd := make(map[string]string)
rd["#title"] = ""
if len(b.Title) > 0 {
Expand All @@ -178,6 +213,7 @@ func CreateTitleKeywordsMap(b *Book, pos int, src string) map[string]string {
rd["#series"], rd["#abbrseries"], rd["#ABBRseries"] = "", "", ""
if len(b.SeqName) > 0 {
rd["#series"] = b.SeqName
rd["#series_first_word"] = firstWordSeq(b.SeqName, wlen)
abbr := abbrSeq(b.SeqName)
if len(abbr) > 0 {
rd["#abbrseries"] = strings.ToLower(abbr)
Expand All @@ -196,22 +232,8 @@ func CreateTitleKeywordsMap(b *Book, pos int, src string) map[string]string {
return rd
}

// abbrSeq builds an abbreviation of seq by concatenating the first letter of
// every word. Non-letter characters at the start of a word are skipped; a
// word without any letter contributes nothing.
func abbrSeq(seq string) (abbr string) {
	var sb strings.Builder
	// strings.Fields splits on any run of Unicode white space, so words
	// separated by tabs, newlines or repeated spaces are all recognized.
	// Splitting on a literal " " would glue such words together and drop
	// their initials.
	for _, w := range strings.Fields(seq) {
		for len(w) > 0 {
			r, size := utf8.DecodeRuneInString(w)
			if r != utf8.RuneError && unicode.IsLetter(r) {
				sb.WriteRune(r)
				break
			}
			// Not a letter - advance past this rune and keep looking.
			w = w[size:]
		}
	}
	return sb.String()
}

// CreateFileNameKeywordsMap prepares keywords map for replacement.
func CreateFileNameKeywordsMap(b *Book, format string, pos int) map[string]string {
func CreateFileNameKeywordsMap(b *Book, format string, pos, wlen int) map[string]string {
rd := make(map[string]string)
rd["#title"] = ""
if len(b.Title) > 0 {
Expand All @@ -220,6 +242,7 @@ func CreateFileNameKeywordsMap(b *Book, format string, pos int) map[string]strin
rd["#series"], rd["#abbrseries"], rd["#ABBRseries"] = "", "", ""
if len(b.SeqName) > 0 {
rd["#series"] = b.SeqName
rd["#series_first_word"] = firstWordSeq(b.SeqName, wlen)
abbr := abbrSeq(b.SeqName)
if len(abbr) > 0 {
rd["#abbrseries"] = strings.ToLower(abbr)
Expand Down
50 changes: 47 additions & 3 deletions processor/textutils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,58 @@ func TestReplaceKeywords(t *testing.T) {
t.Logf("OK - %s: %d cases", t.Name(), len(cases))
}

var cases1 = []string{
// testCaseWord is one table-driven case shared by TestFirstWord and TestAbbr.
type testCaseWord struct {
// cut is the length limit handed to firstWordSeq; TestAbbr does not read it.
cut int
// in is the input string passed to the function under test.
in string
// out is the exact result the function is expected to return.
out string
}

// casesFirstWord lists the inputs and expected results checked by TestFirstWord.
var casesFirstWord = []testCaseWord{
// Leading white space is skipped, then the word is cut to 4 runes.
{4, " abbreviated case", "abbr"},
// Word shorter than the limit is returned whole.
{4, " abb case", "abb"},
{4, "abbreviated case", "abbr"},
// A zero limit means "take the whole first word".
{0, "abbreviated case", "abbreviated"},
{5, "abbr case", "abbr"},
// White-space only input yields an empty result.
{4, " ", ""},
{4, " ", ""},
// A negative limit behaves like zero.
{-1, "abbra case", "abbra"},
}

// TestFirstWord feeds every entry of casesFirstWord through firstWordSeq and
// stops at the first result that differs from the expected word.
func TestFirstWord(t *testing.T) {
	for idx := 0; idx < len(casesFirstWord); idx++ {
		tc := casesFirstWord[idx]
		if got := firstWordSeq(tc.in, tc.cut); got != tc.out {
			// Case numbers are reported 1-based.
			t.Fatalf("BAD RESULT for case %d\nEXPECTED:\n[%s]\nGOT:\n[%s]\ncut len - %d", idx+1, tc.out, got, tc.cut)
		}
	}
	t.Logf("OK - %s: %d cases", t.Name(), len(casesFirstWord))
}

// casesAbbr lists the inputs and expected abbreviations checked by TestAbbr.
// The cut field is left at 0 because TestAbbr only reads in and out.
var casesAbbr = []testCaseWord{
{0, " abbreviated case", "ac"},
{0, "abbreviated case", "ac"},
{0, "abbr case more", "acm"},
// White-space only input produces an empty abbreviation.
{0, " ", ""},
}

// TestAbbr feeds every entry of casesAbbr through abbrSeq and stops at the
// first result that differs from the expected abbreviation.
func TestAbbr(t *testing.T) {
	for i, c := range casesAbbr {
		res := abbrSeq(c.in)
		if res != c.out {
			t.Fatalf("BAD RESULT for case %d\nEXPECTED:\n[%s]\nGOT:\n[%s]", i+1, c.out, res)
		}
	}
	// Report the size of the table this test actually iterated, not the
	// unrelated casesFirstWord table.
	t.Logf("OK - %s: %d cases", t.Name(), len(casesAbbr))
}

// casesDisposition holds the file names iterated by TestContentDisposition:
// a single ASCII character, an ASCII name containing a space, and a long
// Cyrillic name with spaces.
var casesDisposition = []string{
"1",
"test book.epub",
"Знаменитые расследования Мисс Марпл в одном томе .epub",
}

func TestContentDisposition(t *testing.T) {
for i, c := range cases1 {
for i, c := range casesDisposition {
res1 := url.PathEscape(c)
res2 := ""
for _, part := range encodeParts(c) {
Expand All @@ -114,5 +158,5 @@ func TestContentDisposition(t *testing.T) {
t.Fatalf("BAD RESULT for case %d [%s]\nEXPECTED:\n[%s]\nGOT:\n[%s]", i+1, c, res1, res2)
}
}
t.Logf("OK - %s: %d cases", t.Name(), len(cases1))
t.Logf("OK - %s: %d cases", t.Name(), len(casesDisposition))
}
44 changes: 24 additions & 20 deletions static/configuration.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,21 @@
# jpeq_quality_level = 75

#---- Pattern to format book title
#---- "#title" - book title
#---- "#file_name" - name of original FB2 file (no path, no extension)
#---- "#file_name_ext" - name of original FB2 file (no path)
#---- "#series" - name of sequence book belongs to
#---- "#abbrseries" - abbreviated #series, lower case
#---- "#ABBRseries" - abbreviated #series, upper case
#---- "#number" - number in a series
#---- "#padnumber" - number in a series padded with zeros to "series_number_positions"
#---- "#date" - date specified in a book description
#---- "#title" - book title
#---- "#file_name" - name of original FB2 file (no path, no extension)
#---- "#file_name_ext" - name of original FB2 file (no path)
#---- "#series" - name of sequence book belongs to
#---- "#series_first_word" - first word in the name of series book belongs to, up to "series_first_word_length" letters
#---- "#abbrseries" - abbreviated #series, lower case
#---- "#ABBRseries" - abbreviated #series, upper case
#---- "#number" - number in a series
#---- "#padnumber" - number in a series padded with zeros to "series_number_positions"
#---- "#date" - date specified in a book description
title_format = "{(#ABBRseries{ - #padnumber}) }#title"
#---- How many positions padded series number will take
series_number_positions = 2
# series_number_positions = 2
#---- How many letters to take from the first word of the series name. If less than or equal to 0, or if the word is shorter than specified, the whole word is taken
# series_first_word_length = 4

#---- Patterns to format author name (#author, #autors) in different places
#---- "#f" - first name
Expand All @@ -86,16 +89,17 @@

#---- Output file name pattern - output file will have name created using FB2 information
#---- NOTE: watch out for path separators, directories will be created!
#---- "#title" - book title
#---- "#series" - name of sequence book belongs to
#---- "#abbrseries" - abbreviated #series, lower case
#---- "#ABBRseries" - abbreviated #series, upper case
#---- "#number" - number in a series
#---- "#padnumber" - number in a series padded with zeros to "series_number_positions"
#---- "#authors" - list of all authors (each formatted as specified in "author_format")
#---- "#author" - name of the first author (formatted as specified in "author_format"). If more then one - it will
#---- be indicated with either ", et al" or " и др" depending on book language
#---- "#bookid" - Book UUID (either parsed from or genrated based of fb2 information)
#---- "#title" - book title
#---- "#series" - name of sequence book belongs to
#---- "#series_first_word" - first word in the name of series book belongs to, up to "series_first_word_length" letters
#---- "#abbrseries" - abbreviated #series, lower case
#---- "#ABBRseries" - abbreviated #series, upper case
#---- "#number" - number in a series
#---- "#padnumber" - number in a series padded with zeros to "series_number_positions"
#---- "#authors" - list of all authors (each formatted as specified in "author_format")
#---- "#author" - name of the first author (formatted as specified in "author_format"). If more than one - it will
#---- be indicated with either ", et al" or " и др" depending on book language
#---- "#bookid" - Book UUID (either parsed from or generated based on fb2 information)
# file_name_format = "{#author - }#title"

#---- Slugify/transliterate output file name - after all other processing on file name is completed
Expand Down

0 comments on commit 5feb68b

Please sign in to comment.