Adding series_first_word and cut off length to book title and file na…

…me key maps
rupor-github · Feb 11, 2024 · 5feb68b · 5feb68b
1 parent b7b8cef
commit 5feb68b
Show file tree

Hide file tree

Showing 7 changed files with 131 additions and 42 deletions.
diff --git a/Taskfile.yaml b/Taskfile.yaml
@@ -39,9 +39,23 @@ tasks:
       - task: go-build
         vars: { FLAGS: 'debug', PACKAGE: './cmd/fb2c', TARGET: '{{joinPath .DEV_BUILD_DIR "fb2c"}}{{exeExt}}' }
 
+  # Aggregate target: has no commands of its own, it only lists both package test tasks as deps.
+  test:
+    desc: Runs all available tests
+    deps: [test-hyphenator, test-processor]
+
+  # -gcflags 'all=-N -l' turns off compiler optimizations and inlining for all packages;
+  # the coverage profile is written to test_processor.out under DEV_BUILD_DIR.
+  test-processor:
+    desc: Runs tests on processor package
+    cmds:
+      - go test -v -mod=mod -gcflags 'all=-N -l' -coverprofile={{joinPath .DEV_BUILD_DIR "test_processor.out"}} ./processor
+
+  # Same flags as test-processor, run against ./hyphenator with its own coverage profile file.
+  test-hyphenator:
+    desc: Runs tests on hyphenator package
+    cmds:
+      - go test -v -mod=mod -gcflags 'all=-N -l' -coverprofile={{joinPath .DEV_BUILD_DIR "test_hyphenator.out"}} ./hyphenator
+
   release:
     desc: Cross-builds release for all supported platforms
-    deps: [ get-dictionaries, get-sentences ]
+    deps: [get-dictionaries, get-sentences]
     cmds:
       - mkdir -p '{{.REL_BUILD_DIR}}'
       - for: [linux-amd64, linux-arm64, linux-386, darwin-amd64, darwin-arm64, windows-amd64-.exe, windows-arm64-.exe, windows-386-.exe]

diff --git a/config/cfg.go b/config/cfg.go
@@ -125,6 +125,7 @@ type Doc struct {
 	ChapterPerFile        bool     `json:"chapter_per_file"`
 	ChapterLevel          int      `json:"chapter_level"`
 	SeqNumPos             int      `json:"series_number_positions"`
+	SeqFirstWordLen       int      `json:"series_first_word_length"`
 	RemovePNGTransparency bool     `json:"remove_png_transparency"`
 	OptimizeImages        bool     `json:"optimize_images"`
 	JPEGQuality           int      `json:"jpeq_quality_level"`
@@ -221,6 +222,7 @@ var defaultConfig = []byte(`{
     "chapter_per_file": true,
     "chapter_level": 2147483647,
     "series_number_positions": 2,
+    "series_first_word_length": 4,
     "characters_per_page": 2300,
     "pages_per_file": 2147483647,
     "fix_zip_format": true,

diff --git a/processor/generate.go b/processor/generate.go
@@ -447,7 +447,7 @@ func (p *Processor) generateOPF() error {
 
 	var title string
 	if len(p.env.Cfg.Doc.TitleFormat) > 0 {
-		title = ReplaceKeywords(p.env.Cfg.Doc.TitleFormat, CreateTitleKeywordsMap(p.Book, p.env.Cfg.Doc.SeqNumPos, p.src))
+		title = ReplaceKeywords(p.env.Cfg.Doc.TitleFormat, CreateTitleKeywordsMap(p.Book, p.env.Cfg.Doc.SeqNumPos, p.env.Cfg.Doc.SeqFirstWordLen, p.src))
 	}
 	if len(title) == 0 {
 		title = p.Book.Title

diff --git a/processor/process.go b/processor/process.go
@@ -433,7 +433,9 @@ func (p *Processor) prepareOutputName() string {
 			return dirs
 		}
 
-		name = filepath.FromSlash(ReplaceKeywords(p.env.Cfg.Doc.FileNameFormat, CreateFileNameKeywordsMap(p.Book, p.env.Cfg.Doc.AuthorFormatFileName, p.env.Cfg.Doc.SeqNumPos)))
+		name = filepath.FromSlash(
+			ReplaceKeywords(p.env.Cfg.Doc.FileNameFormat,
+				CreateFileNameKeywordsMap(p.Book, p.env.Cfg.Doc.AuthorFormatFileName, p.env.Cfg.Doc.SeqNumPos, p.env.Cfg.Doc.SeqFirstWordLen)))
 		if len(name) > 0 {
 			first := true
 			dirs := make([]string, 0, 16)

diff --git a/processor/textutils.go b/processor/textutils.go
@@ -164,8 +164,43 @@ func CreateAuthorKeywordsMap(an *config.AuthorName) map[string]string {
 	return rd
 }
 
+// firstWordSeq returns the first whitespace-delimited word of seq, cut to at
+// most l runes. Leading whitespace is skipped. The word ends at the first
+// whitespace rune that follows a non-space rune, or as soon as l runes have
+// been collected. A non-positive l means "no limit": the whole first word is
+// returned. The result is empty when seq has no non-space runes.
+func firstWordSeq(seq string, l int) string {
+	if l <= 0 {
+		// The rune count of the whole string can never be exceeded by a
+		// single word, so it acts as "unlimited".
+		l = utf8.RuneCountInString(seq)
+	}
+	var word strings.Builder
+	taken := 0
+	for _, r := range seq {
+		if taken >= l {
+			break
+		}
+		if unicode.IsSpace(r) {
+			if taken > 0 {
+				// Whitespace after the word started - the word is complete.
+				break
+			}
+			// Still skipping leading whitespace.
+			continue
+		}
+		word.WriteRune(r)
+		taken++
+	}
+	return word.String()
+}
+
+// abbrSeq builds an abbreviation of seq by concatenating the first letter of
+// every whitespace-separated word. Words that contain no letters contribute
+// nothing; case is left as found in seq.
+func abbrSeq(seq string) string {
+	var initials strings.Builder
+	for _, field := range strings.Fields(seq) {
+		// Locate the first letter, skipping any leading digits or punctuation.
+		pos := strings.IndexFunc(field, unicode.IsLetter)
+		if pos < 0 {
+			continue
+		}
+		first, _ := utf8.DecodeRuneInString(field[pos:])
+		initials.WriteRune(first)
+	}
+	return initials.String()
+}
+
 // CreateTitleKeywordsMap prepares keywords map for replacement.
-func CreateTitleKeywordsMap(b *Book, pos int, src string) map[string]string {
+func CreateTitleKeywordsMap(b *Book, pos, wlen int, src string) map[string]string {
 	rd := make(map[string]string)
 	rd["#title"] = ""
 	if len(b.Title) > 0 {
@@ -178,6 +213,7 @@ func CreateTitleKeywordsMap(b *Book, pos int, src string) map[string]string {
 	rd["#series"], rd["#abbrseries"], rd["#ABBRseries"] = "", "", ""
 	if len(b.SeqName) > 0 {
 		rd["#series"] = b.SeqName
+		rd["#series_first_word"] = firstWordSeq(b.SeqName, wlen)
 		abbr := abbrSeq(b.SeqName)
 		if len(abbr) > 0 {
 			rd["#abbrseries"] = strings.ToLower(abbr)
@@ -196,22 +232,8 @@ func CreateTitleKeywordsMap(b *Book, pos int, src string) map[string]string {
 	return rd
 }
 
-func abbrSeq(seq string) (abbr string) {
-	for _, w := range strings.Split(seq, " ") {
-		for len(w) > 0 {
-			r, l := utf8.DecodeRuneInString(w)
-			if r != utf8.RuneError && unicode.IsLetter(r) {
-				abbr += string(r)
-				break
-			}
-			w = w[l:]
-		}
-	}
-	return
-}
-
 // CreateFileNameKeywordsMap prepares keywords map for replacement.
-func CreateFileNameKeywordsMap(b *Book, format string, pos int) map[string]string {
+func CreateFileNameKeywordsMap(b *Book, format string, pos, wlen int) map[string]string {
 	rd := make(map[string]string)
 	rd["#title"] = ""
 	if len(b.Title) > 0 {
@@ -220,6 +242,7 @@ func CreateFileNameKeywordsMap(b *Book, format string, pos int) map[string]strin
 	rd["#series"], rd["#abbrseries"], rd["#ABBRseries"] = "", "", ""
 	if len(b.SeqName) > 0 {
 		rd["#series"] = b.SeqName
+		rd["#series_first_word"] = firstWordSeq(b.SeqName, wlen)
 		abbr := abbrSeq(b.SeqName)
 		if len(abbr) > 0 {
 			rd["#abbrseries"] = strings.ToLower(abbr)

diff --git a/processor/textutils_test.go b/processor/textutils_test.go
@@ -97,14 +97,58 @@ func TestReplaceKeywords(t *testing.T) {
 	t.Logf("OK - %s: %d cases", t.Name(), len(cases))
 }
 
-var cases1 = []string{
+// testCaseWord is one table entry shared by the firstWordSeq and abbrSeq tests.
+type testCaseWord struct {
+	// cut is the length limit handed to firstWordSeq; TestAbbr does not read it.
+	cut int
+	// in is the input string.
+	in  string
+	// out is the expected result.
+	out string
+}
+
+// casesFirstWord is the table driving TestFirstWord: each entry gives the cut
+// length, the input and the expected first word. It covers leading whitespace,
+// words shorter than the cut, whitespace-only input and non-positive cut values.
+var casesFirstWord = []testCaseWord{
+	{4, "  abbreviated case", "abbr"},
+	{4, "  abb case", "abb"},
+	{4, "abbreviated case", "abbr"},
+	{0, "abbreviated case", "abbreviated"},
+	{5, "abbr case", "abbr"},
+	{4, "          ", ""},
+	{4, " ", ""},
+	{-1, "abbra case", "abbra"},
+}
+
+// TestFirstWord runs firstWordSeq over every entry of casesFirstWord and stops
+// at the first mismatch, reporting the 1-based case number.
+func TestFirstWord(t *testing.T) {
+	for idx := range casesFirstWord {
+		tc := casesFirstWord[idx]
+		if got := firstWordSeq(tc.in, tc.cut); got != tc.out {
+			t.Fatalf("BAD RESULT for case %d\nEXPECTED:\n[%s]\nGOT:\n[%s]\ncut len - %d", idx+1, tc.out, got, tc.cut)
+		}
+	}
+	t.Logf("OK - %s: %d cases", t.Name(), len(casesFirstWord))
+}
+
+// casesAbbr is the table driving TestAbbr. The cut field is left at 0 in every
+// entry because TestAbbr only reads in and out.
+var casesAbbr = []testCaseWord{
+	{0, "  abbreviated case", "ac"},
+	{0, "abbreviated case", "ac"},
+	{0, "abbr case more", "acm"},
+	{0, "          ", ""},
+}
+
+// TestAbbr runs abbrSeq over every entry of casesAbbr and stops at the first
+// mismatch, reporting the 1-based case number.
+func TestAbbr(t *testing.T) {
+	for i, c := range casesAbbr {
+		res := abbrSeq(c.in)
+		if res != c.out {
+			t.Fatalf("BAD RESULT for case %d\nEXPECTED:\n[%s]\nGOT:\n[%s]", i+1, c.out, res)
+		}
+	}
+	// Report the size of the table exercised here (casesAbbr, not casesFirstWord).
+	t.Logf("OK - %s: %d cases", t.Name(), len(casesAbbr))
+}
+
+// casesDisposition holds the file names that TestContentDisposition iterates over.
+var casesDisposition = []string{
 	"1",
 	"test book.epub",
 	"Знаменитые расследования Мисс Марпл в одном томе .epub",
 }
 
 func TestContentDisposition(t *testing.T) {
-	for i, c := range cases1 {
+	for i, c := range casesDisposition {
 		res1 := url.PathEscape(c)
 		res2 := ""
 		for _, part := range encodeParts(c) {
@@ -114,5 +158,5 @@ func TestContentDisposition(t *testing.T) {
 			t.Fatalf("BAD RESULT for case %d [%s]\nEXPECTED:\n[%s]\nGOT:\n[%s]", i+1, c, res1, res2)
 		}
 	}
-	t.Logf("OK - %s: %d cases", t.Name(), len(cases1))
+	t.Logf("OK - %s: %d cases", t.Name(), len(casesDisposition))
 }
diff --git a/static/configuration.toml b/static/configuration.toml
@@ -61,18 +61,21 @@
 	# jpeq_quality_level = 75
 
 	#---- Pattern to format book title
-	#---- "#title"         - book title
-	#---- "#file_name"     - name of original FB2 file (no path, no extension)
-	#---- "#file_name_ext" - name of original FB2 file (no path)
-	#---- "#series"        - name of sequence book belongs to
-	#---- "#abbrseries"    - abbreviated #series, lower case
-	#---- "#ABBRseries"    - abbreviated #series, upper case
-	#---- "#number"        - number in a series
-	#---- "#padnumber"     - number in a series padded with zeros to "series_number_positions"
-	#---- "#date"          - date specified in a book description
+	#---- "#title"             - book title
+	#---- "#file_name"         - name of original FB2 file (no path, no extension)
+	#---- "#file_name_ext"     - name of original FB2 file (no path)
+	#---- "#series"            - name of sequence book belongs to
+	#---- "#series_first_word" - first word of the name of the series the book belongs to, cut to at most "series_first_word_length" characters
+	#---- "#abbrseries"        - abbreviated #series, lower case
+	#---- "#ABBRseries"        - abbreviated #series, upper case
+	#---- "#number"            - number in a series
+	#---- "#padnumber"         - number in a series padded with zeros to "series_number_positions"
+	#---- "#date"              - date specified in a book description
 	title_format = "{(#ABBRseries{ - #padnumber}) }#title"
 	#---- How many positions padded series number will take
-	series_number_positions = 2
+	# series_number_positions = 2
+	#---- How many characters to take from the first word of the series name; if less than or equal to 0, or if the word is shorter than specified - the whole word is taken
+	# series_first_word_length = 4
 
 	#---- Patterns to format author name (#author, #autors) in different places
 	#---- "#f"  - first name
@@ -86,16 +89,17 @@
 
 	#---- Output file name pattern - output file will have name created using FB2 information
 	#---- NOTE: watch out for path separators, directories will be created!
-	#---- "#title"      - book title
-	#---- "#series"     - name of sequence book belongs to
-	#---- "#abbrseries" - abbreviated #series, lower case
-	#---- "#ABBRseries" - abbreviated #series, upper case
-	#---- "#number"     - number in a series
-	#---- "#padnumber"  - number in a series padded with zeros to "series_number_positions"
-	#---- "#authors"    - list of all authors (each formatted as specified in "author_format")
-	#---- "#author"     - name of the first author (formatted as specified in "author_format"). If more then one - it will
-	#----                 be indicated with either ", et al" or " и др" depending on book language
-	#---- "#bookid"     - Book UUID (either parsed from or genrated based of fb2 information)
+	#---- "#title"             - book title
+	#---- "#series"            - name of sequence book belongs to
+	#---- "#series_first_word" - first word of the name of the series the book belongs to, cut to at most "series_first_word_length" characters
+	#---- "#abbrseries"        - abbreviated #series, lower case
+	#---- "#ABBRseries"        - abbreviated #series, upper case
+	#---- "#number"            - number in a series
+	#---- "#padnumber"         - number in a series padded with zeros to "series_number_positions"
+	#---- "#authors"           - list of all authors (each formatted as specified in "author_format")
+	#---- "#author"            - name of the first author (formatted as specified in "author_format"). If more than one - it will
+	#----                        be indicated with either ", et al" or " и др" depending on book language
+	#---- "#bookid"            - Book UUID (either parsed from or generated based on fb2 information)
 	# file_name_format = "{#author - }#title"
 
 	#---- Slugify/transliterate output file name - after all other processing on file name is completed