From 38e2b107a2eabaa85de44b214554a570aac542d4 Mon Sep 17 00:00:00 2001 From: Enoc Date: Fri, 5 Mar 2021 16:17:27 -0600 Subject: [PATCH 1/3] Add external_urls filter This filter traverses all <a> tags and replaces each external URL with a URL pointing to the path of existing documentation. --- docs/filter-reference.md | 1 + docs/scraper-reference.md | 5 ++++ lib/docs/core/filter.rb | 10 +++++++ lib/docs/core/scraper.rb | 2 +- lib/docs/filters/core/external_urls.rb | 38 ++++++++++++++++++++++++++ lib/docs/scrapers/backbone.rb | 4 +++ 6 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 lib/docs/filters/core/external_urls.rb diff --git a/docs/filter-reference.md b/docs/filter-reference.md index 6c1387710b..bd654f1968 100644 --- a/docs/filter-reference.md +++ b/docs/filter-reference.md @@ -84,6 +84,7 @@ The `call` method must return either `doc` or `html`, depending on the type of f * [`AttributionFilter`](https://github.com/freeCodeCamp/devdocs/blob/master/lib/docs/filters/core/attribution.rb) — appends the license info and link to the original document * [`TitleFilter`](https://github.com/freeCodeCamp/devdocs/blob/master/lib/docs/filters/core/title.rb) — prepends the document with a title (disabled by default) * [`EntriesFilter`](https://github.com/freeCodeCamp/devdocs/blob/master/lib/docs/filters/core/entries.rb) — abstract filter for extracting the page's metadata +* [`ExternalUrlsFilter`](https://github.com/freeCodeCamp/devdocs/blob/master/lib/docs/filters/core/external_urls.rb) — replaces external URLs with relative URLs of existing devdocs documentation. ## Custom filters diff --git a/docs/scraper-reference.md b/docs/scraper-reference.md index fc00876d0d..e48fbe0f65 100644 --- a/docs/scraper-reference.md +++ b/docs/scraper-reference.md @@ -115,6 +115,7 @@ Additionally: * [`TitleFilter`](https://github.com/freeCodeCamp/devdocs/blob/master/lib/docs/filters/core/title.rb) is a core HTML filter, disabled by default, which prepends the document with a title (`<h1>`). * [`EntriesFilter`](https://github.com/freeCodeCamp/devdocs/blob/master/lib/docs/filters/core/entries.rb) is an abstract HTML filter that each scraper must implement and responsible for extracting the page's metadata. +* [`ExternalUrlsFilter`](https://github.com/freeCodeCamp/devdocs/blob/master/lib/docs/filters/core/external_urls.rb) is an HTML filter that replaces external URLs found in `<a>` tags with URLs pointing to existing devdocs documentation. ### Filter options @@ -185,6 +186,10 @@ More information about how filters work is available on the [Filter Reference](. _Note: this filter is disabled by default._ +* [`ExternalUrlsFilter`](https://github.com/freeCodeCamp/devdocs/blob/master/lib/docs/filters/core/external_urls.rb) + + - `:external_urls` [Hash or Proc] If it is a Hash, replaces all URLs found in `<a>` tags with URLs of existing devdocs documentation. If it is a Proc, it is called with a URL (string) as argument and should return a relative URL pointing to existing devdocs documentation. See [`backbone.rb`](https://github.com/freeCodeCamp/devdocs/blob/master/lib/docs/scrapers/backbone.rb) + ## Keeping scrapers up-to-date In order to keep scrapers up-to-date the `get_latest_version(opts)` method should be overridden. If `self.release` is defined, this should return the latest version of the documentation. If `self.release` is not defined, it should return the Epoch time when the documentation was last modified. If the documentation will never change, simply return `1.0.0`. The result of this method is periodically reported in a "Documentation versions report" issue which helps maintainers keep track of outdated documentations. 
diff --git a/lib/docs/core/filter.rb b/lib/docs/core/filter.rb index 5be7788397..52b9cfca10 100644 --- a/lib/docs/core/filter.rb +++ b/lib/docs/core/filter.rb @@ -96,5 +96,15 @@ def clean_path(path) path = path.gsub %r{\+}, '_plus_' path end + + def path_to_root + if subpath == '' + return '../' + else + previous_dirs = subpath.scan(/\//) + return '../' * previous_dirs.length + end + end + end end diff --git a/lib/docs/core/scraper.rb b/lib/docs/core/scraper.rb index 083b0015eb..4013755abc 100644 --- a/lib/docs/core/scraper.rb +++ b/lib/docs/core/scraper.rb @@ -41,7 +41,7 @@ def stub(path, &block) self.html_filters = FilterStack.new self.text_filters = FilterStack.new - html_filters.push 'apply_base_url', 'container', 'clean_html', 'normalize_urls', 'internal_urls', 'normalize_paths', 'parse_cf_email' + html_filters.push 'apply_base_url', 'container', 'clean_html', 'normalize_urls', 'internal_urls', 'normalize_paths', 'parse_cf_email', 'external_urls' text_filters.push 'images' # ensure the images filter runs after all html filters text_filters.push 'inner_html', 'clean_text', 'attribution' diff --git a/lib/docs/filters/core/external_urls.rb b/lib/docs/filters/core/external_urls.rb new file mode 100644 index 0000000000..47fbc2200a --- /dev/null +++ b/lib/docs/filters/core/external_urls.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module Docs + class ExternalUrlsFilter < Filter + + def call + if context[:external_urls] + + root = path_to_root + + css('a').each do |node| + + next unless anchorUrl = node['href'] + + # avoid links already converted to internal links + next if anchorUrl.match?(/\.\./) + + if context[:external_urls].is_a?(Proc) + node['href'] = context[:external_urls].call(anchorUrl) + next + end + + url = URI(anchorUrl) + + context[:external_urls].each do |host, name| + if url.host.to_s.match?(host) + node['href'] = root + name + url.path.to_s + '#' + url.fragment.to_s + end + end + + end + end + + doc + end + + end +end diff --git 
a/lib/docs/scrapers/backbone.rb b/lib/docs/scrapers/backbone.rb index 3a5fdb7664..944c69a384 100644 --- a/lib/docs/scrapers/backbone.rb +++ b/lib/docs/scrapers/backbone.rb @@ -21,6 +21,10 @@ class Backbone < UrlScraper Licensed under the MIT License. HTML + options[:external_urls] = { + 'underscorejs.org' => 'underscore' + } + def get_latest_version(opts) doc = fetch_doc('https://backbonejs.org/', opts) doc.at_css('.version').content[1...-1] From 843e2c7bd29dcb7eb863f41ba2f1868982a3075d Mon Sep 17 00:00:00 2001 From: Enoc Date: Thu, 18 Mar 2021 23:57:26 -0600 Subject: [PATCH 2/3] Fix and improve external_filter --- assets/javascripts/lib/page.coffee | 8 +++++++- lib/docs/core/filter.rb | 10 ---------- lib/docs/filters/core/external_urls.rb | 4 +--- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/assets/javascripts/lib/page.coffee b/assets/javascripts/lib/page.coffee index 7a5329f6a8..b245098d37 100644 --- a/assets/javascripts/lib/page.coffee +++ b/assets/javascripts/lib/page.coffee @@ -178,16 +178,22 @@ onclick = (event) -> link = $.eventTarget(event) link = link.parentNode while link and link.tagName isnt 'A' - if link and not link.target and isSameOrigin(link.href) + if link and not link.target and isSameOrigin(link.href) and isSameOriginDifferentDoc(link) event.preventDefault() path = link.pathname + link.search + link.hash path = path.replace /^\/\/+/, '/' # IE11 bug page.show(path) + return isSameOrigin = (url) -> url.indexOf("#{location.protocol}//#{location.hostname}") is 0 +isSameOriginDifferentDoc = (url) -> + console.log(url.pathname) + console.log(location.pathname) + url.pathname == location.pathname + updateCanonicalLink = -> @canonicalLink ||= document.head.querySelector('link[rel="canonical"]') @canonicalLink.setAttribute('href', "https://#{location.host}#{location.pathname}") diff --git a/lib/docs/core/filter.rb b/lib/docs/core/filter.rb index 52b9cfca10..5be7788397 100644 --- a/lib/docs/core/filter.rb +++ b/lib/docs/core/filter.rb @@ 
-96,15 +96,5 @@ def clean_path(path) path = path.gsub %r{\+}, '_plus_' path end - - def path_to_root - if subpath == '' - return '../' - else - previous_dirs = subpath.scan(/\//) - return '../' * previous_dirs.length - end - end - end end diff --git a/lib/docs/filters/core/external_urls.rb b/lib/docs/filters/core/external_urls.rb index 47fbc2200a..cd4bd4c267 100644 --- a/lib/docs/filters/core/external_urls.rb +++ b/lib/docs/filters/core/external_urls.rb @@ -6,8 +6,6 @@ class ExternalUrlsFilter < Filter def call if context[:external_urls] - root = path_to_root - css('a').each do |node| next unless anchorUrl = node['href'] @@ -24,7 +22,7 @@ def call context[:external_urls].each do |host, name| if url.host.to_s.match?(host) - node['href'] = root + name + url.path.to_s + '#' + url.fragment.to_s + node['href'] = '/' + name + url.path.to_s + '#' + url.fragment.to_s end end From d45cc0eb1a033b4bcb5df68cdfddaa9b2c783383 Mon Sep 17 00:00:00 2001 From: Enoc Date: Tue, 23 Mar 2021 22:06:28 -0600 Subject: [PATCH 3/3] Remove logs and improve feature Elements in the sidebar weren't served as XHR requests due to this feature; it now works by checking whether the clicked link is an element in the sidebar --- assets/javascripts/lib/page.coffee | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/assets/javascripts/lib/page.coffee b/assets/javascripts/lib/page.coffee index b245098d37..b8baef0195 100644 --- a/assets/javascripts/lib/page.coffee +++ b/assets/javascripts/lib/page.coffee @@ -178,11 +178,13 @@ onclick = (event) -> link = $.eventTarget(event) link = link.parentNode while link and link.tagName isnt 'A' - if link and not link.target and isSameOrigin(link.href) and isSameOriginDifferentDoc(link) - event.preventDefault() - path = link.pathname + link.search + link.hash - path = path.replace /^\/\/+/, '/' # IE11 bug - page.show(path) + if link and not link.target and isSameOrigin(link.href) + + if link.className.match('_list-item') or not isSameOriginDifferentDoc(link) + 
event.preventDefault() + path = link.pathname + link.search + link.hash + path = path.replace /^\/\/+/, '/' # IE11 bug + page.show(path) return @@ -190,9 +192,7 @@ isSameOrigin = (url) -> url.indexOf("#{location.protocol}//#{location.hostname}") is 0 isSameOriginDifferentDoc = (url) -> - console.log(url.pathname) - console.log(location.pathname) - url.pathname == location.pathname + url.pathname.split('/')[1] != location.pathname.split('/')[1] updateCanonicalLink = -> @canonicalLink ||= document.head.querySelector('link[rel="canonical"]')