Commit eda464e398e5540d503668e9ac9f383927fc9dfd

Thomas de Grivel 2022-02-01T01:29:17

wip cli

diff --git a/cli/kmxgit.rb b/cli/kmxgit.rb
index 59d2a2f..089183f 100755
--- a/cli/kmxgit.rb
+++ b/cli/kmxgit.rb
@@ -2,6 +2,8 @@
 
 require 'capybara'
 require 'io/console'
+require 'json'
+require 'open-uri'
 require 'optparse'
 
 Options = Struct.new(:cmd, :user)
@@ -35,6 +37,10 @@ def main(argv)
     usage()
     return 0
   when :mirror
+    FileUtils.mkdir_p "git.kmx.io"
+    Capybara.configure do |config|
+      config.save_path = "git.kmx.io"
+    end
     @session = Capybara::Session.new(:selenium)
     if @options.user
       login()
@@ -64,7 +70,113 @@ def login()
   end
 end
 
+# Split raw href/src values into [site-local paths, external URLs].
+#
+# nil and empty values (an <a> without href, an inline <script>) are skipped,
+# the "#fragment" is dropped and both lists are de-duplicated. A site-local
+# URL loses its "https://git.kmx.io" prefix and comes back as a path ("/a/b").
+def split_links(urls)
+  site = "https://git.kmx.io"
+  urls = urls.compact.map { |url| url.split("#")[0] }.compact.reject(&:empty?).uniq
+  local, external = urls.partition { |url| url.start_with?(site + "/") }
+  [local.map { |url| url.delete_prefix(site) }, external]
+end
+
+# Collect the URLs referenced by the page currently loaded in @session.
+#
+# Returns a Hash of de-duplicated String Arrays:
+#   :local, :local_img       - <a href> / <img src> on the site, as paths
+#   :external, :external_img - <a href> / <img src> pointing elsewhere
+#   :links, :scripts         - <link href> / <script src> on the site, as paths
+#
+# :links and :scripts are reduced to site paths because mirror_asset prepends
+# the host itself. This assumes the driver reports absolute URLs for them, as
+# the prefix test on <a>/<img> already does - TODO confirm for other drivers.
+def get_links
+  anchors = @session.all("a").map { |elt| elt[:href] }
+  images = @session.all("img").map { |elt| elt[:src] }
+  # visible: false makes the lookup include elements that are not displayed.
+  sheets = @session.all("link", visible: false).map { |elt| elt[:href] }
+  sources = @session.all("script", visible: false).map { |elt| elt[:src] }
+  local, external = split_links(anchors)
+  local_img, external_img = split_links(images)
+  # Every list computed above is returned; mirror_page reads :local_img.
+  {local: local,
+   external: external,
+   local_img: local_img,
+   external_img: external_img,
+   links: split_links(sheets).first,
+   scripts: split_links(sources).first}
+end
+
+# Bookkeeping over @visited, the Array of already-handled paths built by mirror.
+def visited?(path)
+  @visited.include?(path) # membership test; indexing an Array by String raises
+end
+def visited!(path)
+  @visited << path unless @visited.include?(path) # keep one entry per path
+end
+
+def save_visited # rewrites git.kmx.io/.visited.json with @visited as JSON
+  File.write("git.kmx.io/.visited.json", JSON.generate(@visited))
+end
+
+# Download the asset at site path `path` into the mirror, at most once.
+# Returns the name used on disk: `path` without its "#fragment" and "?query".
+def mirror_asset(path)
+  p = path.sub(/[#].*$/, "").sub(/[?].*$/, "")
+  return p if visited?(p)
+  visited!(p) # visited! alone records the path in @visited
+  dest = "git.kmx.io" + p
+  FileUtils.mkdir_p(File.dirname(dest)) # the asset's directory may not exist yet
+  # URI.open, not Kernel#open: the latter no longer opens URLs since Ruby 3.0.
+  # The block form closes the downloaded stream once it has been copied.
+  URI.open("https://git.kmx.io" + path) { |io| IO.copy_stream(io, dest) }
+  save_visited()
+  p
+end
+
+# Mirror the HTML page at site path `path`, then everything it references on
+# the site: images, <link>/<script> assets and, recursively, linked pages.
+#
+# Returns the page's path inside the mirror, fragment removed; a path ending
+# in "/" or naming an existing mirror directory gets "index.html" appended.
+# A page is handled at most once (visited?/visited!), which is also what ends
+# the recursion when pages link to each other.
+def mirror_page(path)
+  p = path.sub(/[#].*$/, "")
+  p += "index.html" if p.end_with?("/")
+  p += "/index.html" if File.directory?("git.kmx.io" + p)
+  return p if visited?(p)
+
+  visited!(p)
+  @session.visit("https://git.kmx.io" + path)
+  # Delete plain files that occupy the place of a directory leading to p.
+  dir = "git.kmx.io"
+  File.dirname(p).split("/").each do |item|
+    dir = "#{dir}/#{item}"
+    File.unlink(dir) if File.file?(dir)
+  end
+  # NOTE(review): a relative path is presumably resolved against
+  # Capybara.save_path - confirm against the Capybara version in use.
+  @session.save_page("." + p)
+  # Read the links before recursing: the calls below navigate @session away.
+  links = get_links()
+  puts links.inspect
+  # Array(...).compact tolerates a key missing from the Hash (nil) as well as
+  # nil entries, possibly produced by elements without a src/href.
+  # Assets are fetched first, in the order images, links, scripts.
+  [:local_img, :links, :scripts].each do |key|
+    Array(links[key]).compact.each { |asset| mirror_asset(asset) }
+  end
+  Array(links[:local]).compact.each { |local| mirror_page(local) }
+  save_visited()
+  p
+end
+
 def mirror
+  @visited = [] # paths handled so far; read and updated through visited?/visited!
+  mirror_page("/") # start mirroring at the site root path
   1
 end