Hash :
eda464e3
Author :
Thomas de Grivel
Date :
2022-02-01T01:29:17
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
#!/usr/bin/env ruby
require 'capybara'
require 'fileutils'
require 'io/console'
require 'json'
require 'open-uri'
require 'optparse'
# Parsed command-line settings: +cmd+ is the selected command (:help or
# :mirror, nil when none was given) and +user+ is the optional login name.
Options = Struct.new(:cmd, :user)
# Print the command-line help text to standard output.
def usage
  puts <<~'USAGE'
    Usage :
    $ kmxgit (-h | --help)
    Display this help message and exit.
    $ kmxgit [OPTIONS] (-m | --mirror)
    Mirror website
    Available options :
    -u USER | --user=USER Login as USER.
  USAGE
end
# Parse command-line options from +argv+ and run the selected command.
#
# @param argv [Array<String>] command-line arguments; recognised options are
#   removed from this array in place by OptionParser#parse!.
# @return [Integer] 0 after printing help, 1 when no command was selected;
#   for --mirror, whatever #mirror returns.
# @raise [OptionParser::ParseError] on an unknown or malformed option.
def main(argv)
  @options = Options.new(nil, nil)
  parser = OptionParser.new do |opts|
    opts.on("-h", "--help") { @options.cmd = :help }
    opts.on("-m", "--mirror") { @options.cmd = :mirror }
    opts.on("-uUSER", "--user=USER") { |user| @options.user = user }
  end
  # Parse the array we were given; a bare parse! would read the global ARGV
  # and silently ignore the argv parameter.
  parser.parse!(argv)

  case @options.cmd
  when :help
    usage
    0
  when :mirror
    FileUtils.mkdir_p "git.kmx.io"
    # Pages saved through the session land under the mirror directory.
    Capybara.configure { |config| config.save_path = "git.kmx.io" }
    @session = Capybara::Session.new(:selenium)
    login if @options.user
    mirror
  else
    usage
    1
  end
end
# Log the browser session in as @options.user.
#
# Opens the login page, then keeps prompting on the console for the password
# (and afterwards for the TOTP code) for as long as the corresponding form
# field is still present on the page.
#
# @raise [RuntimeError] "Failed to login" if the session is still on
#   /_log_in once both prompts are done.
def login
  browser = @session
  browser.visit("https://git.kmx.io/_log_in")

  while browser.has_selector?('form #user_password')
    secret = IO.console.getpass("Password: ")
    browser.fill_in('Login', with: @options.user)
    browser.fill_in('Password', with: secret)
    browser.click_button('Submit')
  end

  while browser.has_selector?('form #user_totp')
    code = IO.console.getpass("TOTP: ")
    browser.fill_in('TOTP', with: code)
    browser.click_button('Submit')
  end

  raise "Failed to login" if browser.current_path == "/_log_in"
end
# Collect the URLs referenced by the page currently loaded in @session.
#
# Anchor and image URLs have their fragment removed, are de-duplicated, and
# are split into site-local ones (returned as paths starting with "/", i.e.
# with the "https://git.kmx.io" prefix removed) and all others. Elements
# without a usable href/src are ignored.
#
# @return [Hash{Symbol => Array<String>}] with keys
#   :local / :external          - anchor targets
#   :local_img / :external_img  - image sources
#   :links / :scripts           - raw href/src values of <link> and <script>
#                                 elements (missing and empty values dropped)
def get_links
  site_root = "https://git.kmx.io/"

  # Turns a list of raw attribute values into [site paths, other URLs].
  classify = lambda do |urls|
    bases = urls.compact.map { |url| url.split("#").first }
    # split returns [] for "", so a fragment-only or empty value yields nil/"".
    bases = bases.reject { |base| base.nil? || base.empty? }.uniq
    on_site, off_site = bases.partition { |base| base.start_with?(site_root) }
    # Keep the leading "/" so the result is a path relative to the site root.
    [on_site.map { |base| base.delete_prefix("https://git.kmx.io") }, off_site]
  end

  present = ->(url) { !(url.nil? || url.empty?) }

  local, external = classify.call(@session.all("a").map { |elt| elt[:href] })
  local_img, external_img = classify.call(@session.all("img").map { |elt| elt[:src] })
  links = @session.all("link", visible: false).map { |elt| elt[:href] }.select(&present)
  scripts = @session.all("script", visible: false).map { |elt| elt[:src] }.select(&present)

  { local: local,
    external: external,
    local_img: local_img,
    external_img: external_img,
    links: links,
    scripts: scripts }
end
# Look +path+ up in @visited.
# Returns whatever is stored under that key (visited! stores true).
def visited?(path)
@visited[path]
end
# Record +path+ in @visited by storing true under it, so that a later
# visited?(path) is truthy.
def visited!(path)
@visited[path] = true
end
# Serialise @visited to JSON and write it to git.kmx.io/.visited.json,
# replacing any previous content of that file.
def save_visited
  payload = @visited.to_json
  File.write("git.kmx.io/.visited.json", payload)
end
# Download one static asset from https://git.kmx.io into the local mirror
# directory, unless it has been fetched already.
#
# @param path [String] site path of the asset (starting with "/"), optionally
#   carrying a query string and/or fragment. An absolute
#   "https://git.kmx.io/..." URL is accepted as well and reduced to its path.
# @return [String] the path with query string and fragment removed; the
#   asset is stored at "git.kmx.io" + that path.
# @raise [OpenURI::HTTPError, SystemCallError] if the download or the write fails.
def mirror_asset(path)
  site = "https://git.kmx.io"
  # href/src values can arrive as absolute URLs; prepending the host again
  # would produce an invalid URL.
  site_path = path.start_with?("#{site}/") ? path.delete_prefix(site) : path
  p = site_path.sub(/[#].*$/, "").sub(/[?].*$/, "")
  # Whatever is still not a site path (external URL, empty value) cannot be
  # fetched from this site, so leave it alone.
  return p unless p.start_with?("/")

  unless visited?(p)
    visited!(p)
    destination = "git.kmx.io#{p}"
    FileUtils.mkdir_p(File.dirname(destination))
    # URI.open (not Kernel#open, which stopped handling URLs in Ruby 3.0);
    # the block form closes the stream when done.
    URI.open("#{site}#{site_path}") do |io|
      IO.copy_stream(io, destination)
    end
    save_visited
  end
  p
end
# Mirror one page of https://git.kmx.io and, recursively, every site-local
# page it links to, together with the assets it references.
#
# @param path [String] site path of the page (starting with "/"); a fragment
#   is ignored when deriving the local file name.
# @return [String] the path the page is stored under, relative to the mirror
#   directory ("index.html" is appended for directory-like paths).
def mirror_page(path)
  file_path = path.sub(/[#].*$/, "")
  file_path += "index.html" if file_path.end_with?("/")
  # A path already mirrored as a directory gets its page stored inside it.
  file_path += "/index.html" if File.directory?("git.kmx.io#{file_path}")
  return file_path if visited?(file_path)

  visited!(file_path)
  @session.visit("https://git.kmx.io#{path}")

  # A plain file saved earlier may sit where this page needs a directory;
  # remove any such file along the way.
  dir = "git.kmx.io"
  File.dirname(file_path).split("/").each do |item|
    dir = "#{dir}/#{item}"
    File.unlink(dir) if File.file?(dir)
  end
  @session.save_page(".#{file_path}")

  links = get_links
  puts links.inspect
  # Array() tolerates a key that is absent from the hash instead of failing
  # with NoMethodError on nil.
  %i[local_img links scripts].each do |kind|
    Array(links[kind]).each { |asset| mirror_asset(asset) }
  end
  Array(links[:local]).each { |local| mirror_page(local) }
  save_visited
  file_path
end
# Mirror the whole site, starting from its root page.
#
# Resets the record of visited paths before crawling.
#
# @return [Integer] always 1.
#   NOTE(review): main hands this value back as its own result, where 0 is
#   used for success - confirm that 1 is intended here.
def mirror
  # Keyed by path String (see visited? / visited!), so this must be a Hash:
  # indexing an Array with a String raises TypeError.
  @visited = {}
  mirror_page("/")
  1
end
# Script entry point: run main with the process arguments. The value main
# returns is not used here.
main ARGV