Browse Source

init

master
Jonas Leder 2 months ago
commit
d3fdf0c1f0
5 changed files with 5092 additions and 0 deletions
  1. BIN
      Linkliste.xlsx
  2. BIN
      Websites/xxx.png
  3. BIN
      adblock.zip
  4. 64
    0
      cloner.py
  5. 5028
    0
      geckodriver.log

BIN
Linkliste.xlsx View File


BIN
Websites/xxx.png View File


BIN
adblock.zip View File


+ 64
- 0
cloner.py View File

@@ -0,0 +1,64 @@
1
+from selenium import webdriver #Control library for Firefox
2
+from selenium.webdriver.firefox.options import Options #Options library for firefox
3
+from openpyxl import load_workbook #library for reading excel tables
4
+import os #For checking if File exists
5
+import sys #For getting the Path of the file.
6
+
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
runfirefoxheadless = False  # set True to disable the Firefox GUI
filename = "Linkliste.xlsx"  # Excel file with the links (example layout at the bottom; reading starts at row 2)
sheetname = "Linkliste"  # name of the sheet inside the Excel file
adblockplusfilename = "adblock.zip"  # Adblock Plus archive, expected next to this script
adblockplusdownloadlink = "https://nextcloud.jonasled.de/index.php/s/xQWLPxzFYwWEFE2/download"  # used if ABP is missing
screenshot_dir = "Websites"  # folder (relative to the working directory) that receives the PNG files

# Locate the add-on next to the script. abspath() is required: when the script
# is started as `python cloner.py`, dirname(sys.argv[0]) is "" and plain string
# concatenation would point at the filesystem root ("/adblock.zip").
abp_path = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), adblockplusfilename)

# JavaScript snippets that report the full size of the rendered document.
PAGE_WIDTH_SCRIPT = "return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);"
PAGE_HEIGHT_SCRIPT = "return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);"


def normalize_url(link):
    """Return *link* as a URL with a scheme, defaulting to ``http://``.

    Only a real ``http://`` or ``https://`` prefix counts as a scheme, so a
    host name that merely starts with "http" (e.g. ``httpbin.org``) still
    gets the default scheme prepended.
    """
    url = str(link).strip()
    if url.lower().startswith(("http://", "https://")):
        return url
    return "http://" + url


def screenshot_path(name):
    """Return the PNG path inside ``screenshot_dir`` for the entry *name*."""
    return os.path.join(screenshot_dir, str(name) + ".png")


def iter_links(sheet, first_row=2):
    """Yield ``(name, link)`` for each row until column B is empty.

    *sheet* is indexed by cell coordinate (``sheet["A2"]``) and every cell
    exposes ``.value``. The emptiness check runs before a row is yielded,
    so a sheet without any links produces no items.
    """
    row = first_row
    while True:
        link = sheet["B" + str(row)].value
        if link is None:
            return
        yield sheet["A" + str(row)].value, link
        row += 1


def download_adblock(url, target):
    """Download the Adblock Plus archive from *url* and store it at *target*.

    Raises ``requests.HTTPError`` on a non-success status so an error page is
    never written to disk as if it were the add-on.
    """
    import requests  # only needed when the add-on is missing

    response = requests.get(url, timeout=60)
    response.raise_for_status()
    with open(target, "wb") as handle:
        handle.write(response.content)


def capture_page(driver, url, target):
    """Load *url* in *driver* and save a full-page screenshot to *target*."""
    # Start from a small window so the measured size reflects the page
    # content and not a previously enlarged window.
    driver.set_window_size(1080, 100)
    driver.get(url)

    width = driver.execute_script(PAGE_WIDTH_SCRIPT)
    height = driver.execute_script(PAGE_HEIGHT_SCRIPT)

    driver.set_window_size(width + 100, height + 100)
    # save_screenshot reports I/O problems via its return value only.
    if not driver.save_screenshot(target):
        print("Could not save " + target)


def main():
    """Screenshot every link of the Excel list that has no PNG yet."""
    if not os.path.exists(abp_path):
        print("Adblock Plus not found, downloading")
        download_adblock(adblockplusdownloadlink, abp_path)

    # Prepare Firefox for use (start it and install the ABP add-on).
    print("starting firefox")
    options = Options()
    options.log.level = "fatal"
    if runfirefoxheadless:
        # Command-line switch instead of the `headless` property, which is
        # no longer available in current Selenium releases.
        options.add_argument("-headless")
    driver = webdriver.Firefox(options=options)

    try:
        print("installing adblock plus")
        driver.install_addon(abp_path, temporary=True)

        # Load the Excel table.
        workbook = load_workbook(filename)
        sheet = workbook[sheetname]

        os.makedirs(screenshot_dir, exist_ok=True)

        # Visit every row and take a screenshot unless one already exists.
        for name, link in iter_links(sheet):
            link = str(link)
            target = screenshot_path(name)
            if os.path.exists(target):
                print("Skipping " + link)
                continue
            print("Getting " + link)
            capture_page(driver, normalize_url(link), target)
    finally:
        # Always close the browser, even if a page or the workbook fails.
        driver.quit()


if __name__ == "__main__":
    main()

# Example layout of the Excel sheet (first row is the header):
# Filename   Link
# google     google.de
# homepage   https://jonasled.de

+ 5028
- 0
geckodriver.log
File diff suppressed because it is too large
View File


Loading…
Cancel
Save