# -*- coding: utf-8 -*-
"""
Created on Thu Oct 14 12:59:05 2021

@author: thydzik
"""

import pickle

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import ElementClickInterceptedException
from selenium.common.exceptions import StaleElementReferenceException

import time
from datetime import datetime

import os

import requests
import shutil

import warnings
warnings.filterwarnings("ignore", message="Unverified HTTPS request")

import re

import csv

import sqlite3 as sl

import random


driver = webdriver.Firefox()


#https://www.amazon.com.au/b?ie=UTF8&node=8415198051

urls = ['https://www.amazon.com.au/s/ref=s9_acss_bw_cg_WAREHOUS_2a1_w?bbn=8415198051&rh=n%3A4851626051&pf_rd_m=AMMK0LS9EDNM8&pf_rd_s=merchandised-search-3&pf_rd_r=CAKMN8CB51WHQX79Y1SV&pf_rd_t=101&pf_rd_p=515f72da-4098-44a3-8e37-80b70a605fe0&pf_rd_i=8415198051', #books
        'https://www.amazon.com.au/s/ref=s9_acss_bw_cg_WAREHOUS_2b1_w?ie=UTF8&node=4851799051&bbn=8415198051&pf_rd_m=AMMK0LS9EDNM8&pf_rd_s=merchandised-search-3&pf_rd_r=CAKMN8CB51WHQX79Y1SV&pf_rd_t=101&pf_rd_p=515f72da-4098-44a3-8e37-80b70a605fe0&pf_rd_i=8415198051', #tech
        'https://www.amazon.com.au/s/ref=s9_acss_bw_cg_WAREHOUS_2c1_w?ie=UTF8&node=4852150051&bbn=8415198051&pf_rd_m=AMMK0LS9EDNM8&pf_rd_s=merchandised-search-3&pf_rd_r=CAKMN8CB51WHQX79Y1SV&pf_rd_t=101&pf_rd_p=515f72da-4098-44a3-8e37-80b70a605fe0&pf_rd_i=8415198051', #home
        #'https://www.amazon.com.au/s/ref=s9_acss_bw_cg_WAREHOUS_2d1_w?ie=UTF8&node=4852264051&bbn=8415198051&pf_rd_m=AMMK0LS9EDNM8&pf_rd_s=merchandised-search-3&pf_rd_r=CAKMN8CB51WHQX79Y1SV&pf_rd_t=101&pf_rd_p=515f72da-4098-44a3-8e37-80b70a605fe0&pf_rd_i=8415198051', #dvds
        'https://www.amazon.com.au/s/ref=s9_acss_bw_cg_WAREHOUS_2e1_w?ie=UTF8&node=4851683051&bbn=8415198051&pf_rd_m=AMMK0LS9EDNM8&pf_rd_s=merchandised-search-3&pf_rd_r=CAKMN8CB51WHQX79Y1SV&pf_rd_t=101&pf_rd_p=515f72da-4098-44a3-8e37-80b70a605fe0&pf_rd_i=8415198051', #tech
        'https://www.amazon.com.au/s/ref=s9_acss_bw_cg_WAREHOUS_3a1_w?ie=UTF8&node=4852445051&bbn=8415198051&pf_rd_m=AMMK0LS9EDNM8&pf_rd_s=merchandised-search-3&pf_rd_r=CAKMN8CB51WHQX79Y1SV&pf_rd_t=101&pf_rd_p=515f72da-4098-44a3-8e37-80b70a605fe0&pf_rd_i=8415198051', #toys
        'https://www.amazon.com.au/s/ref=s9_acss_bw_cg_WAREHOUS_3b1_w?ie=UTF8&node=4851856051&bbn=8415198051&pf_rd_m=AMMK0LS9EDNM8&pf_rd_s=merchandised-search-3&pf_rd_r=CAKMN8CB51WHQX79Y1SV&pf_rd_t=101&pf_rd_p=515f72da-4098-44a3-8e37-80b70a605fe0&pf_rd_i=8415198051', #clothes
        'https://www.amazon.com.au/s/ref=s9_acss_bw_cg_WAREHOUS_3c1_w?ie=UTF8&node=4852617051&bbn=8415198051&pf_rd_m=AMMK0LS9EDNM8&pf_rd_s=merchandised-search-3&pf_rd_r=CAKMN8CB51WHQX79Y1SV&pf_rd_t=101&pf_rd_p=515f72da-4098-44a3-8e37-80b70a605fe0&pf_rd_i=8415198051', #toys
        'https://www.amazon.com.au/s/ref=s9_acss_bw_cg_WAREHOUS_3d1_w?ie=UTF8&node=4852675051&bbn=8415198051&pf_rd_m=AMMK0LS9EDNM8&pf_rd_s=merchandised-search-3&pf_rd_r=CAKMN8CB51WHQX79Y1SV&pf_rd_t=101&pf_rd_p=515f72da-4098-44a3-8e37-80b70a605fe0&pf_rd_i=8415198051' #toys
        ]

random.shuffle(urls)

prices = []

con = sl.connect('warehouse.db')

maxpagetries = 20

for url in urls:

    urlstart = url+'&s=price-asc-rank'
    
    driver.get(urlstart)
    
    html = driver.page_source
    
    match = re.search(r's-pagination-item s-pagination-disabled" aria-disabled="true">(\d*)</span>', html)
    if match:
    	maxpage = int(match.group(1))
    else:
        print("didn't get a page max for: "+url)
        maxpage = maxpagetries

    
    
    #for page in range(1,2):
    
    for page in range(1,min(maxpage,maxpagetries)):
    
        driver.get(urlstart+"&page="+str(page)+"&ref=sr_pg_"+str(page))
        
        html = driver.page_source
        
        results0 = re.findall(r'<div data-asin="(\w+)" (.*)</div>', html, re.IGNORECASE | re.MULTILINE)
        
        for result0 in results0:
            r_asin = result0[0]
            
            if r_asin:
                
            
                results = re.findall(r'a-text-normal" href="(.*?)"><span class="a-size-base.*?">(.*?)</span>\s</a>.*?\$([0-9\.]*)</span><span', result0[1], re.IGNORECASE | re.MULTILINE)
                
                for result in results:
                    
                    result = list(result)
                    
                    
                    r_url = "https://www.amazon.com.au"+result[0]
                    
                    r_search = r_url.split('ref=')[0]
                    
                    r_title = result[1]
                    r_title = r_title.replace("&nbsp;", " ")
                    r_title = r_title.replace("|", "-")
                    
                    
                    r_sale = result[2]
                    
                    
                    #try and determine if price exists
                    cur = con.cursor()
                    
                    cur.execute("SELECT * FROM items WHERE asin='"+r_asin+"'")
                    items = cur.fetchall()
                    if items: #found the item
                        i_price = items[0][1]
                        i_cat = items[0][2]
                        i_cat= str(i_cat)
                    #if item not found, or price is the same as the sale price
                    if (not items) or (float(r_sale) == float(i_price)):
                        #need to add it
                        #navigate to the url
                        driver.get(r_search)
                        
                        html0 = driver.page_source
                        
                        regex1 = r'(?:<a class="a-link-normal a-color-tertiary" href[\w\W]*?">\s+([\w ]+)\s+</a>[\w\W\s]*?)?"(?:price|base)"><span class="a-offscreen">\$([0-9\.]{2,})</span>[\w\W\s]*?(?:\[asin\]" value="|"currentAsin" : "|"asin": ?")(\w*)"[,>]*'
                        
                        match = re.search(regex1, html0)
                        if match:
                            i_cat = str(match.group(1))
                            i_price = match.group(2)
                            i_asin = match.group(3)
                            
                            #if r_asin != i_asin:
                            #    print("asin different for "+r_asin+" != "+i_asin+": "+r_search)
                            #else:
                            #write the value to database
                            if not items:
                                cur.execute("INSERT INTO items VALUES ('"+r_asin+"',"+i_price+",'"+i_cat+"')")
                            else: #need to updated instead
                                cur.execute("UPDATE items SET price = "+i_price+", cat = '"+i_cat+"' WHERE asin = '"+r_asin+"';")
                            con.commit()
                        else:
                            print("didn't get a match for: "+r_search)
                            i_price = r_sale
                            i_cat = ''
                    
                    r_discount = round(100*(1-(float(r_sale)/float(i_price))),1)
                    
                    price = [] #not prices
                    price.append(r_title)
                    price.append(r_url)
                    price.append(i_cat)
                    price.append(float(r_sale))
                    price.append(float(i_price))
                    price.append(r_discount)
                    price.append("["+r_title+"]("+r_url+")|"+i_cat+"|$"+r_sale+"|$"+str(i_price)+"|"+str(r_discount)+"%")
                    
                    prices.append(price)
        #break
    #break

prices = sorted(prices, key=lambda x:x[5], reverse=True)
con.close()

# open the file in the write mode
f = open('amazonwarehouse.csv', 'w', newline='', encoding='utf-8')

# create the csv writer
writer = csv.writer(f)
writer.writerow(["Product","Url","Cat", "Sale", "Original","Discount","Product|Cat|Sale|Original|Discount"])
writer.writerow(["Product","Url","Cat", "Sale", "Original","Discount","-|-|-|-|-"])
# write a row to the csv file

for price in prices:

    writer.writerow(price)

# close the file
f.close()

os.startfile('amazonwarehouse.csv')

driver.close()