1. All-company data classified by the sentiment analysis engine

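All four functions in this post share the same preamble, which the original omits. A minimal sketch of the imports and the Chrome options object they assume (Selenium 3-era API with find_element_by_xpath and chrome_options; call_url, call_all_company, and call_GRU_predict are the project's own modules):

import os
import re
import time
from datetime import datetime, timedelta

import pandas as pd
import pyautogui
from bs4 import BeautifulSoup
from selenium import webdriver

import call_url          # project module that supplies review-page URLs
import call_all_company  # project module containing the functions below
import call_GRU_predict  # project module wrapping the GRU sentiment model

options = webdriver.ChromeOptions()
# the sort order is picked with real pyautogui keystrokes, so the browser
# must stay visible: do not enable headless mode here
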
def weekly_all_classified_data(url_dict):
    start_date,end_date=start_to_date(),end_to_date()
    maximum,minimum=score_input()
    for key, value in url_dict.items():
        # one fresh browser per company; the chromedriver path is machine-specific
        driver = webdriver.Chrome('C:\\Users\\LeeKwanHyeong\\chromedriver_win32\\chromedriver.exe', chrome_options=options)
        driver.implicitly_wait(2)
        driver.get(value)
        # open the sort dropdown
        elem = driver.find_element_by_xpath("//span[@class='DPvwYc']")
        elem.click()
        time.sleep(3)
        # pyautogui sends real keystrokes to the focused window: two Up
        # presses plus Enter pick a sort order in the dropdown
        pyautogui.press('up')
        time.sleep(0.7)
        pyautogui.press('up')
        time.sleep(0.7)
        pyautogui.press('enter')
        
        # scroll to the bottom repeatedly until the "show more reviews"
        # button renders, then click it once and stop scrolling
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(0.5)
            try:
                element = driver.find_element_by_xpath('//div[@class="U26fgb O0WRkf oG5Srb C0oVfc n9lfJ"]')
                if element is not None:
                    element.click()
                    break
            except Exception:
                continue

        html=driver.page_source
        driver.quit()
        bsObj=BeautifulSoup(html,'lxml')
        div_reviews=bsObj.find_all("div",{"class":"d15Mdf bAhLNe"})
        
        
        company_list, grade_list, date_list, content_list = [], [], [], []
        for div in div_reviews:
            # pull year, month, day out of the review date string
            date_ = div.find('span', {"class": "p2TkOb"}).get_text()
            t = re.findall(r"\d*\.\d+|\d+", date_)
            year, month, day = int(t[0]), int(t[1]), int(t[2])
            dd = datetime(year, month, day)
            if start_date <= dd <= end_date:
                content = div.find('span', {'jsname': 'bN97Pc'}).get_text()
                content = content.replace("전체 리뷰", '')
                # keep Hangul, alphanumerics, and basic punctuation; '-' sits
                # at the end of the class so it is a literal, not a range
                content = re.sub('[^가-힣0-9a-zA-Z_!?@#%^&=:;,\"\'<>\\s-]', '', content)
                # the star rating renders as one div per filled star
                grade = len(div.find_all('div', {'class': 'vQHuPe bUWb7c'}))
                percentage, word = call_GRU_predict.GRU_predict(content)
                # keep reviews whose sentiment score lies inside the requested
                # band and that carry more than six tokens
                if minimum < percentage < maximum and len(word) > 6:
                    date_list.append(dd)
                    content_list.append(content)
                    grade_list.append(grade)
                    company_list.append(key)
        data_frame = pd.DataFrame({
            'company': company_list,
            'date': date_list,
            'grade': grade_list,
            'content': content_list,
        })
        
        # 3 stars and up count as "good", the rest as "bad"
        good_data = data_frame[data_frame['grade'] > 2]
        bad_data = data_frame[data_frame['grade'] < 3]
        
        # check for an existing file before opening the writer
        # (os.path.exists takes a path string, not the ExcelWriter object)
        xlsx_path = "C:/data/thisweek_classified.xlsx"
        first_write = not os.path.exists(xlsx_path)
        writer = pd.ExcelWriter(xlsx_path)
        good_data.to_excel(writer, sheet_name='good', header=first_write)
        bad_data.to_excel(writer, sheet_name='bad', header=first_write)
        writer.save()
        csv_path = 'C:\\Data\\thisweek_classified.csv'
        if not os.path.exists(csv_path):
            data_frame.to_csv(csv_path, index=False, mode='w', encoding='utf_8_sig')
        else:
            data_frame.to_csv(csv_path, index=False, mode='a', encoding='utf_8_sig', header=False)
    return data_frame
        
def classified_all_data():
    url_dict = call_url.call_url()  # dict of company name -> review-page URL
    # the date window and score band are prompted for inside
    # weekly_all_classified_data
    load = call_all_company.weekly_all_classified_data(url_dict)
    return load
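The date window and score band come from start_to_date(), end_to_date(), and score_input(), which are not defined in this post; likewise, call_url.call_url() is assumed to return a {company name: review page URL} dict and call_url.select_url() a (url, company) pair, judging from how their results are unpacked. A plausible sketch of the three missing helpers, with hypothetical prompts and formats:

def start_to_date():
    # hypothetical: read the start of the week as YYYY-MM-DD
    y, m, d = map(int, input('start date (YYYY-MM-DD): ').split('-'))
    return datetime(y, m, d)

def end_to_date():
    # hypothetical: read the end of the week as YYYY-MM-DD
    y, m, d = map(int, input('end date (YYYY-MM-DD): ').split('-'))
    return datetime(y, m, d)

def score_input():
    # hypothetical: bounds for the GRU sentiment score, matching the
    # maximum, minimum unpacking order used above
    maximum = float(input('maximum score: '))
    minimum = float(input('minimum score: '))
    return maximum, minimum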

2. All-company data not classified by the sentiment analysis engine

 

def weekly_all_unclassified_data(url_dict):
    start_date,end_date=start_to_date(),end_to_date()
    for key, value in url_dict.items():
        driver = webdriver.Chrome('C:\\Users\\LeeKwanHyeong\\chromedriver_win32\\chromedriver.exe',chrome_options=options)
        driver.implicitly_wait(2)
        driver.get(value)
        # open the sort dropdown
        elem = driver.find_element_by_xpath("//span[@class='DPvwYc']")
        elem.click()
        time.sleep(3)
        pyautogui.press('up')
        time.sleep(0.7)
        pyautogui.press('up')
        time.sleep(0.7)
        pyautogui.press('enter')
        
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(0.5)
            try:
                element=driver.find_element_by_xpath('//div[@class="U26fgb O0WRkf oG5Srb C0oVfc n9lfJ"]')
                if(element is not None):
                    element.click()
                    break
            except Exception:
                continue

        html=driver.page_source
        driver.quit()
        bsObj=BeautifulSoup(html,'lxml')
        div_reviews=bsObj.find_all("div",{"class":"d15Mdf bAhLNe"})
        
        
        company_list, grade_list, date_list, content_list = [], [], [], []
        for div in div_reviews:
            date_ = div.find('span', {"class": "p2TkOb"}).get_text()
            t = re.findall(r"\d*\.\d+|\d+", date_)
            year, month, day = int(t[0]), int(t[1]), int(t[2])
            dd = datetime(year, month, day)
            if start_date <= dd <= end_date:
                content = div.find('span', {'jsname': 'bN97Pc'}).get_text()
                content = content.replace("전체 리뷰", '')
                content = re.sub('[^가-힣0-9a-zA-Z_!?@#%^&=:;,\"\'<>\\s-]', '', content)
                grade = len(div.find_all('div', {'class': 'vQHuPe bUWb7c'}))
                # unclassified variant: keep every in-range review without
                # sentiment filtering
                date_list.append(dd)
                content_list.append(content)
                grade_list.append(grade)
                company_list.append(key)
                
        data_frame = pd.DataFrame({
            'company': company_list,
            'date': date_list,
            'grade': grade_list,
            'content': content_list,
        })
        good_data = data_frame[data_frame['grade'] > 2]
        bad_data = data_frame[data_frame['grade'] < 3]

        xlsx_path = "C:/data/thisweek_unclassified.xlsx"
        first_write = not os.path.exists(xlsx_path)
        writer = pd.ExcelWriter(xlsx_path)
        good_data.to_excel(writer, sheet_name='good', header=first_write)
        bad_data.to_excel(writer, sheet_name='bad', header=first_write)
        writer.save()

        csv_path = 'C:\\Data\\thisweek_unclassified.csv'
        if not os.path.exists(csv_path):
            data_frame.to_csv(csv_path, index=False, mode='w', encoding='utf_8_sig')
        else:
            data_frame.to_csv(csv_path, index=False, mode='a', encoding='utf_8_sig', header=False)
    return data_frame

def none_classified_all_data():
    url_dict=call_url.call_url()
    load=call_all_company.weekly_all_unclassified_data(url_dict)
    return load
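A caveat that applies to every Excel block in this post: pd.ExcelWriter opened in its default write mode creates a fresh workbook on save, so toggling the header on or off changes the column headers but does not append to last week's file the way the CSV branches do. One way to get a real append, sketched with plain pandas calls (read whatever exists, concatenate, rewrite):

def append_to_excel(path, frames_by_sheet):
    # frames_by_sheet, e.g. {'good': good_data, 'bad': bad_data}
    merged = dict(frames_by_sheet)
    if os.path.exists(path):
        existing = pd.read_excel(path, sheet_name=None)  # sheet name -> DataFrame
        for name, frame in merged.items():
            if name in existing:
                merged[name] = pd.concat([existing[name], frame], ignore_index=True)
    writer = pd.ExcelWriter(path)
    for name, frame in merged.items():
        frame.to_excel(writer, sheet_name=name, index=False)
    writer.save()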

 

3. Selected-company data classified by the sentiment analysis engine

def weekly_selected_classified_data(url,company):
    
    start_date,end_date=start_to_date(),end_to_date()
    maximum,minimum=score_input()
    
    driver = webdriver.Chrome('C:\\Users\\LeeKwanHyeong\\chromedriver_win32\\chromedriver.exe',chrome_options=options)
    driver.implicitly_wait(2)
    driver.get(url)
    # open the sort dropdown
    elem = driver.find_element_by_xpath("//span[@class='DPvwYc']")
    elem.click()
    time.sleep(3)
    pyautogui.press('up')
    time.sleep(0.7)
    pyautogui.press('up')
    time.sleep(0.7)
    pyautogui.press('enter')
    
        
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(0.5)
        try:
            element=driver.find_element_by_xpath('//div[@class="U26fgb O0WRkf oG5Srb C0oVfc n9lfJ"]')
            if(element is not None):
                element.click()
                break
        except Exception:
            continue
    html=driver.page_source
    driver.quit()
    bsObj=BeautifulSoup(html,'lxml')
    div_reviews=bsObj.find_all("div",{"class":"d15Mdf bAhLNe"})
    
    
    company_list,grade_list,date_list,content_list=[],[],[],[]
    
    for div in div_reviews:
        date_ = div.find('span', {"class": "p2TkOb"}).get_text()
        t = re.findall(r"\d*\.\d+|\d+", date_)
        year, month, day = int(t[0]), int(t[1]), int(t[2])
        dd = datetime(year, month, day)
        if start_date <= dd <= end_date:
            content = div.find('span', {'jsname': 'bN97Pc'}).get_text()
            content = content.replace("전체 리뷰", '')
            content = re.sub('[^가-힣0-9a-zA-Z_!?@#%^&=:;,\"\'<>\\s-]', '', content)
            grade = len(div.find_all('div', {'class': 'vQHuPe bUWb7c'}))
            percentage, word = call_GRU_predict.GRU_predict(content)
            if minimum < percentage < maximum and len(word) > 6:
                date_list.append(dd)
                content_list.append(content)
                grade_list.append(grade)
                company_list.append(company)
    data_frame = pd.DataFrame({
        'company': company_list,
        'date': date_list,
        'grade': grade_list,
        'content': content_list,
    })
    good_data = data_frame[data_frame['grade'] > 2]
    bad_data = data_frame[data_frame['grade'] < 3]
    # check the same file that is written below, not the all-company CSV
    csv_path = 'C:\\Data\\thisweek_selected_classified.csv'
    if not os.path.exists(csv_path):
        data_frame.to_csv(csv_path, index=False, mode='w', encoding='utf_8_sig')
    else:
        data_frame.to_csv(csv_path, index=False, mode='a', encoding='utf_8_sig', header=False)
    
    xlsx_path = 'C:/data/thisweek_selected_classified.xlsx'
    first_write = not os.path.exists(xlsx_path)
    writer = pd.ExcelWriter(xlsx_path)
    good_data.to_excel(writer, sheet_name='good', header=first_write)
    bad_data.to_excel(writer, sheet_name='bad', header=first_write)
    writer.save()
    return data_frame
    
    
def classified_selected_data():
    url,company=call_url.select_url()
    load=call_all_company.weekly_selected_classified_data(url,company)
    return load
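The classified variants assume call_GRU_predict.GRU_predict returns a (score, tokens) pair: a sentiment score compared against the score_input() band, and a token sequence whose length serves as a minimum-content check. A hypothetical stub with that shape, handy for exercising the scraper without the trained model:

def GRU_predict_stub(text):
    # stand-in for call_GRU_predict.GRU_predict; the real score scale is not
    # shown in the post, so this simply returns a neutral value
    tokens = text.split()
    return 50.0, tokens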

 

 

4. Selected-company data not classified by the sentiment analysis engine

def weekly_selected_unclassified_data(url,company):
    start_date,end_date=start_to_date(),end_to_date()
    driver = webdriver.Chrome('C:\\Users\\LeeKwanHyeong\\chromedriver_win32\\chromedriver.exe',chrome_options=options)
    driver.implicitly_wait(2)
    driver.get(url)
    # open the sort dropdown
    elem = driver.find_element_by_xpath("//span[@class='DPvwYc']")
    elem.click()
    time.sleep(3)
    pyautogui.press('up')
    time.sleep(0.7)
    pyautogui.press('up')
    time.sleep(0.7)
    pyautogui.press('enter')
    
        
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(0.5)
        try:
            element=driver.find_element_by_xpath('//div[@class="U26fgb O0WRkf oG5Srb C0oVfc n9lfJ"]')
            if(element is not None):
                element.click()
                break
        except Exception:
            continue
    html=driver.page_source
    driver.quit()
    bsObj=BeautifulSoup(html,'lxml')
    div_reviews=bsObj.find_all("div",{"class":"d15Mdf bAhLNe"})
    
    
    
    company_list,grade_list,date_list,content_list=[],[],[],[]
    
    for div in div_reviews:
        date_ = div.find('span', {"class": "p2TkOb"}).get_text()
        t = re.findall(r"\d*\.\d+|\d+", date_)
        year, month, day = int(t[0]), int(t[1]), int(t[2])
        dd = datetime(year, month, day)
        if start_date <= dd <= end_date:
            content = div.find('span', {'jsname': 'bN97Pc'}).get_text()
            content = content.replace("전체 리뷰", '')
            content = re.sub('[^가-힣0-9a-zA-Z_!?@#%^&=:;,\"\'<>\\s-]', '', content)
            grade = len(div.find_all('div', {'class': 'vQHuPe bUWb7c'}))
            # unclassified variant: keep every in-range review without
            # sentiment filtering
            date_list.append(dd)
            content_list.append(content)
            grade_list.append(grade)
            company_list.append(company)
    data_frame = pd.DataFrame({
        'company': company_list,
        'date': date_list,
        'grade': grade_list,
        'content': content_list,
    })
    good_data = data_frame[data_frame['grade'] > 2]
    # the bad bucket is grades below 3, as in the other variants
    bad_data = data_frame[data_frame['grade'] < 3]
    
    writer=pd.ExcelWriter("C:/data/thisweek_selected_unclassified.xlsx")
    if not os.path.exists(writer):
        good_data.to_excel(writer,sheet_name='good',header=True)
        bad_data.to_excel(writer,sheet_name='bad',header=True)
    else:
        good_data.to_excel(writer,sheet_name='good',header=False)
        bad_data.to_excel(writer,sheet_name='bad',header=False)
    writer.save()
    csv_path = 'C:\\Data\\thisweek_selected_unclassified.csv'
    if not os.path.exists(csv_path):
        data_frame.to_csv(csv_path, index=False, mode='w', encoding='utf_8_sig')
    else:
        data_frame.to_csv(csv_path, index=False, mode='a', encoding='utf_8_sig', header=False)
    
    
    return data_frame
    
    
    
    
def none_classified_selected_data():
    url, company = call_url.select_url()
    load = call_all_company.weekly_selected_unclassified_data(url, company)
    return load
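
A small driver showing how the four entry points fit together (a sketch: it assumes chromedriver sits at the hard-coded path above and that the browser window keeps focus for the pyautogui keystrokes):

if __name__ == '__main__':
    all_classified = classified_all_data()        # section 1
    all_raw = none_classified_all_data()          # section 2
    one_classified = classified_selected_data()   # section 3
    one_raw = none_classified_selected_data()     # section 4
    print(all_classified.head())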
