!pip install newsapi-python
from newsapi import NewsApiClient
from datetime import date, timedelta
# Minimal NewsAPI example: print the five most relevant English articles
# about the phrase that were published during the last seven days.
phrase = 'Apple stock'
# NOTE: placeholder key - replace it with a real NewsAPI key before running.
newsapi = NewsApiClient(api_key='your_news_api_key_here')
# Earliest publication date to search from: one week before today.
my_date = date.today() - timedelta(days=7)
articles = newsapi.get_everything(
    q=phrase,
    from_param=my_date.isoformat(),
    language="en",
    sort_by="relevancy",
    page_size=5,
)
# One line per article: title | publication timestamp | link.
for article in articles['articles']:
    print(article['title'] + ' | ' + article['publishedAt'] + ' | ' + article['url'])
# Output:
# Daily Crunch: Apple commits to carbon neutrality | 2020-07-21T22:10:52Z | http://techcrunch.com/2020/07/21/daily-crunch-apple-commits-to-carbon-neutrality/
# Daily Crunch: Slack files antitrust complaint against Microsoft | 2020-07-22T22:16:02Z | http://techcrunch.com/2020/07/22/daily-crunch-slack-microsoft-antitrust/
# Jamf ups its IPO range, now targets a valuation of up to $2.7B | 2020-07-20T17:04:25Z | http://techcrunch.com/2020/07/20/jamf-ups-its-ipo-range-now-targets-a-valuation-of-up-to-2-7b/
# S&P 500 turns positive for 2020, but most stocks are missing the party — Reuters | 2020-07-21T19:45:00Z | https://www.reuters.com/article/us-usa-stocks-performance-idUSKCN24M2RD
# Avoid Apple stock as uncertainties from coronavirus weigh on iPhone launch, Goldman Sachs says | 2020-07-23T13:50:13Z | https://www.businessinsider.com/apple-stock-price-rally-risk-coronavirus-iphone-delay-earnings-goldman-2020-7
!pip install newsapi-python
!pip install yfinance
import sys
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from newsapi import NewsApiClient
from datetime import date, timedelta, datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
# Fetch the VADER lexicon first: SentimentIntensityAnalyzer() loads it when
# it is constructed and raises LookupError if it is not on disk yet.
nltk.download('vader_lexicon')
# Shared analyzer used to score article text.
sia = SentimentIntensityAnalyzer()
# Let pandas display up to 1000 characters per cell, so long text columns
# are not cut off at the default width.
pd.set_option('display.max_colwidth', 1000)
def get_sources(category=None):
    """Return the ids of the English-language sources listed by NewsAPI.

    Args:
        category: Optional category name (for example ``'business'``).
            When ``None`` (the default) sources of every category are kept.

    Returns:
        list: The ``id`` of every matching source, in the order the API
        returned them.
    """
    # NOTE: placeholder key - replace it with a real NewsAPI key before running.
    newsapi = NewsApiClient(api_key='your api_key_here')
    sources = newsapi.get_sources()
    # A single filter covers both cases: the category test is skipped
    # entirely when no category was requested.
    return [
        source['id']
        for source in sources['sources']
        if source['language'] == 'en'
        and (category is None or source['category'] == category)
    ]
# Count the English-language sources across all categories.
len(get_sources())
# Output:
# 81
# List the ids of the English-language sources in the 'business' category.
get_sources('business')
# Output:
# ['australian-financial-review',
#  'bloomberg',
#  'business-insider',
#  'business-insider-uk',
#  'financial-post',
#  'fortune',
#  'the-wall-street-journal']
def get_articles_sentiments(keywrd, startd, sources_list=None, show_all_articles=False):
    """Score the sentiment of one day's news articles matching a keyword.

    Requests up to 100 English articles, sorted by relevancy, published
    between ``startd`` and one day later. Articles whose title was already
    seen are skipped. Each remaining article is scored with the module-level
    VADER analyzer ``sia`` on the text "<title>. <description>".

    Args:
        keywrd: Search phrase sent to NewsAPI as ``q``.
        startd: Start of the one-day window. Either a string in
            ``'%d-%b-%Y'`` format (for example ``'21-Jul-2020'``) or a
            ``date``/``datetime`` object.
        sources_list: Optional collection of NewsAPI source ids. When it is
            empty or ``None`` the search is not restricted by source.
        show_all_articles: Unused; kept so existing callers keep working.

    Returns:
        pandas.DataFrame: One row per unique article title, in the order the
        API returned them, with columns ``Sentiment`` (VADER compound score),
        ``URL``, ``Title`` and ``Description``. The frame is empty when the
        query matches no article.

    Raises:
        ValueError: If ``startd`` is a string that does not match
            ``'%d-%b-%Y'``.
    """
    # NOTE: placeholder key - replace it with a real NewsAPI key before running.
    newsapi = NewsApiClient(api_key='your_api_key_here')
    my_date = datetime.strptime(startd, '%d-%b-%Y') if isinstance(startd, str) else startd

    # Build the request once; the source filter is the only optional part.
    query = {
        'q': keywrd,
        'from_param': my_date.isoformat(),
        'to': (my_date + timedelta(days=1)).isoformat(),
        'language': "en",
        'sort_by': "relevancy",
        'page_size': 100,
    }
    if sources_list:
        query['sources'] = ",".join(sources_list)
    articles = newsapi.get_everything(**query)

    date_sentiments_list = []
    seen = set()
    for article in articles['articles']:
        title = str(article['title'])
        # The same story can appear more than once; score each title once.
        if title in seen:
            continue
        seen.add(title)
        description = article['description']
        # A missing description would otherwise be scored as the literal
        # word "None", so only the title is used in that case.
        if description is None:
            article_content = title
        else:
            article_content = title + '. ' + str(description)
        sentiment = sia.polarity_scores(article_content)['compound']
        date_sentiments_list.append(
            (sentiment, article['url'], article['title'], article['description'])
        )
    # An empty result yields an empty frame instead of an IndexError.
    return pd.DataFrame(date_sentiments_list, columns=['Sentiment', 'URL', 'Title', 'Description'])
# Score every 'stock' article of 21 Jul 2020, without restricting sources.
return_articles = get_articles_sentiments(
    keywrd='stock',
    startd='21-Jul-2020',
    sources_list=None,
    show_all_articles=True,
)
scores = return_articles.Sentiment
# Distribution, mean and number of the scores, then the raw descriptions.
scores.hist(bins=30, grid=False)
print(scores.mean())
print(scores.count())
print(return_articles.Description)
# Rank the articles once by score; both ends of the ranking are shown below.
ranked = return_articles.sort_values(by='Sentiment', ascending=True)[['Sentiment', 'URL']]
# The two most negative articles.
ranked.head(2)
# Output:
# Sentiment URL
# 58 -0.9062 https://www.reuters.com/article/india-nepal-palmoil-idUSL3N2ES1Y3
# 59 -0.8360 https://in.reuters.com/article/volvocars-results-idINKCN24M1D7
# The two most positive articles.
ranked.tail(2)
# Output:
# Sentiment URL
# 37 0.9382 https://www.reuters.com/article/japan-stocks-midday-idUSL3N2ES06S
# 40 0.9559 https://www.marketwatch.com/story/best-buy-says-sales-are-better-during-pandemic-stock-heads-toward-all-time-high-2020-07-21
# Same day and keyword, this time restricted to the business sources.
sources = get_sources('business')
return_articles = get_articles_sentiments(
    keywrd='stock',
    startd='21-Jul-2020',
    sources_list=sources,
    show_all_articles=True,
)
scores = return_articles.Sentiment
# Distribution, mean and number of the scores, then the raw descriptions.
scores.hist(bins=30, grid=False)
print(scores.mean())
print(scores.count())
print(return_articles.Description)
# Output:
# Mean sentiment on 67 business articles
# 0.13
# Articles from the business sources
# 67
# Article description examples
# 0 <ul>\n<li>Tesla CEO Elon Musk appears to have unlocked the second of his compensation goals on Tuesday. </li>\n<li>Despite a slight dip Tuesday, the company’s average market cap has been above $150 billion for long enough to unlock the second tranche of stock a…
# 1 <ul>\n<li>There’s a lot riding on Tesla’s second-quarter earnings report Wednesday afternoon.</li>\n<li>Analysts expect the company to post a $75 million loss for the three-month period ended June 31.</li>\n<li>Despite factory shutdowns and falling deliveries, t…
# 2 <ul>\n<li>Tesla reports its highly anticipated second-quarter earnings on Wednesday after market close. </li>\n<li>The report comes after the automaker’s second-quarter vehicle delivery numbers beat Wall Street expectations. </li>\n<li>Investors and analysts wil…
# ...
# Repeat the all-sources analysis for the previous day, 20 Jul 2020.
return_articles = get_articles_sentiments(
    keywrd='stock',
    startd='20-Jul-2020',
    show_all_articles=True,
)
scores = return_articles.Sentiment
scores.hist(bins=30, grid=False)
# Mean score of that day's articles.
scores.mean()
# Output:
# Mean sentiment on 100 articles
# 0.22501616161616164
# The free NewsAPI plan only allows retrieving about one month of news,
# so the analysis window ends today and starts 30 days earlier.
end_date = date.today()
# timedelta arithmetic is valid for every calendar date. Building
# date(month=end_date.month - 1, day=end_date.day) fails in January and on
# days the previous month does not have (e.g. 31 March -> 31 February).
start_date = end_date - timedelta(days=30)
print('Start day = ', start_date)
print('End day = ', end_date)

business_sources = get_sources('business')
sentiment_all_score = []
sentiment_business_score = []
dates = []
# One pass per calendar day: mean score over all sources and over the
# business sources only.
current_day = start_date
while current_day <= end_date:
    dates.append(current_day)
    sentiments_all = get_articles_sentiments(
        keywrd='stock', startd=current_day, sources_list=None, show_all_articles=True
    )
    # Average the Sentiment column only: the frame also holds text columns
    # (URL, Title, Description) that cannot be averaged. The float cast makes
    # the mean of a day without articles NaN.
    sentiment_all_score.append(sentiments_all['Sentiment'].astype(float).mean())
    sentiments_business = get_articles_sentiments(
        keywrd='stock', startd=current_day, sources_list=business_sources, show_all_articles=True
    )
    sentiment_business_score.append(sentiments_business['Sentiment'].astype(float).mean())
    current_day = current_day + timedelta(days=1)

# Assemble one row per day with the final dtypes set directly.
sentiments = pd.DataFrame({
    'Date': pd.to_datetime(dates),
    'All_sources_sentiment': np.array(sentiment_all_score, dtype=float),
    'Business_sources_sentiment': np.array(sentiment_business_score, dtype=float),
})
sentiments.info()
# Output:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 31 entries, 0 to 30
# Data columns (total 3 columns):
#  #   Column                      Non-Null Count  Dtype
# ---  ------                      --------------  -----
#  0   Date                        31 non-null     datetime64[ns]
#  1   All_sources_sentiment       31 non-null     float64
#  2   Business_sources_sentiment  31 non-null     float64
# dtypes: datetime64[ns](1), float64(2)
# memory usage: 872.0 bytes
# Use the calendar day as the row index.
sentiments = sentiments.set_index("Date")
# Preview the first rows.
sentiments.head()
# Output:
#             All_sources_sentiment  Business_sources_sentiment
# Date
# 2020-06-21               0.209889                    0.111956
# 2020-06-22               0.219228                    0.155876
# 2020-06-23               0.115508                    0.102921
# 2020-06-24               0.084642                    0.017751
# 2020-06-25               0.155524                    0.005206
import pandas_datareader.data as pdr
# Price window: the 30 days up to and including today.
end = date.today()
# Subtract a timedelta instead of decrementing the month number: month - 1
# is 0 in January, and the same day may not exist in the previous month.
# `start` stays a midnight datetime, as before.
start = datetime.combine(end - timedelta(days=30), datetime.min.time())
print(f'Period 1 month until today: {start} to {end} ')
# Output:
# Period 1 month until today: 2020-06-21 00:00:00 to 2020-07-21
# Download S&P 500 quotes from Stooq for the start..end window.
spx_index = pdr.get_data_stooq('^SPX', start, end)
# Inspect which dates came back.
spx_index.index
# Output:
# DatetimeIndex(['2020-07-21', '2020-07-20', '2020-07-17', '2020-07-16', '2020-07-15', '2020-07-14', '2020-07-13', '2020-07-10', '2020-07-09', '2020-07-08', '2020-07-07', '2020-07-06', '2020-07-02', '2020-07-01', '2020-06-30', '2020-06-29', '2020-06-26', '2020-06-25', '2020-06-24', '2020-06-23', '2020-06-22'], dtype='datetime64[ns]', name='Date', freq=None)
# Attach the closing price to each day's sentiment scores, drop the days
# that are incomplete after the join, and give the price column a
# descriptive name.
sentiments_vs_snp = (
    sentiments
    .join(spx_index['Close'])
    .dropna()
    .rename(columns={'Close': 's&p500_close'})
)
sentiments_vs_snp.head()
# Output:
#             All_sources_sentiment  Business_sources_sentiment  s&p500_close
# Date
# 2020-06-22               0.219228                    0.155876       3117.86
# 2020-06-23               0.115508                    0.102921       3131.29
# 2020-06-24               0.084642                    0.017751       3050.33
# 2020-06-25               0.155524                    0.005206       3083.76
# 2020-06-26               0.124339                    0.008645       3009.05
import matplotlib.pyplot as plt
import seaborn as sns
# Two charts with the same layout: S&P 500 close on the left axis (blue)
# and one sentiment series on a twin right axis (green).
sns.set(rc={'figure.figsize': (13.0, 8.0)})
for sentiment_column, sentiment_label in (
    ("All_sources_sentiment", 'All sources sentiment'),
    ("Business_sources_sentiment", 'Business_sources_sentiment'),
):
    # Start each chart on its own figure; without this the second chart
    # would be drawn onto the first chart's twin axis.
    fig, ax = plt.subplots()
    sns.lineplot(data=sentiments_vs_snp['s&p500_close'], color="b",
                 label='S&P500 Close price', ax=ax)
    # The two series have different scales, so sentiment gets its own y axis.
    ax2 = ax.twinx()
    sns.lineplot(data=sentiments_vs_snp[sentiment_column], color="g",
                 ax=ax2, label=sentiment_label)
    plt.show()