import sys, six
from django.db import IntegrityError
from django.utils.timezone import utc
from urlparse import urlparse
import json
from datetime import datetime
from email.utils import parsedate
from msgvis.apps.questions.models import Article, Question
from msgvis.apps.corpus.models import *
from msgvis.apps.enhance.models import set_message_sentiment
def create_an_user_from_json_obj(user_data, dataset_obj):
sender, created = Person.objects.get_or_create(dataset=dataset_obj,
original_id=user_data['id'])
if user_data.get('screen_name'):
sender.username = user_data['screen_name']
if user_data.get('name'):
sender.full_name = user_data['name']
if user_data.get('lang'):
sender.language = Language.objects.get_or_create(code=user_data['lang'])[0]
if user_data.get('friends_count'):
sender.friend_count = user_data['friends_count']
if user_data.get('followers_count'):
sender.follower_count = user_data['followers_count']
if user_data.get('statuses_count'):
sender.message_count = user_data['statuses_count']
if user_data.get('profile_image_url'):
sender.profile_image_url = user_data['profile_image_url']
sender.save()
return sender
[docs]def create_an_instance_from_json(json_str, dataset_obj):
"""
Given a dataset object, imports a tweet from json string into
the dataset.
"""
tweet_data = json.loads(json_str)
if tweet_data.get('lang'):
lang = tweet_data.get('lang')
if lang != "en":
return False
return get_or_create_a_tweet_from_json_obj(tweet_data, dataset_obj)
def get_or_create_language(code):
lang, created = Language.objects.get_or_create(code=code)
return lang
def get_or_create_timezone(name):
zone, created = Timezone.objects.get_or_create(name=name)
return zone
def get_or_create_messagetype(name):
mtype, created = MessageType.objects.get_or_create(name=name)
return mtype
def get_or_create_hashtag(hashtagblob):
ht, created = Hashtag.objects.get_or_create(text=hashtagblob['text'])
return ht
def get_or_create_url(urlblob):
urlparse_results = urlparse(urlblob['expanded_url'])
domain = urlparse_results.netloc
url, created = Url.objects.get_or_create(full_url=urlblob['expanded_url'],
domain=domain,
short_url=urlblob['url'])
return url
def get_or_create_media(mediablob):
media, created = Media.objects.get_or_create(type=mediablob['type'],
media_url=mediablob['media_url'])
return media
def handle_reply_to(status_id, user_id, screen_name, dataset_obj):
# update original tweet shared_count
tmp_tweet = {
'id': status_id,
'user': {
'id': user_id,
'screen_name': screen_name,
},
'in_reply_to_status_id': None
}
original_tweet = get_or_create_a_tweet_from_json_obj(tmp_tweet, dataset_obj)
if original_tweet is not None:
original_tweet.replied_to_count += 1
original_tweet.save()
original_tweet.sender.replied_to_count += 1
original_tweet.sender.save()
def handle_retweet(retweeted_status, dataset_obj):
# update original tweet shared_count
original_tweet = get_or_create_a_tweet_from_json_obj(retweeted_status, dataset_obj)
if original_tweet is not None:
original_tweet.shared_count += 1
original_tweet.save()
original_tweet.sender.shared_count += 1
original_tweet.sender.save()
def handle_entities(tweet, entities, dataset_obj):
# hashtags
if entities.get('hashtags') and len(entities['hashtags']) > 0:
tweet.contains_hashtag = True
for hashtag in entities['hashtags']:
tweet.hashtags.add(get_or_create_hashtag(hashtag))
# urls
if entities.get('urls') and len(entities['urls']) > 0:
tweet.contains_url = True
for url in entities['urls']:
tweet.urls.add(get_or_create_url(url))
# media
if entities.get('media') and len(entities['media']) > 0:
tweet.contains_media = True
for me in entities['media']:
tweet.media.add(get_or_create_media(me))
# user_mentions
if entities.get('user_mentions') and len(entities['user_mentions']) > 0:
tweet.contains_mention = True
for mention in entities['user_mentions']:
mention_obj = create_an_user_from_json_obj(mention, dataset_obj)
mention_obj.mentioned_count += 1
mention_obj.save()
tweet.mentions.add(mention_obj)
[docs]def load_research_questions_from_json(json_str):
"""
Load research questions from json string
"""
questions = json.loads(json_str)
for q in questions:
source = q['source']
article, created = Article.objects.get_or_create(title=source['title'],
defaults={'authors': source['authors'],
'year': source['year'],
'venue': source['venue'],
'link': source['link']})
question = Question(source=article, text=q['text'])
question.save()
for dim in q['dimensions']:
question.add_dimension(dim)
question.save()
return True