This Python script shows how the topics of e-petitions were extracted. Before approaching the topic modeling, we need to clean the texts of e-petitions.
Text cleaning
# defining the functions
import re
import string
import numpy as np
import pandas as pd
    
# Russian stopwords
# !pip install nltk
import nltk
# Download the NLTK "stopwords" corpus so that stopwords.words() below can read it
nltk.download("stopwords")
from nltk.corpus import stopwords
# Module-level list of stopwords; remove_stop_words() below looks words up in it
russian_stopwords = stopwords.words("russian") # loading Russian stopwords
# Show the loaded list for a quick visual check
print(russian_stopwords)
def remove_stop_words(sentence):
    """Return *sentence* with every word found in ``russian_stopwords`` dropped.

    The sentence is split on whitespace and the surviving words are joined
    back with single spaces.
    """
    kept = []
    for token in sentence.split():
        if token in russian_stopwords:
            continue
        kept.append(token)
    return ' '.join(kept)
def remove_short(sentence):
    """Return *sentence* keeping only the words of three or more characters.

    The sentence is split on whitespace and the surviving words are joined
    back with single spaces.
    """
    kept = []
    for token in sentence.split():
        if len(token) < 3:
            continue
        kept.append(token)
    return ' '.join(kept)
    
def remove_digits(sentence):
    """Return *sentence* without the words that consist solely of digits.

    Only whole words for which ``str.isdigit()`` is true are removed; words
    that merely contain digits (e.g. ``"b3"``) are kept unchanged.
    """
    kept = []
    for token in sentence.split():
        if token.isdigit():
            continue
        kept.append(token)
    return ' '.join(kept)
    
def preprocess(all_texts):
    """Clean every text in *all_texts* and return the results as a list.

    Each text goes through the same fixed sequence of steps; the order
    matters because later steps operate on the output of earlier ones.
    """
    # Translation table that deletes the ASCII punctuation in string.punctuation
    punctuation_table = str.maketrans('', '', string.punctuation)

    cleaned = []
    for raw in all_texts:
        text = raw.lower()                        # lowercase
        text = text.translate(punctuation_table)  # remove punctuation
        text = text.strip()                       # trim leading/trailing whitespace
        text = remove_stop_words(text)            # drop Russian stopwords
        text = remove_digits(text)                # drop words made only of digits
        text = re.sub("[0-9]+", "", text)         # strip digits left inside words
        text = re.sub("[a-zA-Z]+", "", text)      # strip Latin letters
        text = re.sub("«", "", text)              # strip opening guillemets
        text = re.sub("»", "", text)              # strip closing guillemets
        text = re.sub("  ", " ", text)            # replace double spaces with one
        text = remove_short(text)                 # drop words shorter than 3 characters
        cleaned.append(text)
    return cleaned
# the following function creates the objects needed for the topic modeling, i.e., the corpus and id2word
#!pip3 install rnnmorph
from rnnmorph.predictor import RNNMorphPredictor
# Morphological predictor for Russian, created once at module level;
# create_list_of_words() uses it to tag words and obtain their normal forms
predictor = RNNMorphPredictor(language="ru")
import gensim.corpora as corpora
def create_list_of_words(all_texts):
    """Turn raw texts into the objects needed for topic modeling.

    The texts are cleaned with preprocess(), split into words, reduced to
    the normal forms of their nouns, and converted to a gensim dictionary
    and bag-of-words corpus.

    Returns a tuple ``(corp, id2word, texts, corpus)`` where ``corp`` and
    ``texts`` are the same list of per-document noun lists, ``id2word`` is
    the gensim Dictionary built from it and ``corpus`` holds the
    bag-of-words representation of every document.
    """
    cleaned_docs = preprocess(all_texts)
    tokenized_docs = [doc.split(" ") for doc in cleaned_docs]

    # choosing only the nouns
    corp = []
    for tokens in tokenized_docs:
        if len(tokens) <= 1:
            # Documents with at most one word (including empty ones) are not
            # tagged; a single empty token stands in for them so that every
            # input text keeps its position in the output
            corp.append([''])
            continue
        corp.append([form.normal_form
                     for form in predictor.predict(tokens)
                     if form.pos == "NOUN"])

    # Create Dictionary
    id2word = corpora.Dictionary(corp)
    # The token lists double as the texts handed to the coherence model
    texts = corp
    # Term Document Frequency
    corpus = [id2word.doc2bow(text) for text in texts]

    return corp, id2word, texts, corpus
# Build the modeling inputs for both petition datasets from their "Text" column
# ROI
df_roi = pd.read_json('roi_data.json')
res1, id2word1, texts1, corpus1 = create_list_of_words(df_roi['Text'])
# Change.org
df_ch = pd.read_json('ch_data.json')
res2, id2word2, texts2, corpus2 = create_list_of_words(df_ch['Text'])
Choosing the correct number of topics
To choose the correct number of topics, we need to calculate the coherence values.
# defining the function
import gensim
from gensim.models import CoherenceModel
import os
from gensim.models.wrappers import LdaMallet
import time
import os
# Location of the local MALLET installation: the home directory is exported
# through the environment, the launcher script is kept for the LdaMallet calls
os.environ['MALLET_HOME'] = r'C:/mallet-2.0.8/'
mallet_path = r'C:/mallet-2.0.8/bin/mallet.bat'
#looking for the number of topics
def compute_coherence_values(dictionary, corpus, texts, limit, start=2, step=3):
    """
    Compute c_v coherence for various numbers of topics.

    One LdaMallet model is trained for every topic count in
    ``range(start, limit, step)`` and scored with a gensim CoherenceModel.

    Parameters:
    ----------
    dictionary : Gensim dictionary
    corpus : Gensim corpus
    texts : List of input texts
    limit : Upper bound (exclusive) of the number of topics
    start : First number of topics to try
    step : Increment between successive numbers of topics
    Returns:
    -------
    coherence_values : List of coherence values, one per tried number of
        topics, in the order of ``range(start, limit, step)``
    """
    coherence_values = []

    for num_topics in range(start, limit, step):
        print(num_topics)  # progress indicator: the topic count being fitted
        # The model must be trained with the dictionary handed in by the
        # caller, i.e. the same one the coherence model is scored against
        model = gensim.models.wrappers.LdaMallet(mallet_path, corpus=corpus, num_topics=num_topics, id2word=dictionary)
        coherencemodel = CoherenceModel(model=model, texts=texts, dictionary=dictionary, coherence='c_v')
        coherence_values.append(coherencemodel.get_coherence())
    return coherence_values
#defining the function to build the graph of the coherence values
import matplotlib.pyplot as plt
def get_coh_v(id2word, corpus, texts, start, limit, step):
    """Compute and plot the coherence score against the number of topics.

    Coherence values are obtained from compute_coherence_values() for every
    topic count in ``range(start, limit, step)`` and shown as a line plot.
    Nothing is returned; the figure is displayed with ``plt.show()``.
    """
    # Getting the coherence numbers
    coherence_value = compute_coherence_values(dictionary=id2word, corpus=corpus, texts=texts, start=start, limit=limit, step=step)

    # building the graph
    x = range(start, limit, step)
    plt.plot(x, coherence_value)
    plt.xlabel("Num Topics")
    plt.ylabel("Coherence score")
    # The labels must be a sequence of strings: a bare string would be
    # iterated character by character and the line would be labelled "c"
    plt.legend(["coherence_values"], loc='best')

    plt.show()
# Plot coherence for 1 to 19 topics, first for ROI, then for Change.org
get_coh_v(id2word=id2word1, corpus=corpus1, texts=texts1, start=1, limit=20, step=1)
get_coh_v(id2word=id2word2, corpus=corpus2, texts=texts2, start=1, limit=20, step=1)
 

Topic modeling
Once we have found the correct number of topics to extract, we can conduct LdaMallet topic modeling.
#defining the function to extract topics
def get_model_results(corpus,id2word):
    """Fit an LdaMallet model interactively and label its topics.

    The number of topics is read from standard input. The top terms of each
    topic are then printed one topic at a time and a title for the topic is
    read from standard input.

    Parameters:
    ----------
    corpus : Gensim corpus
    id2word : Gensim dictionary
    Returns:
    -------
    (df_new, topics_df) :
        df_new holds one row per document with one weight column per topic
        title plus a 'main_topic' column naming the highest-weighted topic;
        topics_df holds the top terms, one column per topic title.
    """
    print('Enter the number of topics')
    x = int(input())
    model = gensim.models.wrappers.LdaMallet(mallet_path, corpus=corpus, num_topics=x, id2word=id2word)

    # Creating a df of topic words: one column per topic, one row per ranked term
    topn = 20
    topic_terms = [[term for term, _ in model.show_topic(n, topn=topn)]
                   for n in range(model.num_topics)]
    topics_df = pd.DataFrame(
        topic_terms,
        columns=['Term' + str(i) for i in range(1, topn + 1)],
        index=['Topic ' + str(t) for t in range(1, model.num_topics + 1)],
    ).T

    topic_list = []
    print("Give a title for the following topic")
    for item in range(x):
        print(list(topics_df.iloc[:, item]))
        topic_list.append(input())
    topics_df.columns = topic_list

    # Get the results: every document is a list of (topic id, weight) pairs
    tm_results = model[corpus]
    # Build the weight table keyed by topic id and as floats from the start,
    # so a topic missing from a document's list counts as weight 0 and the
    # row-wise idxmax below always works on numeric columns
    df_new = pd.DataFrame(
        [dict(doc_topics) for doc_topics in tm_results],
        columns=range(x),
        dtype=float,
    ).fillna(0.0)
    df_new.columns = topic_list
    df_new['main_topic'] = df_new.idxmax(axis=1)

    return(df_new,topics_df)
The function lets the user specify the number of topics and then name the topics one by one.
# ROI: the specified number of topics is 5
result_df1, top1 = get_model_results(corpus=corpus1, id2word=id2word1)
# Change.org: the specified number of topics is 9
result_df2, top2 = get_model_results(corpus=corpus2, id2word=id2word2)
Now, we can attach the main topic column to the datasets and save the dfs.
# Copy the main topic of every document into its source dataset.
# NOTE(review): the assignment aligns on the index, so this presumably relies
# on both frames sharing a default 0..n-1 index - confirm for the JSON inputs
for frame, result in ((df_roi, result_df1), (df_ch, result_df2)):
    frame['main_topic'] = result['main_topic']
# Write both datasets to CSV (UTF-8 with BOM)
for frame, path in ((df_roi, 'df_roi.csv'), (df_ch, 'df_ch.csv')):
    frame.to_csv(path, encoding='utf-8-sig')
We can also check if some words are missing in the ROI documents.
from collections import Counter

# Per-document noun lists of the ROI (1) and Change.org (2) petitions
all_words1 = texts1
all_words2 = texts2
# Flatten each dataset into a single list of words
all_words_list1 = [item for item2 in all_words1 for item in item2]
all_words_list2 = [item for item2 in all_words2 for item in item2]
# Words that occur in the Change.org documents but in no ROI document
diff_words = np.setdiff1d(all_words_list2, all_words_list1).tolist()
# Number of Change.org documents containing each word, counted in one pass:
# set(doc) makes every document contribute at most once per word
doc_freq = Counter(word for doc in all_words2 for word in set(doc))
word_in_doc = [doc_freq[word] for word in diff_words]
d = {'Word': diff_words, 'In N documents': word_in_doc}
wk = pd.DataFrame(d)
# Most widespread words first
wk = wk.sort_values(by=['In N documents'], ascending=False)
The next stages of the analysis were conducted in R.
Merging the datasets
#Loading the dataset with the information extracted
#for the ROI petition-signing; only the ID, the number of votes
#and the main topic are kept
roi <- read.csv("dataset_roi.csv")[, c("ID", "votes", "main_topic")]
#Dataset containing the information about
#the Change.org petition-signing, reduced to the ID, the number
#of signatures and the main topic
ch <- read.csv("dataset_ch.csv")[, c("ID", "Number.of.signatures", "main_topic")]
#Datasets in regard to mobilisation on Twitter
#Both files are read and immediately cut down to the same seven columns
#ROI petition-mentioning
tw_roi <- read.csv("twitter_roi.csv")[, c("ID", "Retweet", "Reply", "Likes",
                                          "Quote", "Followers", "Friends")]
#Change.org petition-mentioning
tw_ch <- read.csv("twitter_ch.csv")[, c("ID", "Retweet", "Reply", "Likes",
                                        "Quote", "Followers", "Friends")]
#Datasets in regard to mobilisation on VK
#Both files are read and immediately cut down to the same eight columns
#ROI petition-mentioning
vk_roi <- read.csv("vk_roi.csv")[, c("ID", "Comment", "Repost", "View",
                                     "Like", "Followers", "Friends", "Groups")]
#Change.org petition-mentioning
vk_ch <- read.csv("vk_ch.csv")[, c("ID", "Comment", "Repost", "View",
                                   "Like", "Followers", "Friends", "Groups")]
#Aggregating the data in the mobilisation datasets
#Summing up the number of mentions, views, etc.
#install.packages("dplyr",dependencies=T)
library(dplyr)
#Twitter: one row per petition ID, each measure summed over all rows
#of that petition (missing values are ignored)
tw_roi <- summarise(group_by(tw_roi, ID),
                    tw_retw = sum(Retweet, na.rm = TRUE),
                    tw_rep = sum(Reply, na.rm = TRUE),
                    tw_like = sum(Likes, na.rm = TRUE),
                    tw_q = sum(Quote, na.rm = TRUE),
                    tw_fol = sum(Followers, na.rm = TRUE),
                    tw_fr = sum(Friends, na.rm = TRUE))
tw_ch <- summarise(group_by(tw_ch, ID),
                   tw_retw = sum(Retweet, na.rm = TRUE),
                   tw_rep = sum(Reply, na.rm = TRUE),
                   tw_like = sum(Likes, na.rm = TRUE),
                   tw_q = sum(Quote, na.rm = TRUE),
                   tw_fol = sum(Followers, na.rm = TRUE),
                   tw_fr = sum(Friends, na.rm = TRUE))
#VK: one row per petition ID, each measure summed over all rows
#of that petition (missing values are ignored)
vk_roi <- summarise(group_by(vk_roi, ID),
                    vk_com = sum(Comment, na.rm = TRUE),
                    vk_rep = sum(Repost, na.rm = TRUE),
                    vk_v = sum(View, na.rm = TRUE),
                    vk_like = sum(Like, na.rm = TRUE),
                    vk_fol = sum(Followers, na.rm = TRUE),
                    vk_fr = sum(Friends, na.rm = TRUE),
                    vk_gr = sum(Groups, na.rm = TRUE))
vk_ch <- summarise(group_by(vk_ch, ID),
                   vk_com = sum(Comment, na.rm = TRUE),
                   vk_rep = sum(Repost, na.rm = TRUE),
                   vk_v = sum(View, na.rm = TRUE),
                   vk_like = sum(Like, na.rm = TRUE),
                   vk_fol = sum(Followers, na.rm = TRUE),
                   vk_fr = sum(Friends, na.rm = TRUE),
                   vk_gr = sum(Groups, na.rm = TRUE))
#Merging the datasets
#Every petition is kept (left join); the aggregated Twitter and VK
#measures are attached by petition ID. The key is stated explicitly so
#that the join cannot silently pick up any other shared column name.
roi <- left_join(roi, tw_roi, by = "ID")
roi <- left_join(roi, vk_roi, by = "ID")
ch <- left_join(ch, tw_ch, by = "ID")
ch <- left_join(ch, vk_ch, by = "ID")
#If the petition was not mentioned on SNSs, then the number of
#mentions, views, etc. is 0
#Only the joined columns (the 4th onwards) are touched; the first three
#columns of each dataset are left as they are
roi[4:ncol(roi)] <- lapply(roi[4:ncol(roi)],
                           function(column) ifelse(is.na(column), 0, column))
ch[4:ncol(ch)] <- lapply(ch[4:ncol(ch)],
                         function(column) ifelse(is.na(column), 0, column))
Dimensionality reduction
#Examining if some of the variables correlate
#Checking the Twitter variables
#The six column names are assembled from their shared "tw_" prefix
#In the ROI dataset
cor(roi[, paste0("tw_", c("retw", "rep", "like", "q", "fol", "fr"))])

#In the Change.org dataset
cor(ch[, paste0("tw_", c("retw", "rep", "like", "q", "fol", "fr"))])
 
All of the variables are highly correlated. In this analysis, however, a distinction between the variables should be drawn. Thus, quotes and replies are reduced to one variable, i.e., petition-mentioning, while the numbers of friends and followers together comprise the reach of the Twitter recruiters.
# Factor analysis with psych::fa (an exploratory routine): each pair of
# Twitter measures is summarised by a single factor
library(psych)
# Change.org: quotes + replies, and friends + followers
ch_tw_rep_q <- fa(r = as.matrix(ch[, c("tw_q", "tw_rep")]),
                  nfactors = 1, residuals = TRUE)
ch_tw_reach <- fa(r = as.matrix(ch[, c("tw_fr", "tw_fol")]),
                  nfactors = 1, residuals = TRUE)
# ROI: the same two factors
roi_tw_rep_q <- fa(r = as.matrix(roi[, c("tw_q", "tw_rep")]),
                   nfactors = 1, residuals = TRUE)
roi_tw_reach <- fa(r = as.matrix(roi[, c("tw_fr", "tw_fol")]),
                   nfactors = 1, residuals = TRUE)
#loadings can be checked 
#loadings(roi_tw_reach)
#summary(roi_tw_reach$scores)
#Checking the VK variables
#The seven column names are assembled from their shared "vk_" prefix
#In the ROI dataset
cor(roi[, paste0("vk_", c("com", "rep", "v", "like", "fol", "fr", "gr"))])

#In the Change.org dataset
cor(ch[, paste0("vk_", c("com", "rep", "v", "like", "fol", "fr", "gr"))])
Once again, many variables are correlated. The dimensionality is reduced as follows. Comments and reposts are reduced to petition-mentioning, while the numbers of friends, followers, groups and, in regard to the ROI petition-mentioning, views comprise the reach of the VK recruiters.
# VK petition-mentioning: comments + reposts, one factor per dataset
roi_vk_com_rep <- fa(r = as.matrix(roi[, c("vk_com", "vk_rep")]),
                     nfactors = 1, residuals = TRUE)
ch_vk_com_rep <- fa(r = as.matrix(ch[, c("vk_com", "vk_rep")]),
                    nfactors = 1, residuals = TRUE)
# VK reach, one factor per dataset
roi_vk_reach <- fa(r = as.matrix(roi[, c("vk_v", "vk_fol", "vk_fr", "vk_gr")]),
                   nfactors = 1, residuals = TRUE)
# NOTE(review): the Change.org factor uses views, friends and groups but
# not followers (vk_fol), unlike the ROI factor above - confirm this
# difference is intended
ch_vk_reach <- fa(r = as.matrix(ch[, c("vk_v", "vk_fr", "vk_gr")]),
                  nfactors = 1, residuals = TRUE)
#Adding new variables to the df
#The factor scores estimated above become columns of the two datasets
#ROI
roi$tw_rep_q <- roi_tw_rep_q$scores
roi$tw_reach <- roi_tw_reach$scores
roi$vk_com_rep <- roi_vk_com_rep$scores
roi$vk_reach <- roi_vk_reach$scores
#Change.org
ch$tw_rep_q <- ch_tw_rep_q$scores
ch$tw_reach <- ch_tw_reach$scores
ch$vk_com_rep <- ch_vk_com_rep$scores
ch$vk_reach <- ch_vk_reach$scores
Regression analysis
#There are outliers in the data
#Thus, each model is fitted only on the Change.org petitions whose number
#of signatures is strictly below the 96th percentile
#Mobilisation variables to be tested, one regression per variable
names_var<-c("tw_rep_q",
             "tw_like",
             "tw_retw",
             "tw_reach",
             "vk_com_rep",
             "vk_like",
             "vk_reach")
#For every variable: number of signatures regressed on the main topic,
#the standardised variable and their interaction; get(i) fetches the
#column whose name is stored in i
#Because of get(i), the coefficient labels read "scale(get(i))" in every
#printed summary; the summaries appear in the order of names_var
for (i in names_var){
  print(summary(lm(Number.of.signatures~
              main_topic*scale(get(i)),
              ch[ch$Number.of.signatures<
                   quantile(ch$Number.of.signatures,0.96),])))
}
## 
## Call:
## lm(formula = Number.of.signatures ~ main_topic * scale(get(i)), 
##     data = ch[ch$Number.of.signatures < quantile(ch$Number.of.signatures, 
##         0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3060.3  -469.2  -262.6   -29.2  6900.0 
## 
## Coefficients:
##                                                Estimate Std. Error t value
## (Intercept)                                     548.023     76.756   7.140
## main_topicHealth                               -193.166    114.192  -1.692
## main_topicIdentity                             -223.848    105.944  -2.113
## main_topicInfrastructure_ecology               -216.875    105.958  -2.047
## main_topicInfrastructure_housing                155.962    103.464   1.507
## main_topicInfrastructure_roads                  260.245    122.268   2.128
## main_topicLegislature                          -181.066    127.284  -1.423
## main_topicNavalny                                 2.306    110.783   0.021
## main_topicPets                                  206.629    110.034   1.878
## scale(get(i))                                   186.624    101.714   1.835
## main_topicHealth:scale(get(i))                 -179.136    114.004  -1.571
## main_topicIdentity:scale(get(i))                536.529    157.316   3.411
## main_topicInfrastructure_ecology:scale(get(i))  195.986    197.884   0.990
## main_topicInfrastructure_housing:scale(get(i))   72.493    133.201   0.544
## main_topicInfrastructure_roads:scale(get(i))   1678.004    484.544   3.463
## main_topicLegislature:scale(get(i))             -53.918    255.300  -0.211
## main_topicNavalny:scale(get(i))                  32.363    110.002   0.294
## main_topicPets:scale(get(i))                    570.296    129.434   4.406
##                                                Pr(>|t|)    
## (Intercept)                                    1.45e-12 ***
## main_topicHealth                               0.090932 .  
## main_topicIdentity                             0.034775 *  
## main_topicInfrastructure_ecology               0.040852 *  
## main_topicInfrastructure_housing               0.131917    
## main_topicInfrastructure_roads                 0.033460 *  
## main_topicLegislature                          0.155078    
## main_topicNavalny                              0.983393    
## main_topicPets                                 0.060594 .  
## scale(get(i))                                  0.066736 .  
## main_topicHealth:scale(get(i))                 0.116321    
## main_topicIdentity:scale(get(i))               0.000666 ***
## main_topicInfrastructure_ecology:scale(get(i)) 0.322134    
## main_topicInfrastructure_housing:scale(get(i)) 0.586361    
## main_topicInfrastructure_roads:scale(get(i))   0.000549 ***
## main_topicLegislature:scale(get(i))            0.832764    
## main_topicNavalny:scale(get(i))                0.768646    
## main_topicPets:scale(get(i))                   1.13e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1014 on 1497 degrees of freedom
## Multiple R-squared:  0.1364,	Adjusted R-squared:  0.1266 
## F-statistic: 13.91 on 17 and 1497 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = Number.of.signatures ~ main_topic * scale(get(i)), 
##     data = ch[ch$Number.of.signatures < quantile(ch$Number.of.signatures, 
##         0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2151.6  -484.3  -272.7   -41.9  6899.6 
## 
## Coefficients:
##                                                Estimate Std. Error t value
## (Intercept)                                      542.73      78.23   6.937
## main_topicHealth                                -189.81     116.53  -1.629
## main_topicIdentity                              -205.78     108.07  -1.904
## main_topicInfrastructure_ecology                -222.35     108.30  -2.053
## main_topicInfrastructure_housing                 174.80     105.64   1.655
## main_topicInfrastructure_roads                   539.81     147.43   3.661
## main_topicLegislature                            -57.81     145.30  -0.398
## main_topicNavalny                                  3.67     112.97   0.032
## main_topicPets                                   319.07     111.59   2.859
## scale(get(i))                                     32.86     124.39   0.264
## main_topicHealth:scale(get(i))                   -15.21     131.93  -0.115
## main_topicIdentity:scale(get(i))                 703.40     263.67   2.668
## main_topicInfrastructure_ecology:scale(get(i))   261.27     296.00   0.883
## main_topicInfrastructure_housing:scale(get(i))   522.00     219.63   2.377
## main_topicInfrastructure_roads:scale(get(i))    5972.92    1201.12   4.973
## main_topicLegislature:scale(get(i))             1886.81    1026.91   1.837
## main_topicNavalny:scale(get(i))                  159.19     129.87   1.226
## main_topicPets:scale(get(i))                    1022.44     199.17   5.133
##                                                Pr(>|t|)    
## (Intercept)                                    5.92e-12 ***
## main_topicHealth                                0.10354    
## main_topicIdentity                              0.05708 .  
## main_topicInfrastructure_ecology                0.04023 *  
## main_topicInfrastructure_housing                0.09821 .  
## main_topicInfrastructure_roads                  0.00026 ***
## main_topicLegislature                           0.69081    
## main_topicNavalny                               0.97409    
## main_topicPets                                  0.00430 ** 
## scale(get(i))                                   0.79171    
## main_topicHealth:scale(get(i))                  0.90821    
## main_topicIdentity:scale(get(i))                0.00772 ** 
## main_topicInfrastructure_ecology:scale(get(i))  0.37757    
## main_topicInfrastructure_housing:scale(get(i))  0.01759 *  
## main_topicInfrastructure_roads:scale(get(i))   7.36e-07 ***
## main_topicLegislature:scale(get(i))             0.06635 .  
## main_topicNavalny:scale(get(i))                 0.22048    
## main_topicPets:scale(get(i))                   3.22e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1034 on 1497 degrees of freedom
## Multiple R-squared:  0.1021,	Adjusted R-squared:  0.09192 
## F-statistic: 10.02 on 17 and 1497 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = Number.of.signatures ~ main_topic * scale(get(i)), 
##     data = ch[ch$Number.of.signatures < quantile(ch$Number.of.signatures, 
##         0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3130.8  -473.2  -276.7   -33.0  6891.1 
## 
## Coefficients:
##                                                Estimate Std. Error t value
## (Intercept)                                     602.851     88.442   6.816
## main_topicHealth                               -220.871    124.414  -1.775
## main_topicIdentity                             -293.484    114.723  -2.558
## main_topicInfrastructure_ecology               -300.695    114.108  -2.635
## main_topicInfrastructure_housing                137.094    114.027   1.202
## main_topicInfrastructure_roads                  656.209    216.589   3.030
## main_topicLegislature                          -279.461    134.076  -2.084
## main_topicNavalny                                -4.903    119.099  -0.041
## main_topicPets                                  309.901    118.046   2.625
## scale(get(i))                                  1063.342    771.833   1.378
## main_topicHealth:scale(get(i))                 -483.809    912.978  -0.530
## main_topicIdentity:scale(get(i))               -690.456    775.923  -0.890
## main_topicInfrastructure_ecology:scale(get(i)) -961.139    776.393  -1.238
## main_topicInfrastructure_housing:scale(get(i)) -120.662    885.788  -0.136
## main_topicInfrastructure_roads:scale(get(i))   8893.252   2873.366   3.095
## main_topicLegislature:scale(get(i))            -939.263    772.411  -1.216
## main_topicNavalny:scale(get(i))                  28.252    799.168   0.035
## main_topicPets:scale(get(i))                   2724.272    861.274   3.163
##                                                Pr(>|t|)    
## (Intercept)                                    1.35e-11 ***
## main_topicHealth                                0.07605 .  
## main_topicIdentity                              0.01062 *  
## main_topicInfrastructure_ecology                0.00850 ** 
## main_topicInfrastructure_housing                0.22944    
## main_topicInfrastructure_roads                  0.00249 ** 
## main_topicLegislature                           0.03730 *  
## main_topicNavalny                               0.96717    
## main_topicPets                                  0.00875 ** 
## scale(get(i))                                   0.16851    
## main_topicHealth:scale(get(i))                  0.59624    
## main_topicIdentity:scale(get(i))                0.37369    
## main_topicInfrastructure_ecology:scale(get(i))  0.21593    
## main_topicInfrastructure_housing:scale(get(i))  0.89167    
## main_topicInfrastructure_roads:scale(get(i))    0.00200 ** 
## main_topicLegislature:scale(get(i))             0.22417    
## main_topicNavalny:scale(get(i))                 0.97180    
## main_topicPets:scale(get(i))                    0.00159 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1014 on 1497 degrees of freedom
## Multiple R-squared:  0.137,	Adjusted R-squared:  0.1272 
## F-statistic: 13.98 on 17 and 1497 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = Number.of.signatures ~ main_topic * scale(get(i)), 
##     data = ch[ch$Number.of.signatures < quantile(ch$Number.of.signatures, 
##         0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3296.6  -392.6  -234.6   -28.2  6736.3 
## 
## Coefficients:
##                                                Estimate Std. Error t value
## (Intercept)                                       901.7      112.2   8.034
## main_topicHealth                                 -513.0      140.6  -3.650
## main_topicIdentity                               -565.6      131.7  -4.293
## main_topicInfrastructure_ecology                 -555.3      131.8  -4.212
## main_topicInfrastructure_housing                 -152.7      130.4  -1.171
## main_topicInfrastructure_roads                    736.3      193.3   3.809
## main_topicLegislature                            -530.3      147.0  -3.608
## main_topicNavalny                                -340.0      135.2  -2.515
## main_topicPets                                   -366.6      136.4  -2.688
## scale(get(i))                                    2019.7      481.6   4.194
## main_topicHealth:scale(get(i))                  -1765.9      525.9  -3.358
## main_topicIdentity:scale(get(i))                -1487.0      489.1  -3.040
## main_topicInfrastructure_ecology:scale(get(i))  -1591.3      497.7  -3.198
## main_topicInfrastructure_housing:scale(get(i))  -1465.0      494.6  -2.962
## main_topicInfrastructure_roads:scale(get(i))     3817.2      918.2   4.157
## main_topicLegislature:scale(get(i))             -1618.1      491.8  -3.290
## main_topicNavalny:scale(get(i))                 -1646.2      485.0  -3.394
## main_topicPets:scale(get(i))                    -1550.7      482.8  -3.212
##                                                Pr(>|t|)    
## (Intercept)                                    1.89e-15 ***
## main_topicHealth                               0.000271 ***
## main_topicIdentity                             1.87e-05 ***
## main_topicInfrastructure_ecology               2.68e-05 ***
## main_topicInfrastructure_housing               0.241633    
## main_topicInfrastructure_roads                 0.000145 ***
## main_topicLegislature                          0.000319 ***
## main_topicNavalny                              0.011996 *  
## main_topicPets                                 0.007265 ** 
## scale(get(i))                                  2.90e-05 ***
## main_topicHealth:scale(get(i))                 0.000806 ***
## main_topicIdentity:scale(get(i))               0.002403 ** 
## main_topicInfrastructure_ecology:scale(get(i)) 0.001414 ** 
## main_topicInfrastructure_housing:scale(get(i)) 0.003105 ** 
## main_topicInfrastructure_roads:scale(get(i))   3.40e-05 ***
## main_topicLegislature:scale(get(i))            0.001024 ** 
## main_topicNavalny:scale(get(i))                0.000706 ***
## main_topicPets:scale(get(i))                   0.001347 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 957.9 on 1497 degrees of freedom
## Multiple R-squared:   0.23,	Adjusted R-squared:  0.2213 
## F-statistic: 26.31 on 17 and 1497 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = Number.of.signatures ~ main_topic * scale(get(i)), 
##     data = ch[ch$Number.of.signatures < quantile(ch$Number.of.signatures, 
##         0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2696.8  -477.0  -266.2   -53.8  6904.9 
## 
## Coefficients:
##                                                Estimate Std. Error t value
## (Intercept)                                      602.64      86.04   7.004
## main_topicHealth                                -247.65     122.20  -2.027
## main_topicIdentity                              -242.25     114.91  -2.108
## main_topicInfrastructure_ecology                -269.45     113.74  -2.369
## main_topicInfrastructure_housing                  82.43     111.86   0.737
## main_topicInfrastructure_roads                   -27.84     119.74  -0.233
## main_topicLegislature                           -245.34     134.29  -1.827
## main_topicNavalny                                -35.08     118.83  -0.295
## main_topicPets                                   274.87     117.59   2.338
## scale(get(i))                                    441.75     253.40   1.743
## main_topicHealth:scale(get(i))                  -264.50     269.57  -0.981
## main_topicIdentity:scale(get(i))                 -84.24     287.22  -0.293
## main_topicInfrastructure_ecology:scale(get(i))   -36.99     275.53  -0.134
## main_topicInfrastructure_housing:scale(get(i))  -329.05     259.84  -1.266
## main_topicInfrastructure_roads:scale(get(i))    -219.63     259.13  -0.848
## main_topicLegislature:scale(get(i))             -125.50     266.67  -0.471
## main_topicNavalny:scale(get(i))                 -299.05     261.88  -1.142
## main_topicPets:scale(get(i))                     160.46     274.12   0.585
##                                                Pr(>|t|)    
## (Intercept)                                    3.74e-12 ***
## main_topicHealth                                 0.0429 *  
## main_topicIdentity                               0.0352 *  
## main_topicInfrastructure_ecology                 0.0180 *  
## main_topicInfrastructure_housing                 0.4613    
## main_topicInfrastructure_roads                   0.8162    
## main_topicLegislature                            0.0679 .  
## main_topicNavalny                                0.7679    
## main_topicPets                                   0.0195 *  
## scale(get(i))                                    0.0815 .  
## main_topicHealth:scale(get(i))                   0.3267    
## main_topicIdentity:scale(get(i))                 0.7693    
## main_topicInfrastructure_ecology:scale(get(i))   0.8932    
## main_topicInfrastructure_housing:scale(get(i))   0.2056    
## main_topicInfrastructure_roads:scale(get(i))     0.3968    
## main_topicLegislature:scale(get(i))              0.6380    
## main_topicNavalny:scale(get(i))                  0.2537    
## main_topicPets:scale(get(i))                     0.5584    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1041 on 1497 degrees of freedom
## Multiple R-squared:  0.09008,	Adjusted R-squared:  0.07974 
## F-statistic: 8.717 on 17 and 1497 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = Number.of.signatures ~ main_topic * scale(get(i)), 
##     data = ch[ch$Number.of.signatures < quantile(ch$Number.of.signatures, 
##         0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3223.6  -448.0  -247.6   -50.5  6908.5 
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                      692.775     88.638   7.816
## main_topicHealth                                -336.151    123.578  -2.720
## main_topicIdentity                              -333.969    116.015  -2.879
## main_topicInfrastructure_ecology                -347.200    115.618  -3.003
## main_topicInfrastructure_housing                  -9.303    113.549  -0.082
## main_topicInfrastructure_roads                  -102.703    120.962  -0.849
## main_topicLegislature                           -321.350    135.398  -2.373
## main_topicNavalny                               -134.281    120.438  -1.115
## main_topicPets                                   188.216    119.123   1.580
## scale(get(i))                                   1161.980    323.127   3.596
## main_topicHealth:scale(get(i))                  -975.372    333.836  -2.922
## main_topicIdentity:scale(get(i))                -663.018    345.765  -1.918
## main_topicInfrastructure_ecology:scale(get(i))  -624.511    353.427  -1.767
## main_topicInfrastructure_housing:scale(get(i)) -1029.000    329.234  -3.125
## main_topicInfrastructure_roads:scale(get(i))    -789.713    332.838  -2.373
## main_topicLegislature:scale(get(i))             -724.378    338.192  -2.142
## main_topicNavalny:scale(get(i))                -1052.627    325.879  -3.230
## main_topicPets:scale(get(i))                    -563.763    344.385  -1.637
##                                                Pr(>|t|)    
## (Intercept)                                    1.02e-14 ***
## main_topicHealth                               0.006601 ** 
## main_topicIdentity                             0.004050 ** 
## main_topicInfrastructure_ecology               0.002718 ** 
## main_topicInfrastructure_housing               0.934717    
## main_topicInfrastructure_roads                 0.395988    
## main_topicLegislature                          0.017752 *  
## main_topicNavalny                              0.265053    
## main_topicPets                                 0.114317    
## scale(get(i))                                  0.000334 ***
## main_topicHealth:scale(get(i))                 0.003533 ** 
## main_topicIdentity:scale(get(i))               0.055360 .  
## main_topicInfrastructure_ecology:scale(get(i)) 0.077429 .  
## main_topicInfrastructure_housing:scale(get(i)) 0.001810 ** 
## main_topicInfrastructure_roads:scale(get(i))   0.017786 *  
## main_topicLegislature:scale(get(i))            0.032361 *  
## main_topicNavalny:scale(get(i))                0.001264 ** 
## main_topicPets:scale(get(i))                   0.101838    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1033 on 1497 degrees of freedom
## Multiple R-squared:  0.1041,	Adjusted R-squared:  0.09391 
## F-statistic: 10.23 on 17 and 1497 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = Number.of.signatures ~ main_topic * scale(get(i)), 
##     data = ch[ch$Number.of.signatures < quantile(ch$Number.of.signatures, 
##         0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4028.1  -416.5  -217.7   -50.4  6934.8 
## 
## Coefficients:
##                                                Estimate Std. Error t value
## (Intercept)                                     655.908     80.530   8.145
## main_topicHealth                               -274.916    117.081  -2.348
## main_topicIdentity                             -292.009    108.964  -2.680
## main_topicInfrastructure_ecology               -340.075    108.106  -3.146
## main_topicInfrastructure_housing                  8.488    106.819   0.079
## main_topicInfrastructure_roads                  -80.684    114.156  -0.707
## main_topicLegislature                          -242.605    129.192  -1.878
## main_topicNavalny                               -95.311    113.406  -0.840
## main_topicPets                                  248.909    112.475   2.213
## scale(get(i))                                   914.066    197.621   4.625
## main_topicHealth:scale(get(i))                 -574.768    228.757  -2.513
## main_topicIdentity:scale(get(i))               -370.292    223.652  -1.656
## main_topicInfrastructure_ecology:scale(get(i)) -603.788    208.083  -2.902
## main_topicInfrastructure_housing:scale(get(i)) -764.946    203.390  -3.761
## main_topicInfrastructure_roads:scale(get(i))   -534.271    207.916  -2.570
## main_topicLegislature:scale(get(i))            -210.720    241.971  -0.871
## main_topicNavalny:scale(get(i))                -678.100    208.482  -3.253
## main_topicPets:scale(get(i))                   -336.511    228.914  -1.470
##                                                Pr(>|t|)    
## (Intercept)                                    7.92e-16 ***
## main_topicHealth                               0.019000 *  
## main_topicIdentity                             0.007446 ** 
## main_topicInfrastructure_ecology               0.001689 ** 
## main_topicInfrastructure_housing               0.936679    
## main_topicInfrastructure_roads                 0.479811    
## main_topicLegislature                          0.060593 .  
## main_topicNavalny                              0.400791    
## main_topicPets                                 0.027047 *  
## scale(get(i))                                  4.06e-06 ***
## main_topicHealth:scale(get(i))                 0.012090 *  
## main_topicIdentity:scale(get(i))               0.098000 .  
## main_topicInfrastructure_ecology:scale(get(i)) 0.003766 ** 
## main_topicInfrastructure_housing:scale(get(i)) 0.000176 ***
## main_topicInfrastructure_roads:scale(get(i))   0.010276 *  
## main_topicLegislature:scale(get(i))            0.383977    
## main_topicNavalny:scale(get(i))                0.001169 ** 
## main_topicPets:scale(get(i))                   0.141763    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1014 on 1497 degrees of freedom
## Multiple R-squared:  0.1366,	Adjusted R-squared:  0.1268 
## F-statistic: 13.93 on 17 and 1497 DF,  p-value: < 2.2e-16
# For every variable name in names_var, fit a linear model of votes on
# main_topic, the standardised variable (scale(get(i))) and their interaction,
# and print the model summary.
# Rows whose votes are at or above the 0.96 quantile of roi$votes are dropped
# before fitting.
# NOTE(review): because the predictor is referenced through get(i), every
# printed Call shows `scale(get(i))` rather than the variable name; the
# summaries are presumably printed in the order of names_var -- confirm when
# matching output to variables.
for (i in names_var){
  print(summary(lm(votes~
              main_topic*scale(get(i)),
              roi[roi$votes<
                   quantile(roi$votes,0.96),])))
}
## 
## Call:
## lm(formula = votes ~ main_topic * scale(get(i)), data = roi[roi$votes < 
##     quantile(roi$votes, 0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -485.56 -241.34  -25.56  186.72 1089.17 
## 
## Coefficients: (3 not defined because of singularities)
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                              442.04      30.99  14.265  < 2e-16 ***
## main_topicInfrastructure                -127.24      38.23  -3.328 0.000936 ***
## main_topicLaw_criminal                  -206.33      59.24  -3.483 0.000538 ***
## main_topicLaw_weapon                     -24.37      45.39  -0.537 0.591551    
## main_topicSocial_support                  65.73      33.99   1.934 0.053677 .  
## scale(get(i))                            291.74     270.06   1.080 0.280525    
## main_topicInfrastructure:scale(get(i))  -260.01     270.35  -0.962 0.336622    
## main_topicLaw_criminal:scale(get(i))         NA         NA      NA       NA    
## main_topicLaw_weapon:scale(get(i))           NA         NA      NA       NA    
## main_topicSocial_support:scale(get(i))       NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 284.9 on 523 degrees of freedom
## Multiple R-squared:  0.08267,	Adjusted R-squared:  0.07215 
## F-statistic: 7.856 on 6 and 523 DF,  p-value: 4.076e-08
## 
## 
## Call:
## lm(formula = votes ~ main_topic * scale(get(i)), data = roi[roi$votes < 
##     quantile(roi$votes, 0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -524.88 -240.20  -25.39  181.91  962.39 
## 
## Coefficients: (1 not defined because of singularities)
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                              561.88      35.27  15.930  < 2e-16 ***
## main_topicInfrastructure                -246.84      41.48  -5.951 4.88e-09 ***
## main_topicLaw_criminal                  -181.11      57.93  -3.126  0.00187 ** 
## main_topicLaw_weapon                     -26.73     129.74  -0.206  0.83687    
## main_topicSocial_support                 -72.22      47.78  -1.511  0.13131    
## scale(get(i))                           2387.10     439.29   5.434 8.48e-08 ***
## main_topicInfrastructure:scale(get(i)) -2355.33     439.46  -5.360 1.26e-07 ***
## main_topicLaw_criminal:scale(get(i))         NA         NA      NA       NA    
## main_topicLaw_weapon:scale(get(i))      -398.34    1847.32  -0.216  0.82936    
## main_topicSocial_support:scale(get(i)) -2354.30     566.00  -4.160 3.73e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 277.7 on 521 degrees of freedom
## Multiple R-squared:  0.1319,	Adjusted R-squared:  0.1186 
## F-statistic: 9.898 on 8 and 521 DF,  p-value: 7.361e-13
## 
## 
## Call:
## lm(formula = votes ~ main_topic * scale(get(i)), data = roi[roi$votes < 
##     quantile(roi$votes, 0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -548.38 -238.82  -25.02  181.01 1095.23 
## 
## Coefficients: (1 not defined because of singularities)
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              513.703     50.336  10.205  < 2e-16
## main_topicInfrastructure                -198.030     55.106  -3.594 0.000357
## main_topicLaw_criminal                  -200.268     59.346  -3.375 0.000794
## main_topicLaw_weapon                      -3.157    117.888  -0.027 0.978649
## main_topicSocial_support                 -16.473     61.411  -0.268 0.788618
## scale(get(i))                           1224.516    584.456   2.095 0.036641
## main_topicInfrastructure:scale(get(i)) -1201.488    584.590  -2.055 0.040351
## main_topicLaw_criminal:scale(get(i))          NA         NA      NA       NA
## main_topicLaw_weapon:scale(get(i))       189.395   1434.052   0.132 0.894980
## main_topicSocial_support:scale(get(i)) -1091.394    687.667  -1.587 0.113098
##                                           
## (Intercept)                            ***
## main_topicInfrastructure               ***
## main_topicLaw_criminal                 ***
## main_topicLaw_weapon                      
## main_topicSocial_support                  
## scale(get(i))                          *  
## main_topicInfrastructure:scale(get(i)) *  
## main_topicLaw_criminal:scale(get(i))      
## main_topicLaw_weapon:scale(get(i))        
## main_topicSocial_support:scale(get(i))    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 285.1 on 521 degrees of freedom
## Multiple R-squared:  0.08523,	Adjusted R-squared:  0.07118 
## F-statistic: 6.067 on 8 and 521 DF,  p-value: 1.769e-07
## 
## 
## Call:
## lm(formula = votes ~ main_topic * scale(get(i)), data = roi[roi$votes < 
##     quantile(roi$votes, 0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -470.94 -238.13  -15.59  173.62  975.35 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                             422.756     23.578  17.930  < 2e-16 ***
## main_topicInfrastructure               -107.635     31.793  -3.386 0.000764 ***
## main_topicLaw_criminal                  -98.537    105.669  -0.933 0.351508    
## main_topicLaw_weapon                      9.275     54.227   0.171 0.864261    
## main_topicSocial_support                 71.105     32.558   2.184 0.029414 *  
## scale(get(i))                           327.094     51.538   6.347 4.79e-10 ***
## main_topicInfrastructure:scale(get(i)) -278.957     52.999  -5.263 2.07e-07 ***
## main_topicLaw_criminal:scale(get(i))    640.175    795.930   0.804 0.421585    
## main_topicLaw_weapon:scale(get(i))      -37.885    331.577  -0.114 0.909078    
## main_topicSocial_support:scale(get(i)) -162.550     89.920  -1.808 0.071228 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 271.9 on 520 degrees of freedom
## Multiple R-squared:  0.1694,	Adjusted R-squared:  0.155 
## F-statistic: 11.79 on 9 and 520 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = votes ~ main_topic * scale(get(i)), data = roi[roi$votes < 
##     quantile(roi$votes, 0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -597.01 -238.03  -21.33  186.33 1101.74 
## 
## Coefficients: (1 not defined because of singularities)
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                              426.46      24.57  17.354  < 2e-16 ***
## main_topicInfrastructure                -112.43      33.16  -3.390 0.000752 ***
## main_topicLaw_criminal                  -193.76      58.96  -3.286 0.001084 ** 
## main_topicLaw_weapon                     -25.54      44.81  -0.570 0.568937    
## main_topicSocial_support                  79.93      36.88   2.167 0.030657 *  
## scale(get(i))                            103.06      36.49   2.824 0.004923 ** 
## main_topicInfrastructure:scale(get(i))   -70.93      38.96  -1.821 0.069237 .  
## main_topicLaw_criminal:scale(get(i))         NA         NA      NA       NA    
## main_topicLaw_weapon:scale(get(i))       -11.81      66.90  -0.176 0.859982    
## main_topicSocial_support:scale(get(i))    48.20     124.50   0.387 0.698830    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 282.8 on 521 degrees of freedom
## Multiple R-squared:  0.09999,	Adjusted R-squared:  0.08617 
## F-statistic: 7.235 on 8 and 521 DF,  p-value: 4.04e-09
## 
## 
## Call:
## lm(formula = votes ~ main_topic * scale(get(i)), data = roi[roi$votes < 
##     quantile(roi$votes, 0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -613.13 -238.32  -19.93  185.84 1100.56 
## 
## Coefficients: (1 not defined because of singularities)
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                              419.53      24.49  17.131  < 2e-16 ***
## main_topicInfrastructure                -104.31      33.09  -3.152 0.001714 ** 
## main_topicLaw_criminal                  -194.94      58.80  -3.316 0.000978 ***
## main_topicLaw_weapon                     -16.90      44.70  -0.378 0.705566    
## main_topicSocial_support                 104.10      39.26   2.652 0.008252 ** 
## scale(get(i))                             53.43      15.65   3.414 0.000690 ***
## main_topicInfrastructure:scale(get(i))   -16.39      26.67  -0.614 0.539163    
## main_topicLaw_criminal:scale(get(i))         NA         NA      NA       NA    
## main_topicLaw_weapon:scale(get(i))        23.97      60.90   0.393 0.694114    
## main_topicSocial_support:scale(get(i))   212.52     149.50   1.422 0.155738    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 282.3 on 521 degrees of freedom
## Multiple R-squared:  0.1029,	Adjusted R-squared:  0.08911 
## F-statistic: 7.469 on 8 and 521 DF,  p-value: 1.893e-09
## 
## 
## Call:
## lm(formula = votes ~ main_topic * scale(get(i)), data = roi[roi$votes < 
##     quantile(roi$votes, 0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -617.14 -237.84  -24.67  183.59 1101.25 
## 
## Coefficients: (1 not defined because of singularities)
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                              417.37      24.51  17.025  < 2e-16 ***
## main_topicInfrastructure                -100.17      33.04  -3.032 0.002548 ** 
## main_topicLaw_criminal                  -194.25      58.81  -3.303 0.001021 ** 
## main_topicLaw_weapon                     -11.40      44.76  -0.255 0.799097    
## main_topicSocial_support                  87.25      36.25   2.407 0.016427 *  
## scale(get(i))                             53.68      15.57   3.448 0.000611 ***
## main_topicInfrastructure:scale(get(i))   -12.89      26.09  -0.494 0.621326    
## main_topicLaw_criminal:scale(get(i))         NA         NA      NA       NA    
## main_topicLaw_weapon:scale(get(i))        77.96      90.04   0.866 0.387025    
## main_topicSocial_support:scale(get(i))   114.04     132.14   0.863 0.388546    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 282.3 on 521 degrees of freedom
## Multiple R-squared:  0.1029,	Adjusted R-squared:  0.0891 
## F-statistic: 7.468 on 8 and 521 DF,  p-value: 1.9e-09
#Checking the influence of the topic alone
# Two models with main_topic as the only predictor: one for
# Number.of.signatures in ch and one for votes in roi.
# In both cases rows at or above the 0.96 quantile of the outcome are dropped
# before fitting.
summary(lm(Number.of.signatures~
              main_topic,
              ch[ch$Number.of.signatures<
                   quantile(ch$Number.of.signatures,0.96),]))
summary(lm(votes~
              main_topic,
              roi[roi$votes<
                   quantile(roi$votes,0.96),]))
## 
## Call:
## lm(formula = Number.of.signatures ~ main_topic, data = ch[ch$Number.of.signatures < 
##     quantile(ch$Number.of.signatures, 0.96), ])
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -855.8 -507.1 -302.7  -55.7 6867.0 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        542.09      81.08   6.686 3.22e-11 ***
## main_topicHealth                  -187.08     120.68  -1.550  0.12130    
## main_topicIdentity                -217.39     111.96  -1.942  0.05236 .  
## main_topicInfrastructure_ecology  -237.39     111.28  -2.133  0.03307 *  
## main_topicInfrastructure_housing   155.89     109.31   1.426  0.15406    
## main_topicInfrastructure_roads      67.66     117.71   0.575  0.56553    
## main_topicLegislature             -183.53     133.61  -1.374  0.16977    
## main_topicNavalny                   36.10     116.94   0.309  0.75759    
## main_topicPets                     318.75     115.68   2.756  0.00593 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1073 on 1506 degrees of freedom
## Multiple R-squared:  0.02883,	Adjusted R-squared:  0.02367 
## F-statistic: 5.588 on 8 and 1506 DF,  p-value: 5.346e-07
## 
## Call:
## lm(formula = votes ~ main_topic, data = roi[roi$votes < quantile(roi$votes, 
##     0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -485.56 -242.57  -25.56  185.96 1089.17 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                421.83      24.84  16.983  < 2e-16 ***
## main_topicInfrastructure  -102.29      33.47  -3.056 0.002357 ** 
## main_topicLaw_criminal    -206.33      59.56  -3.464 0.000575 ***
## main_topicLaw_weapon       -18.92      45.35  -0.417 0.676666    
## main_topicSocial_support    65.73      34.17   1.923 0.054960 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 286.5 on 525 degrees of freedom
## Multiple R-squared:  0.06926,	Adjusted R-squared:  0.06217 
## F-statistic: 9.767 on 4 and 525 DF,  p-value: 1.26e-07
#Comparing the influence of the number of Twitter friends
#on the Change.org petition signing
# Model: Number.of.signatures on main_topic, tw_fr and their interaction,
# fitted on rows below the 0.96 quantile of Number.of.signatures.
# Unlike the loops over names_var, tw_fr enters unscaled here, so its
# coefficients are per unit of tw_fr.
summary(lm(Number.of.signatures~
              main_topic*tw_fr,
              ch[ch$Number.of.signatures<
                   quantile(ch$Number.of.signatures,0.96),]))
#and the number of VK friends
#on the Change.org petition signing
# Same specification with vk_fr (also unscaled) in place of tw_fr.
summary(lm(Number.of.signatures~
              main_topic*vk_fr,
              ch[ch$Number.of.signatures<
                   quantile(ch$Number.of.signatures,0.96),]))
## 
## Call:
## lm(formula = Number.of.signatures ~ main_topic * tw_fr, data = ch[ch$Number.of.signatures < 
##     quantile(ch$Number.of.signatures, 0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3687.2  -363.3  -234.4   -24.2  6648.7 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             377.55344   73.87549   5.111 3.62e-07
## main_topicHealth                        -59.74203  109.31068  -0.547   0.5848
## main_topicIdentity                     -137.65147  100.88032  -1.365   0.1726
## main_topicInfrastructure_ecology       -119.97977  100.19433  -1.197   0.2313
## main_topicInfrastructure_housing        237.20795   98.58068   2.406   0.0162
## main_topicInfrastructure_roads            1.78452  108.35145   0.016   0.9869
## main_topicLegislature                   -90.26623  119.58832  -0.755   0.4505
## main_topicNavalny                       110.40663  105.10952   1.050   0.2937
## main_topicPets                           44.62777  107.75181   0.414   0.6788
## tw_fr                                     0.53752    0.07002   7.677 2.93e-14
## main_topicHealth:tw_fr                   -0.47566    0.07782  -6.113 1.25e-09
## main_topicIdentity:tw_fr                 -0.48023    0.07063  -6.800 1.51e-11
## main_topicInfrastructure_ecology:tw_fr   -0.49552    0.07094  -6.985 4.27e-12
## main_topicInfrastructure_housing:tw_fr   -0.45596    0.07120  -6.404 2.02e-10
## main_topicInfrastructure_roads:tw_fr      0.08256    0.10153   0.813   0.4163
## main_topicLegislature:tw_fr              -0.49771    0.07061  -7.048 2.75e-12
## main_topicNavalny:tw_fr                  -0.50185    0.07023  -7.146 1.39e-12
## main_topicPets:tw_fr                     -0.49122    0.07010  -7.008 3.65e-12
##                                           
## (Intercept)                            ***
## main_topicHealth                          
## main_topicIdentity                        
## main_topicInfrastructure_ecology          
## main_topicInfrastructure_housing       *  
## main_topicInfrastructure_roads            
## main_topicLegislature                     
## main_topicNavalny                         
## main_topicPets                            
## tw_fr                                  ***
## main_topicHealth:tw_fr                 ***
## main_topicIdentity:tw_fr               ***
## main_topicInfrastructure_ecology:tw_fr ***
## main_topicInfrastructure_housing:tw_fr ***
## main_topicInfrastructure_roads:tw_fr      
## main_topicLegislature:tw_fr            ***
## main_topicNavalny:tw_fr                ***
## main_topicPets:tw_fr                   ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 935.2 on 1497 degrees of freedom
## Multiple R-squared:  0.266,	Adjusted R-squared:  0.2577 
## F-statistic: 31.92 on 17 and 1497 DF,  p-value: < 2.2e-16
## 
## Call:
## lm(formula = Number.of.signatures ~ main_topic * vk_fr, data = ch[ch$Number.of.signatures < 
##     quantile(ch$Number.of.signatures, 0.96), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3047.5  -425.0  -242.4   -51.2  6948.0 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             4.259e+02  8.165e+01   5.216 2.09e-07
## main_topicHealth                       -1.300e+02  1.202e+02  -1.082 0.279515
## main_topicIdentity                     -1.765e+02  1.117e+02  -1.581 0.114200
## main_topicInfrastructure_ecology       -1.646e+02  1.111e+02  -1.481 0.138749
## main_topicInfrastructure_housing        1.675e+02  1.101e+02   1.522 0.128208
## main_topicInfrastructure_roads          1.040e+02  1.173e+02   0.886 0.375608
## main_topicLegislature                  -2.101e+02  1.334e+02  -1.575 0.115575
## main_topicNavalny                       9.076e+01  1.166e+02   0.779 0.436273
## main_topicPets                          3.841e+02  1.147e+02   3.348 0.000835
## vk_fr                                   2.992e-01  6.400e-02   4.674 3.21e-06
## main_topicHealth:vk_fr                 -1.391e-01  8.508e-02  -1.635 0.102237
## main_topicIdentity:vk_fr               -1.273e-01  7.587e-02  -1.677 0.093664
## main_topicInfrastructure_ecology:vk_fr -1.668e-01  8.463e-02  -1.971 0.048917
## main_topicInfrastructure_housing:vk_fr -2.038e-01  6.716e-02  -3.035 0.002444
## main_topicInfrastructure_roads:vk_fr   -1.782e-01  7.074e-02  -2.519 0.011859
## main_topicLegislature:vk_fr            -9.194e-03  8.492e-02  -0.108 0.913803
## main_topicNavalny:vk_fr                -2.279e-01  6.788e-02  -3.357 0.000807
## main_topicPets:vk_fr                   -2.154e-01  6.868e-02  -3.137 0.001742
##                                           
## (Intercept)                            ***
## main_topicHealth                          
## main_topicIdentity                        
## main_topicInfrastructure_ecology          
## main_topicInfrastructure_housing          
## main_topicInfrastructure_roads            
## main_topicLegislature                     
## main_topicNavalny                         
## main_topicPets                         ***
## vk_fr                                  ***
## main_topicHealth:vk_fr                    
## main_topicIdentity:vk_fr               .  
## main_topicInfrastructure_ecology:vk_fr *  
## main_topicInfrastructure_housing:vk_fr ** 
## main_topicInfrastructure_roads:vk_fr   *  
## main_topicLegislature:vk_fr               
## main_topicNavalny:vk_fr                ***
## main_topicPets:vk_fr                   ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1029 on 1497 degrees of freedom
## Multiple R-squared:  0.1117,	Adjusted R-squared:  0.1016 
## F-statistic: 11.07 on 17 and 1497 DF,  p-value: < 2.2e-16
Bayesian network analysis
Structure learning
Bayesian network structure learning includes 2 steps. The first step is to infer the structure using score-based and hybrid structure-learning algorithms. Here, 2 algorithms are used: HC (score-based) and H2PC (hybrid). To learn the structure correctly, all interactions between the variables associated with Twitter mobilisation and the variables associated with VK mobilisation were excluded. Thus, interactions of the type tw_like -> vk_like or vk_reach -> tw_retw, etc. are excluded.
#Load the blacklisted arcs (the CSV file is read without a header row)
blacklist <- read.csv("blacklist.csv", header = FALSE)
#Variables used in both data sets: the topic plus the Twitter and VK
#mobilisation measures
bn_shared_vars <- c("main_topic",
                    "tw_retw", "tw_rep_q", "tw_like", "tw_reach",
                    "vk_com_rep", "vk_like", "vk_reach")
#Subset each data set to its outcome column followed by the shared variables
ch_bn <- ch[, c("Number.of.signatures", bn_shared_vars)]
roi_bn <- roi[, c("votes", bn_shared_vars)]
#BN structure learning: for each data set and each algorithm, arc strengths
#are computed over 5000 bootstrap replicates with the blacklist applied,
#and the learned structures are then averaged into a single network
#install.packages("bnlearn",dependencies=T)
library(bnlearn)
#ch data, "hc" algorithm
str_ch1 <- boot.strength(
  ch_bn,
  R = 5000,
  algorithm = "hc",
  algorithm.args = list(blacklist = blacklist)
)
avg_ch1 <- averaged.network(str_ch1)
#ch data, "h2pc" algorithm
str_ch2 <- boot.strength(
  ch_bn,
  R = 5000,
  algorithm = "h2pc",
  algorithm.args = list(blacklist = blacklist)
)
avg_ch2 <- averaged.network(str_ch2)
#roi data, "hc" algorithm
str_roi1 <- boot.strength(
  roi_bn,
  R = 5000,
  algorithm = "hc",
  algorithm.args = list(blacklist = blacklist)
)
avg_roi1 <- averaged.network(str_roi1)
#roi data, "h2pc" algorithm
str_roi2 <- boot.strength(
  roi_bn,
  R = 5000,
  algorithm = "h2pc",
  algorithm.args = list(blacklist = blacklist)
)
avg_roi2 <- averaged.network(str_roi2)
The second step is to use structural equation modeling (SEM) to test the significance of the paths that differ between the structures learned by the score-based and the hybrid algorithms. Thus, as the base structure (the 0 model), we use the links that were learned by both algorithms.
#SEM significance-of-the-paths testing in regard to participation
#in the Change.org petition-signing
#subsetting the dataset: the first 16 columns of ch are kept, selected by
#position
ch_sem<-ch[,1:16]
#renaming the "Number.of.signatures" variable into "votes"
#(assumes Number.of.signatures is the 2nd column of ch -- TODO confirm)
names(ch_sem)[2]<-"votes"
#scaling all of the continuous variables
#(column 3 is skipped; presumably it holds main_topic -- TODO confirm)
for (i in c(2,4:16)){
  ch_sem[,i]<-scale(ch_sem[,i])
}
#install.packages("lavaan",dependencies=T)
library(lavaan)
#defining the 0 model
# In the lavaan syntax below, `=~` lines define the latent variables vk_reach,
# tw_reach, vk_com_rep and tw_rep_q from their indicators, and `~` lines are
# regressions of each outcome on main_topic and a reach variable.
# NOTE(review): main_topic is categorical in the lm output of this document;
# lavaan expects exogenous categorical covariates to be dummy-coded --
# confirm how main_topic is encoded in ch_sem.
mod_ch0 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach
tw_retw~main_topic+tw_reach
tw_rep_q~main_topic+tw_reach
tw_like~main_topic+tw_reach
vk_like~main_topic+vk_reach
vk_com_rep~main_topic+vk_reach"
# Fit the 0 model on the scaled data
m_ch0 <- sem(mod_ch0, data = ch_sem )
After the 0 model is defined, we enrich it by adding, one at a time, the paths that were learned by one algorithm but not by the other. After each addition, we test the significance of the added path and keep the best-fitting model.
# Candidate 1.1: the 0 model with tw_rep_q added as a predictor of tw_like
mod_ch1.1 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach
tw_retw~main_topic+tw_reach
tw_rep_q~main_topic+tw_reach
tw_like~main_topic+tw_reach+tw_rep_q
vk_like~main_topic+vk_reach
vk_com_rep~main_topic+vk_reach
"
m_ch1.1 <- sem(mod_ch1.1, data = ch_sem )
# Candidate 1.2: the 0 model with the reverse direction, i.e. tw_like added
# as a predictor of tw_rep_q
mod_ch1.2 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach
tw_retw~main_topic+tw_reach
tw_rep_q~main_topic+tw_reach+tw_like
tw_like~main_topic+tw_reach
vk_like~main_topic+vk_reach
vk_com_rep~main_topic+vk_reach
"
m_ch1.2 <- sem(mod_ch1.2, data = ch_sem )
# Comparing the fits
# Each candidate is compared with the 0 model, then the two candidates are
# compared with each other.
anova(m_ch0,m_ch1.1)
anova(m_ch0,m_ch1.2)
# NOTE(review): in the recorded output m_ch1.1 and m_ch1.2 have the same Df,
# so this comparison reports no p-value; the choice between them rests on
# AIC/BIC/Chisq.
anova(m_ch1.1,m_ch1.2)
#m_ch1.1 has the best fit
# NOTE(review): in the recorded output m_ch0 has lower AIC and BIC than both
# candidates; "best" presumably refers to the two candidates only -- confirm.
## This is lavaan 0.6-7
## lavaan is BETA software! Please report any bugs.
## Chi-Squared Difference Test
## 
##         Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## m_ch0   54 29837 30105 3470.8                                  
## m_ch1.1 58 30136 30383 3777.9     307.14       4  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Chi-Squared Difference Test
## 
##         Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## m_ch0   54 29837 30105 3470.8                                  
## m_ch1.2 58 30254 30501 3895.6      424.8       4  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Chi-Squared Difference Test
## 
##         Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)
## m_ch1.1 58 30136 30383 3777.9                              
## m_ch1.2 58 30254 30501 3895.6     117.67       0
# adding links to the model with the best fit, i.e., m_ch1.1
# Candidate 2.1: m_ch1.1 with vk_com_rep added as a predictor of vk_like
mod_ch2.1 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach
tw_retw~main_topic+tw_reach
tw_rep_q~main_topic+tw_reach
tw_like~main_topic+tw_reach+tw_rep_q
vk_like~main_topic+vk_reach+vk_com_rep
vk_com_rep~main_topic+vk_reach
"
m_ch2.1 <- sem(mod_ch2.1, data = ch_sem )
# Candidate 2.2: m_ch1.1 with the reverse direction, i.e. vk_like added as a
# predictor of vk_com_rep
mod_ch2.2 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach
tw_retw~main_topic+tw_reach
tw_rep_q~main_topic+tw_reach
tw_like~main_topic+tw_reach+tw_rep_q
vk_like~main_topic+vk_reach
vk_com_rep~main_topic+vk_reach+vk_like
"
m_ch2.2 <- sem(mod_ch2.2, data = ch_sem )
#Comparing the fits
# NOTE(review): both candidates extend m_ch1.1 (they contain
# tw_like ~ ... + tw_rep_q), yet they are compared against m_ch1.2 here and
# the conclusion below names m_ch1.2, while the earlier comparison comment
# names m_ch1.1 as best -- confirm which baseline is intended.
anova(m_ch1.2,m_ch2.1)
anova(m_ch1.2,m_ch2.2)
anova(m_ch2.1,m_ch2.2)
#model m_ch1.2 has the best fit
# NOTE(review): in the recorded output m_ch2.1 and m_ch2.2 have lower AIC,
# BIC and Chisq than m_ch1.2 (negative Chisq diff, p = 1) -- confirm this
# conclusion.
## Chi-Squared Difference Test
## 
##         Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)
## m_ch1.2 58 30254 30501 3895.6                              
## m_ch2.1 61 30201 30432 3848.6    -46.973       3          1
## Chi-Squared Difference Test
## 
##         Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)
## m_ch1.2 58 30254 30501 3895.6                              
## m_ch2.2 61 30170 30401 3817.6    -78.014       3          1
## Chi-Squared Difference Test
## 
##         Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)
## m_ch2.1 61 30201 30432 3848.6                              
## m_ch2.2 61 30170 30401 3817.6    -31.042       0
# Adding the links to the model with the best fit, i.e., m_ch1.2
# Candidate mod_ch3: tw_retw is regressed on tw_rep_q (besides main_topic and
# tw_reach) and tw_rep_q on tw_like.
mod_ch3 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach
tw_retw~main_topic+tw_reach+tw_rep_q
tw_rep_q~main_topic+tw_reach+tw_like
tw_like~main_topic+tw_reach
vk_like~main_topic+vk_reach
vk_com_rep~main_topic+vk_reach
"
# Fit the candidate on ch_sem and compare it with the current best model.
m_ch3 <- sem(mod_ch3, data = ch_sem )
anova(m_ch1.2,m_ch3)
# model m_ch1.2 has the best fit
## Chi-Squared Difference Test
## 
##         Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## m_ch1.2 58 30254 30501 3895.6                                  
## m_ch3   61 30304 30535 3951.9     56.296       3  3.633e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Candidate mod_ch4: reciprocal link between tw_rep_q and tw_like (each one is
# regressed on the other).
mod_ch4 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach
tw_retw~main_topic+tw_reach
tw_rep_q~main_topic+tw_reach+tw_like
tw_like~main_topic+tw_reach+tw_rep_q
vk_like~main_topic+vk_reach
vk_com_rep~main_topic+vk_reach
"
# Fit the candidate on ch_sem and compare it with the current best model.
m_ch4 <- sem(mod_ch4, data = ch_sem )
anova(m_ch1.2,m_ch4)
# m_ch1.2 has the best fit
## Chi-Squared Difference Test
## 
##         Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## m_ch1.2 58 30254 30501 3895.6                                  
## m_ch4   61 30307 30538 3954.7      59.05       3  9.377e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Candidate mod_ch5: tw_like is a predictor of both tw_retw and tw_rep_q.
mod_ch5 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach
tw_retw~main_topic+tw_reach+tw_like
tw_rep_q~main_topic+tw_reach+tw_like
tw_like~main_topic+tw_reach
vk_like~main_topic+vk_reach
vk_com_rep~main_topic+vk_reach
"
# Fit the candidate on ch_sem and compare it with the current best model.
m_ch5 <- sem(mod_ch5, data = ch_sem )
anova(m_ch1.2,m_ch5)
# m_ch1.2 has the best fit
## Chi-Squared Difference Test
## 
##         Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)
## m_ch5   57 30256 30508 3895.6                              
## m_ch1.2 58 30254 30501 3895.6  0.0024482       1     0.9605
# Candidate mod_ch6: votes is regressed on vk_reach in addition to main_topic
# and tw_reach; tw_rep_q keeps tw_like as a predictor.
mod_ch6 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach+vk_reach
tw_retw~main_topic+tw_reach
tw_rep_q~main_topic+tw_reach+tw_like
tw_like~main_topic+tw_reach
vk_like~main_topic+vk_reach
vk_com_rep~main_topic+vk_reach
"
# Fit the candidate on ch_sem and compare it with the current best model.
m_ch6 <- sem(mod_ch6, data = ch_sem )
anova(m_ch1.2,m_ch6)
# m_ch6 has a slightly better fit
## Chi-Squared Difference Test
## 
##         Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)  
## m_ch6   57 30250 30502 3889.5                                
## m_ch1.2 58 30254 30501 3895.6     6.0751       1    0.01371 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Candidate mod_ch7: mod_ch6 plus votes as a predictor of tw_retw.
mod_ch7 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach+vk_reach
tw_retw~main_topic+tw_reach+votes
tw_rep_q~main_topic+tw_reach+tw_like
tw_like~main_topic+tw_reach
vk_like~main_topic+vk_reach
vk_com_rep~main_topic+vk_reach
"
# Fit the candidate on ch_sem and compare it with m_ch6.
m_ch7 <- sem(mod_ch7, data = ch_sem )
anova(m_ch6,m_ch7)
# m_ch6 has the best fit
## Chi-Squared Difference Test
## 
##       Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## m_ch6 57 30250 30502 3889.5                                  
## m_ch7 60 30277 30513 3922.9     33.407       3  2.643e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Candidate mod_ch8: mod_ch6 plus votes as a predictor of tw_rep_q.
mod_ch8 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach+vk_reach
tw_retw~main_topic+tw_reach
tw_rep_q~main_topic+tw_reach+tw_like+votes
tw_like~main_topic+tw_reach
vk_like~main_topic+vk_reach
vk_com_rep~main_topic+vk_reach
"
# Fit the candidate on ch_sem and compare it with m_ch6.
m_ch8 <- sem(mod_ch8, data = ch_sem )
anova(m_ch6,m_ch8)
# m_ch6 has the best fit
## Chi-Squared Difference Test
## 
##       Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## m_ch6 57 30250 30502 3889.5                                  
## m_ch8 60 30296 30532 3941.2     51.639       3  3.576e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Candidate mod_ch9: mod_ch6 plus votes as a predictor of tw_like.
mod_ch9 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach+vk_reach
tw_retw~main_topic+tw_reach
tw_rep_q~main_topic+tw_reach+tw_like
tw_like~main_topic+tw_reach+votes
vk_like~main_topic+vk_reach
vk_com_rep~main_topic+vk_reach
"
# Fit the candidate on ch_sem and compare it with m_ch6.
m_ch9 <- sem(mod_ch9, data = ch_sem )
anova(m_ch6,m_ch9)
# m_ch6 has the best fit
# NOTE(review): the printed Chisq diff is negative and m_ch9 shows lower
# AIC/BIC than m_ch6 -- confirm that this comparison supports keeping m_ch6.
## Chi-Squared Difference Test
## 
##       Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)
## m_ch6 57 30250 30502 3889.5                              
## m_ch9 60 29888 30124 3533.3    -356.22       3          1
# Candidate mod_ch10: mod_ch6 plus votes as a predictor of vk_com_rep.
mod_ch10 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach+vk_reach
tw_retw~main_topic+tw_reach
tw_rep_q~main_topic+tw_reach+tw_like
tw_like~main_topic+tw_reach
vk_like~main_topic+vk_reach
vk_com_rep~main_topic+vk_reach+votes
"
# Fit the candidate on ch_sem and compare it with m_ch6.
m_ch10 <- sem(mod_ch10, data = ch_sem )
anova(m_ch6,m_ch10)
# m_ch6 has the best fit
## Chi-Squared Difference Test
## 
##        Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## m_ch6  57 30250 30502 3889.5                                  
## m_ch10 60 30305 30541 3950.7     61.144       3  3.349e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Candidate mod_ch11: mod_ch6 plus vk_like as a predictor of vk_com_rep.
mod_ch11 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach+vk_reach
tw_retw~main_topic+tw_reach
tw_rep_q~main_topic+tw_reach+tw_like
tw_like~main_topic+tw_reach
vk_like~main_topic+vk_reach
vk_com_rep~main_topic+vk_reach+vk_like
"
m_ch11 <- sem(mod_ch11, data = ch_sem )
# Candidate mod_ch12: the reverse direction, i.e. mod_ch6 plus vk_com_rep as a
# predictor of vk_like.
mod_ch12 <- "
vk_reach =~ vk_v+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach+vk_reach
tw_retw~main_topic+tw_reach
tw_rep_q~main_topic+tw_reach+tw_like
tw_like~main_topic+tw_reach
vk_like~main_topic+vk_reach+vk_com_rep
vk_com_rep~main_topic+vk_reach
"
m_ch12 <- sem(mod_ch12, data = ch_sem )
# Compare each of the two candidates with m_ch6.
anova(m_ch6,m_ch11)
anova(m_ch6,m_ch12)
# m_ch6 has the best fit
## Chi-Squared Difference Test
## 
##        Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)  
## m_ch6  57 30250 30502 3889.5                                
## m_ch11 60 30252 30488 3897.4      7.899       3    0.04815 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Chi-Squared Difference Test
## 
##        Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## m_ch6  57 30250 30502 3889.5                                  
## m_ch12 60 30268 30504 3913.2     23.694       3  2.894e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
At the end of this procedure, the structure with the best fit is selected; in this case, it is model 6 (m_ch6). It can be visualised as follows.
#install.packages("lavaanPlot",dependencies = T)
library(lavaanPlot)
# Path diagram of the selected model (m_ch6) with coefficient labels on the
# edges. `stars` must be exactly "regress": a value padded with blanks matches
# none of the supported options ("regress", "latent", "covs"), so no
# significance stars would be drawn on the regression paths.
lavaanPlot(model = m_ch6, coefs = TRUE,
           edge_options = list(fontsize = 14, color = "grey"),
           node_options = list(fontsize = 16), stars = "regress")
 
The same procedure is repeated for the ROI petition signing.
# SEM path-significance testing for participation in ROI petition signing.
# Keep the first 16 columns of `roi` as the SEM dataset.
roi_sem <- roi[, 1:16]
# Standardise the continuous variables (columns 2 and 4 to 16) in one pass;
# as.vector() drops the matrix shape and attributes that scale() returns.
scaled_cols <- c(2, 4:16)
roi_sem[, scaled_cols] <- lapply(
  roi_sem[, scaled_cols, drop = FALSE],
  function(column) as.vector(scale(column))
)
# Baseline ROI model mod_roi0. `=~` lines define the latent variables and `~`
# lines the regression paths.
mod_roi0 <- "
vk_reach =~ vk_v+vk_fol+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach
tw_rep_q~tw_like+tw_retw
tw_like~tw_retw
vk_like~vk_com_rep+vk_reach
vk_com_rep~vk_reach
"
m_roi0 <- sem(mod_roi0, data = roi_sem )
# Candidate mod_roi1: mod_roi0 plus the path tw_reach -> tw_retw.
mod_roi1 <- "
vk_reach =~ vk_v+vk_fol+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach
tw_rep_q~tw_like+tw_retw
tw_like~tw_retw
tw_retw~tw_reach
vk_like~vk_com_rep+vk_reach
vk_com_rep~vk_reach
"
m_roi1 <- sem(mod_roi1, data = roi_sem )
# Compare the candidate with the baseline.
anova(m_roi0,m_roi1)
# m_roi0 has a better fit
## Chi-Squared Difference Test
## 
##        Df    AIC    BIC  Chisq Chisq diff Df diff Pr(>Chisq)
## m_roi0 82 3363.3 3514.3 6788.8                              
## m_roi1 82 3591.8 3751.5 5445.6    -1343.3       0
# Candidate mod_roi2: mod_roi0 plus tw_reach as a predictor of tw_like.
mod_roi2 <- "
vk_reach =~ vk_v+vk_fol+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach
tw_rep_q~tw_like+tw_retw
tw_like~tw_retw+tw_reach
vk_like~vk_com_rep+vk_reach
vk_com_rep~vk_reach
"
# Fit the candidate on roi_sem and compare it with m_roi0.
m_roi2 <- sem(mod_roi2, data = roi_sem )
anova(m_roi0,m_roi2)
# m_roi2 has a better fit
## Chi-Squared Difference Test
## 
##        Df    AIC    BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## m_roi2 81 3094.8 3250.2 6518.4                                  
## m_roi0 82 3363.3 3514.3 6788.8     270.48       1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Candidate mod_roi3: mod_roi2 plus tw_reach as a predictor of tw_rep_q.
mod_roi3 <- "
vk_reach =~ vk_v+vk_fol+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach
tw_rep_q~tw_like+tw_retw+tw_reach
tw_like~tw_retw+tw_reach
vk_like~vk_com_rep+vk_reach
vk_com_rep~vk_reach
"
# Fit the candidate on roi_sem and compare it with m_roi2.
m_roi3 <- sem(mod_roi3, data = roi_sem )
anova(m_roi2,m_roi3)
# m_roi3 has a better fit
## Chi-Squared Difference Test
## 
##        Df    AIC    BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## m_roi3 80 2953.0 3112.7 6374.6                                  
## m_roi2 81 3094.8 3250.2 6518.4     143.76       1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Candidate mod_roi4: mod_roi3 plus the path votes -> tw_retw.
mod_roi4 <- "
vk_reach =~ vk_v+vk_fol+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach
tw_rep_q~tw_like+tw_retw+tw_reach
tw_like~tw_retw+tw_reach
tw_retw~votes
vk_like~vk_com_rep+vk_reach
vk_com_rep~vk_reach
"
# Fit the candidate on roi_sem and compare it with m_roi3.
m_roi4 <- sem(mod_roi4, data = roi_sem )
anova(m_roi3,m_roi4)
# m_roi3 has a better fit
## Chi-Squared Difference Test
## 
##        Df    AIC    BIC  Chisq Chisq diff Df diff Pr(>Chisq)
## m_roi3 80 2953.0 3112.7 6374.6                              
## m_roi4 82 4258.6 4418.2 6112.3    -262.27       2          1
# Candidate mod_roi5: mod_roi3 plus vk_com_rep as a predictor of votes.
mod_roi5 <- "
vk_reach =~ vk_v+vk_fol+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach+vk_com_rep
tw_rep_q~tw_like+tw_retw+tw_reach
tw_like~tw_retw+tw_reach
vk_like~vk_com_rep+vk_reach
vk_com_rep~vk_reach
"
# Fit the candidate on roi_sem and compare it with m_roi3.
m_roi5 <- sem(mod_roi5, data = roi_sem )
anova(m_roi3,m_roi5)
# m_roi5 has a better fit
## Chi-Squared Difference Test
## 
##        Df    AIC    BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## m_roi5 79 2931.9 3095.9 6351.5                                  
## m_roi3 80 2953.0 3112.7 6374.6     23.116       1  1.525e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Candidate mod_roi6: votes is regressed on vk_like instead of vk_com_rep
# (otherwise the same paths as mod_roi5).
mod_roi6 <- "
vk_reach =~ vk_v+vk_fol+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach+vk_like
tw_rep_q~tw_like+tw_retw+tw_reach
tw_like~tw_retw+tw_reach
vk_like~vk_com_rep+vk_reach
vk_com_rep~vk_reach
"
# Fit the candidate on roi_sem and compare it with m_roi5.
m_roi6 <- sem(mod_roi6, data = roi_sem )
anova(m_roi5,m_roi6)
# m_roi5 has a better fit
## Chi-Squared Difference Test
## 
##        Df    AIC    BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## m_roi5 79 2931.9 3095.9 6351.5                                  
## m_roi6 81 2972.0 3127.3 6395.5     44.066       2  2.699e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Candidate mod_roi7: mod_roi5 plus vk_like as a predictor of votes.
mod_roi7 <- "
vk_reach =~ vk_v+vk_fol+vk_fr+vk_gr
tw_reach =~ tw_fr+tw_fol
vk_com_rep =~ vk_com+vk_rep
tw_rep_q=~ tw_q+tw_rep
votes ~ main_topic + tw_reach+vk_com_rep+vk_like
tw_rep_q~tw_like+tw_retw+tw_reach
tw_like~tw_retw+tw_reach
vk_like~vk_com_rep+vk_reach
vk_com_rep~vk_reach
"
# Fit the candidate on roi_sem and compare it with m_roi5.
m_roi7 <- sem(mod_roi7, data = roi_sem )
anova(m_roi5,m_roi7)
# m_roi5 has the best fit
## Chi-Squared Difference Test
## 
##        Df    AIC    BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## m_roi5 79 2931.9 3095.9 6351.5                                  
## m_roi7 80 2972.8 3132.5 6394.4     42.894       1  5.778e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Path diagram of the selected ROI model (m_roi5) with coefficient labels on
# the edges. `stars` must be exactly "regress": a value padded with blanks
# matches none of the supported options ("regress", "latent", "covs"), so no
# significance stars would be drawn on the regression paths.
lavaanPlot(model = m_roi5, coefs = TRUE,
           edge_options = list(fontsize = 14, color = "grey"),
           node_options = list(fontsize = 16), stars = "regress")
 
Fitting the parameters
Once the structures are learned, we can fit the BN parameters. First, we need to create an empty graph for each of the two networks.
# Both networks are built over the same nine nodes, so the node set is
# declared once and reused for the two empty DAGs.
bn_nodes <- c("vk_reach", "tw_reach", "vk_com_rep", "tw_rep_q", "votes",
              "main_topic", "tw_retw", "vk_like", "tw_like")
newdag_ch <- empty.graph(nodes = bn_nodes)   # empty DAG for the first network
newdag_roi <- empty.graph(nodes = bn_nodes)  # empty DAG for the ROI network
Then, we will set up all of the learned arcs.
# Arcs of the first network, listed as (from, to) pairs in the order in which
# they are added to the DAG.
ch_arcs <- matrix(c(
  "main_topic", "votes",
  "tw_reach",   "votes",
  "vk_reach",   "votes",
  "main_topic", "tw_retw",
  "tw_reach",   "tw_retw",
  "main_topic", "tw_like",
  "tw_reach",   "tw_like",
  "main_topic", "tw_rep_q",
  "tw_reach",   "tw_rep_q",
  "tw_like",    "tw_rep_q",
  "main_topic", "vk_like",
  "vk_reach",   "vk_like",
  "main_topic", "vk_com_rep",
  "vk_reach",   "vk_com_rep"
), ncol = 2, byrow = TRUE, dimnames = list(NULL, c("from", "to")))
# Add the arcs one at a time with set.arc().
for (arc_idx in seq_len(nrow(ch_arcs))) {
  newdag_ch <- set.arc(newdag_ch,
                       from = ch_arcs[arc_idx, "from"],
                       to = ch_arcs[arc_idx, "to"])
}
# Arcs of the ROI network, one per regression path of the selected SEM
# (mod_roi5).
# votes ~ main_topic + tw_reach + vk_com_rep
newdag_roi <- set.arc(newdag_roi, from = "main_topic", to = "votes")
newdag_roi <- set.arc(newdag_roi, from = "tw_reach", to = "votes")
# mod_roi5 regresses votes on vk_com_rep, not on vk_reach, so this arc starts
# at vk_com_rep; vk_reach is linked to votes only through vk_com_rep.
newdag_roi <- set.arc(newdag_roi, from = "vk_com_rep", to = "votes")
# tw_rep_q ~ tw_like + tw_retw + tw_reach
newdag_roi <- set.arc(newdag_roi, from = "tw_retw", to = "tw_rep_q")
newdag_roi <- set.arc(newdag_roi, from = "tw_reach", to = "tw_rep_q")
newdag_roi <- set.arc(newdag_roi, from = "tw_like", to = "tw_rep_q")
# tw_like ~ tw_retw + tw_reach
newdag_roi <- set.arc(newdag_roi, from = "tw_retw", to = "tw_like")
newdag_roi <- set.arc(newdag_roi, from = "tw_reach", to = "tw_like")
# vk_like ~ vk_com_rep + vk_reach
newdag_roi <- set.arc(newdag_roi, from = "vk_com_rep", to = "vk_like")
newdag_roi <- set.arc(newdag_roi, from = "vk_reach", to = "vk_like")
# vk_com_rep ~ vk_reach
newdag_roi <- set.arc(newdag_roi, from = "vk_reach", to = "vk_com_rep")
# Subsetting the data frames to the nine network variables, in node order.
bn_cols <- c("vk_reach", "tw_reach", "vk_com_rep", "tw_rep_q", "votes",
             "main_topic", "tw_retw", "vk_like", "tw_like")
roi_fit_dat <- roi[, bn_cols]
# In `ch` the vote count is stored as "Number.of.signatures"; select it under
# that name, then label the columns with the node names so it becomes "votes".
ch_fit_dat <- ch[, replace(bn_cols, bn_cols == "votes", "Number.of.signatures")]
names(ch_fit_dat) <- bn_cols
After that, we can fit the parameters.
# Estimate the parameters of each network from its own data subset.
fit_ch <- bn.fit(x = newdag_ch, data = ch_fit_dat)
fit_roi <- bn.fit(x = newdag_roi, data = roi_fit_dat)
The results are presented in Fig 3, Table 4 and Table 5 of the main manuscript.