Off-The-record

מה מעסיק את חברי הכנסת שלנו ומה דעותיהם בתחומים שונים, דרך המילים בהן הם בוחרים להשתמש ברשתות החברתיות
The 2.txt file for the search was not created. #31

Closed nir-jackson closed 9 years ago

nir-jackson commented 9 years ago


The script didn't create the 2.txt file that holds all the words with two or fewer letters. Please fix this.

AlexGr2 commented 9 years ago

I can't find it in the C# script. Do you have other scripts that create it?

the script didn't create the 2.txt file that holds all the words with 2 or less letters. please fix this.

nir-jackson commented 9 years ago

add bool first = true; before all of the loops

then add:

if (first){ word = "2"; first=false; } after this line: word = alpha[num] + alpha[num2] + alpha[num3]

nir-jackson commented 9 years ago

this won't create the אאא file (which is empty anyway) and will create the 2 file instead

AlexGr2 commented 9 years ago

2.txt creation added

Reopened #31

nir-jackson commented 9 years ago

@AlexGr2 just one thing: the third for loop needs to run from 0 to 26 (not 23). Please send me all the updated scripts (including the potilics.txt without 66398526339).

nir-jackson commented 9 years ago

*please send me all the updated scripts (including the potilics.txt without 66398526339)

AlexGr2 commented 9 years ago

Here are all the updated scripts, with no per-count word check and with 2.txt creation, plus potilics.txt without 66398526339.

*please send me all the updated scripts (including the potilics.txt without 66398526339)

!/usr/bin/env python

-- coding: utf-8 --

import urllib, json import collections from collections import Counter import operator import sys, os import codecs author = 'AlexGruber'

threeLetterWordsPath = 'testFolder' perDateCountPath = 'WholeDates' allWordsPath = 'wordAllCount' rootPath = "c:/"

wordCountPerPolitics = collections.defaultdict(int) wordIdPerPolitics = collections.defaultdict(list) politicsDictWords = collections.defaultdict(dict) politicsDictId = collections.defaultdict(dict) dateDict = collections.OrderedDict() dateMutliArray = []

list_NextUrls = list() buffer_list = list() buffer_string = "" allPostsIDsDict ={} wordIdDict = collections.defaultdict(list) politicDictWordsCount={} politicDictMilitaryCount = {} politicDictSahbekimCount = {} politicDictNarcistCount = {} politicDictKalkalaCount = {} politicDictRahlanCount = {} tempdict={} errorCount = 0

testlist = list() threeLetterList = list()

stuctList = list()

threeLetterDict = collections.defaultdict(list)

dateDictForThreeLetter = {}

# Tokens the word counters skip: common Hebrew/English stop words plus stray
# punctuation. Callers test membership with `word in avoidWords`.
# Fix: the comma between " " and u':' was missing at a line continuation, so
# the two adjacent literals were implicitly concatenated into u' :' and a bare
# ':' token was never filtered. Entries and their order are otherwise kept
# exactly as they were (including the repeated and empty strings).
avoidWords = [
    u'ישראל', u'כל', u'את', u'של', u'זה', u'על', u'או', u'גם', u'אז', u'רק',
    'and', 'the', 'of', u'עד', u'אשר', u'כי', u'אם',
    'in', 'to', 'a', 'that', 'is', 'for', 'with', 'are', 'this', 'have',
    'The', 'on', u'-', 'it', 'from', 'a',
    'at', 'as', u'היא', u'אני', u'לא', u'עם', u'הוא', u'•', u'(', u')',
    u' ', u'', u"", "", " ",
    u':', u'"', u',', u'–', u'?', u'!', u'.', u'', u'', u'',
]

dateDict = collections.OrderedDict() allPostsIDsDict ={} wordIdDict = collections.defaultdict(list) tempdict={} errorCount = 0

def createBuffer_Posts(url): list = "" response = urllib.urlopen(url) data = json.loads( try : parent = data["posts"]["data"] for index in range(len(parent)): try: allPostsIDsDict[parent[index]["id"]] = [parent[index]["message"] , parent[index]["created_time"]] except: errorCount=1 except: print "no parent data"

return list

def perDateCount(politicFileName):

tempDate = ""
tempIdList = list()
runOnce = True
obj = []
print "Counting Words"
print len(allPostsIDsDict)

tempJson = {"Dates" :{}}
for date, msg in dateDict.iteritems() :
    print date
    Constdate = date.split("T")[0].split('-')[0] + ":" + date.split("T")[0].split('-')[1]
    if runOnce == True:
        tempDate = Constdate
        runOnce = False
    if tempDate == Constdate:
            tempWordsList = msg[0].split()
            errorCount = 1

        for word in tempWordsList:
            # if not word in avoidWords:
            word = word.replace(u'.', u"")
            word = word.replace(u'!', u"")
            word = word.replace(u'?', u"")
            word = word.replace(u'-', u"")
            word = word.replace(u',', u"")
            word = word.replace(u'"', u"")
            word = word.replace(u':', u"")
            word = word.replace(u'(', u"")
            word = word.replace(u')', u"")
            word = word.replace(u'*', u"")

            if not word in avoidWords:

                     errorCount = 1
            JsonTempDate = tempDate
            tempDate = Constdate
            mostCommon = Counter(tempBuildString).most_common(10)
            printHeb = dict(mostCommon)
            sorted_x = sorted(printHeb.items(), key = operator.itemgetter(1), reverse=True)
            for key, value in sorted_x:
                  if wordIdDict.has_key(key):
                     obj.append({"Word" : key ,"Amount" : str(value), "array_id" : list(set(wordIdDict[key]))})
            del tempBuildString[:]
            del tempIdList[:]
            del tempWordsList[:]

# with open('c:\\WholeDates\\' + politicFileName + '.txt', 'w') as outfile:
#                     json.dump(tempJson, outfile, indent=4)
del tempBuildString[:]
del tempIdList[:]
del tempWordsList[:]
print "finished counting " + politicFileName

# AllwordCounter(politicFileName)
# Counts every word of every post held in the module-level allPostsIDsDict
# (value[0] is split into words, value[1] is stored as the word's date),
# skipping tokens found in avoidWords, and writes the resulting
# {politicFileName: [{word: count}, ...]} JSON to
# <rootPath>/<allWordsPath>/<politicFileName>.txt.
# NOTE(review): this paste lost the function's indentation; the body below
# sits at column 0 and is kept exactly as found.
# NOTE(review): wordCountPerPolitics is a module-level defaultdict that is not
# reset here, so counts appear to accumulate across calls -- confirm intent.
# NOTE(review): nothing visible in this paste ever fills wordIdPerPolitics, so
# the has_key(word) guard below would never pass; lines may be missing.
def AllwordCounter(politicFileName):

# print "Counting Words"
# print len(allPostsIDsDict)

for keyDict, valueDict in allPostsIDsDict.iteritems() :
    tempWordsList = valueDict[0].split()
    id = []
    for word in tempWordsList:
        # The stop-word test runs BEFORE punctuation is stripped, so a token
        # such as u'the.' is not filtered even though u'the' is listed.
        if not word in avoidWords:
            word = word.replace(u'.', u"")
            word = word.replace(u'!', u"")
            word = word.replace(u'?', u"")
            word = word.replace(u'-', u"")
            word = word.replace(u',', u"")
            word = word.replace(u'"', u"")
            word = word.replace(u':', u"")
            word = word.replace(u'(', u"")
            word = word.replace(u')', u"")
            word = word.replace(u'*', u"")
            # word = word.replace(u'\'', u"")
            wordCountPerPolitics[word] += 1
            # Later posts overwrite earlier ones: only one date is kept per word.
            dateDictForThreeLetter[word] = valueDict[1]

# Both registries store references to the shared module-level dicts, not copies.
politicsDictWords[politicFileName] = wordCountPerPolitics
politicsDictId[politicFileName] = wordIdPerPolitics

for politic, dictOfWords in politicsDictWords.iteritems() :
    # print politic
    # tempJson is rebuilt on every iteration; only the last one is dumped below.
    tempJson = {politicFileName:[],}
    for word, count in dictOfWords.iteritems() :
        # print word[1]
        tempJson[politicFileName].append({word : count})
        if wordIdPerPolitics.has_key(word):
            if dateDictForThreeLetter.has_key(word):
                splitAllWordsByThreeLetters(threeLetterList, word, count, wordIdPerPolitics[word], politic, threeLetterDict, wordIdPerPolitics[word], dateDictForThreeLetter[word])
# printToJsonTempDict(threeLetterDict, politicFileName)
with open(os.path.join(rootPath,allWordsPath, politicFileName + '.txt'), 'w') as outfile:
     json.dump(tempJson, outfile, indent=4)
print "finished counting " + politicFileName

def splitAllWordsByThreeLetters(listOfThreeLetters, word, count, wordIds, folderName, tempDictJson, ids, date):
    """Append one word record to ``tempDictJson`` under the word itself.

    The record is a one-element list holding a dict with the keys ``Word``,
    ``Amount`` (the count as a string), ``date`` (``date`` cut at the first
    ``'+'``) and ``Array_id``.

    Parameters:
        listOfThreeLetters, wordIds, folderName: accepted for signature
            compatibility; not used by this variant.
        word: the word; also the key used in ``tempDictJson``.
        count: occurrence count, stored via ``str(count)``.
        tempDictJson: mapping of key -> list of records, mutated in place.
        ids: value stored under ``Array_id``.
        date: string; only the part before the first ``'+'`` is kept.

    Returns:
        None. Failures are reported on stdout instead of being raised.
    """
    try:
        # Build the record before touching the mapping, so that a failure
        # (e.g. ``date`` is not a string) cannot leave an empty bucket behind
        # when ``tempDictJson`` is a defaultdict.
        record = [{"Word": word, "Amount": str(count), "date": date.split('+')[0], "Array_id": ids}]
        tempDictJson[word].append(record)
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        print("Unexpected error: %s" % (sys.exc_info()[0],))

# printToJsonTempDict(tempDictJson, folderName)
# For every key in tempDictJson, makes sure the folder
# <rootPath>/<threeLetterWordsPath>/<folderName> exists and appends
# (mode 'a') the JSON dump of that key's value to <key>.txt inside it.
# NOTE(review): this paste fused the def line with the for statement and lost
# the body's base indentation; the code is kept exactly as found.
# NOTE(review): the column-0 print line below may originally have been
# commented out -- cannot tell from this paste.
# NOTE(review): because the file is opened in append mode, running this more
# than once writes several JSON documents back to back into the same file.
def printToJsonTempDict(tempDictJson, folderName): for keyDict, valueDict in tempDictJson.iteritems() :

print keyDict , valueDict

    newpath = os.path.join(rootPath,threeLetterWordsPath, folderName)
    if not os.path.exists(newpath): os.makedirs(newpath)
    with open(os.path.join(rootPath,threeLetterWordsPath,folderName, keyDict + '.txt'), 'a') as outfile:
         json.dump(tempDictJson[keyDict], outfile, indent=4)

tempNewWordsDict = {}

politicsArr = [line.strip() for line in open(os.path.join(os.path.dirname(os.path.abspath(file)), 'potilics.txt'))]

for politic in politicsArr: url = "" + politic + "?fields=posts.limit(200)%7Bmessage%7D&access_token=1719315164960950%7CnKFpk2SebwixsCQS3y7zQDPA1Ow" createBuffer_Posts(url) AllwordCounter(politic)

for key , word in threeLetterDict.iteritems() :
    # print word[0][0]['date']
    # print word[0][0]['Word'][:3]
    # print word[0][0]['Array_id']

    path = os.path.join(rootPath,threeLetterWordsPath, politic, word[0][0]['Word'][:3] + ".txt")
    if os.path.exists(path):
        with open(path, "r") as jsonFile:
            jsonToUpdate = json.load(jsonFile)
        found = False
        try :
            for index in range(0, len(jsonToUpdate)):
                # print word[0][0]['Word']
                if word[0][0]['Word'] == jsonToUpdate[index][0]["Word"]:
                    found = True
                    if word[0][0]['date'] > jsonToUpdate[index][0]["date"]:
                       jsonToUpdate[index][0]["date"] = word[0][0]['date']
                       jsonToUpdate[index][0]["Amount"] = int(jsonToUpdate[index][0]["Amount"]) + int(word[0][0]['Amount'])
                       jsonToUpdate[index][0]["Array_id"] = jsonToUpdate[index][0]["Array_id"] + word[0][0]["Array_id"]
                       with open(path, 'w') as UpdateJson:

            if found == False:
                # print word[0][0]['Word'], word[0][0]["date"], index ,politic , int(word[0][0]['Amount'])
                tempNewWordsDict[word[0][0]['Word']] = word[0][0]["date"] ,int(word[0][0]['Amount']), path , word[0][0]['Array_id']
             print "no parent data" , politic

for key , word in tempNewWordsDict.iteritems():
    # print key
    # print word[1]
    # print word[2]
    with open(word[2], 'r') as jsonToUpdate:
        json_data = json.load(jsonToUpdate)
        # json_data[0].append([{"Word" : key ,"Amount" : str(word[1]),"date" : word[0].split('+')[0]}])
        json_data.append([{"Word" : key ,"Amount" : str(word[1]),"date" : word[0].split('+')[0] , "Array_id": word[3]}])

    with open(word[2], 'w') as f:


print "DONE"

!/usr/bin/env python

-- coding: utf-8 --

import urllib, json import collections from collections import Counter import operator import sys, os import codecs author = 'AlexGruber'

perDateCountPath = 'WholeDates' allWordsPath = 'wordAllCount' threeLetterWordsPath = 'testFolder' listOfCheckedWordsPath = "WCheckedList" rootPath = "c:/"

politicsFile = '\ThreeLetterWords.txt'

wordCountPerPolitics = collections.defaultdict(int) wordIdPerPolitics = collections.defaultdict(list) politicsDictWords = collections.defaultdict(dict) politicsDictId = collections.defaultdict(dict) dateDict = collections.OrderedDict() dateMutliArray = []

list_NextUrls = list() buffer_list = list() buffer_string = "" allPostsIDsDict ={} wordIdDict = collections.defaultdict(list) politicDictWordsCount={} politicDictMilitaryCount = {} politicDictSahbekimCount = {} politicDictNarcistCount = {} politicDictKalkalaCount = {} politicDictRahlanCount = {} tempdict={} errorCount = 0

testlist = list() threeLetterList = list()

stuctList = list()

threeLetterDict = collections.defaultdict(list)

dateDictForThreeLetter = {}

# Tokens the word counters skip: common Hebrew/English stop words plus stray
# punctuation. Callers test membership with `word in avoidWords`.
# Fix: the comma between " " and u':' was missing at a line continuation, so
# the two adjacent literals were implicitly concatenated into u' :' and a bare
# ':' token was never filtered. Entries and their order are otherwise kept
# exactly as they were (including the repeated and empty strings).
avoidWords = [
    u'ישראל', u'כל', u'את', u'של', u'זה', u'על', u'או', u'גם', u'אז', u'רק',
    'and', 'the', 'of', u'עד', u'אשר', u'כי', u'אם',
    'in', 'to', 'a', 'that', 'is', 'for', 'with', 'are', 'this', 'have',
    'The', 'on', u'-', 'it', 'from', 'a',
    'at', 'as', u'היא', u'אני', u'לא', u'עם', u'הוא', u'•', u'(', u')',
    u' ', u'', u"", "", " ",
    u':', u'"', u',', u'–', u'?', u'!', u'.', u'', u'', u'',
]

militaryWords = u'צבא' , u'צה״ל', u'רמטכ״ל', u'נשק',u'אירן', u'אטום', u'ביטחון' ,\ u'עזה' , u'ג׳האד', u'מלחמה', u'מבצע', u'סכסוך', u'איראני', u'הביטחון', u'חיזבאללה', u'דאעש', u'קבינט', \ u'חמאס' , u'ג׳איסלאמית', u'טיל', u'קסאמים' , u'ג׳קסאם', u'ברזל'

sahbekimWords = u'אנחנו', u'יחד', u'ביחד', u'שלנו', u'כולנו', u'רובנו', u'רוב', u'עם', u'קבוצה', \ u'שיתוף', u'שותף', u'צוות'

narcistWords = u'אני', 'me', 'I'

# Economy-related keywords; CheckWordsAgainstThelist tests post tokens with
# `checkWord in kalkalaWords`.
# Fix: a comma was missing at the last line continuation, so two adjacent
# keyword literals were implicitly concatenated into a single string that no
# token could ever match. All entries and their order are otherwise unchanged.
# NOTE(review): the two-word entry (the one containing a space) cannot match
# either, because tokens come from str.split() -- left in place, confirm.
kalkalaWords = (
    u'כסף', u'כלכלה', u'עושר', u'עוני', u'העוני', u'קו העוני', u'תקציב',
    u'בורסה', u'מע״מ', u'מד״ד', u'מדד',
    u'העליון', u'העליון', u'מעמד', u'הביניים', u'משכורות', u'שכר', u'דירות', u'דיור',
    u'נדל״ן', u'קניה', u'מוצרים', u'מילקי', u'קוטג׳', u'ברלין', u'תקציבים',
    u'מסים', u'מיסים', u'הכנסה',
    u'הכנסות', u'חשבת', u'החשבת', u'תוצר', u'יצוא', u'יבוא', u'תוצר', u'ייצוא', u'ייבוא',
)

avoidWordsRahlan = u'עו"ד', u'אנשים', u'ועשיה', u'איש', u'לבית', u'שאן', u'חזק', u'עיר', \ u'ראש', u'עיריית', u'ראשון', u'לציון', u'רצון', u'שאן', u'חזק', u'עיר'

def getTargetIds(jsonData, list): response = urllib.urlopen(jsonData) data = json.loads(

if 'next' not in data["posts"]["paging"]:

 if 'next' not in data["paging"]:
    raise ValueError("No data for target")
    raise SystemExit(0)
     # print data["paging"]["next"]
     getTargetIds(data["paging"]["next"] , list)
    print "END!!!!"

def createBuffer_Next(url): list = "" tempDate="" response = urllib.urlopen(url) data = json.loads( try : parent = data["data"] for index in range(len(parent)): try: dateDict[parent[index]["created_time"]] = [parent[index]["message"] , parent[index]["id"]] allPostsIDsDict[parent[index]["id"]] = [parent[index]["message"], parent[index]["created_time"]] tempdict[parent[index]["created_time"]] = parent[index]["message"] list += parent[index]["message"] except:

print "Unexpected error:" , sys.exc_info()[0]

    print "no parent data"
dateDict['6-6-6T6-6-6'] = ['end', 'end']
return list

def createBuffer_Posts(url): list = "" tempDate="" response = urllib.urlopen(url) data = json.loads( try : parent = data["posts"]["data"] for index in range(len(parent)): try: dateDict[parent[index]["created_time"]] = [parent[index]["message"] , parent[index]["id"]] allPostsIDsDict[parent[index]["id"]] = [parent[index]["message"], parent[index]["created_time"]] tempdict[parent[index]["created_time"]] = parent[index]["message"] list += parent[index]["message"] except: errorCount=1 except: print "no parent data"

return list

def perDateCount(politicFileName):

tempDate = ""
tempIdList = list()
runOnce = True
obj = []
print "Counting Words"
print len(allPostsIDsDict)

tempJson = {"Dates" :{}}
for date, msg in dateDict.iteritems() :
    print date
    Constdate = date.split("T")[0].split('-')[0] + ":" + date.split("T")[0].split('-')[1]
    if runOnce == True:
        tempDate = Constdate
        runOnce = False
    if tempDate == Constdate:
            tempWordsList = msg[0].split()
            errorCount = 1

        for word in tempWordsList:
            # if not word in avoidWords:
            # word = word.replace(u'.', u"")
            # word = word.replace(u'!', u"")
            # word = word.replace(u'?', u"")
            # word = word.replace(u'-', u"")
            # word = word.replace(u',', u"")
            # # word = word.replace(u'"', u"")
            # word = word.replace(u':', u"")
            # word = word.replace(u'(', u"")
            # word = word.replace(u')', u"")
            # word = word.replace(u'*', u"")

            if not word in avoidWords:
                 # print "accpted ", word

                     errorCount = 1
            # else:
            #     print "not accpted ", word
            # print tempDate
            JsonTempDate = tempDate
            tempDate = Constdate
            mostCommon = Counter(tempBuildString).most_common(10)
            printHeb = dict(mostCommon)
            sorted_x = sorted(printHeb.items(), key = operator.itemgetter(1), reverse=True)
            for key, value in sorted_x:
                  if wordIdDict.has_key(key):
                     obj.append({"Word" : key ,"Amount" : str(value), "array_id" : list(set(wordIdDict[key]))})
            del tempBuildString[:]
            del tempIdList[:]
            del tempWordsList[:]

# with open(perDateCountPath + '\\' + politicFileName + '.txt', 'w') as outfile:
with open(os.path.join(rootPath, perDateCountPath, politicFileName + '.txt') , 'w') as outfile:
                    json.dump(tempJson, outfile, indent=4)
del tempBuildString[:]
del tempIdList[:]
del tempWordsList[:]
print "finished counting " + politicFileName

# AllwordCounter(wordsList, politicFileName)
# Counts every word of every post held in the module-level allPostsIDsDict
# (value[0] is split into words, value[1] is stored as the word's date),
# skipping tokens found in avoidWords, hands threeLetterDict to
# printToJsonTempDict, and writes the resulting
# {politicFileName: [{word: count}, ...]} JSON to
# <rootPath>/<allWordsPath>/<politicFileName>.txt.
# `wordsList` is not referenced anywhere in the visible body.
# NOTE(review): this paste lost the function's indentation; the body below
# sits at column 0 and is kept exactly as found.
# NOTE(review): wordCountPerPolitics is a module-level defaultdict that is not
# reset here, so counts appear to accumulate across calls -- confirm intent.
# NOTE(review): nothing visible in this paste ever fills wordIdPerPolitics, so
# the has_key(word) guard below would never pass; lines may be missing.
def AllwordCounter(wordsList, politicFileName):

print "Counting Words"
print len(allPostsIDsDict)

for keyDict, valueDict in allPostsIDsDict.iteritems() :
    tempWordsList = valueDict[0].split()
    id = []
    for word in tempWordsList:
        # Punctuation stripping is disabled in this version, so words are
        # counted exactly as str.split() produced them.
        if not word in avoidWords:
            # word = word.replace(u'.', u"")
            # word = word.replace(u'!', u"")
            # word = word.replace(u'?', u"")
            # word = word.replace(u'-', u"")
            # word = word.replace(u',', u"")
            # word = word.replace(u'"', u"")
            # word = word.replace(u':', u"")
            # word = word.replace(u'(', u"")
            # word = word.replace(u')', u"")
            # word = word.replace(u'*', u"")
            # word = word.replace(u'\'', u"")
            wordCountPerPolitics[word] += 1
            # Later posts overwrite earlier ones: only one date is kept per word.
            dateDictForThreeLetter[word] = valueDict[1]

# Both registries store references to the shared module-level dicts, not copies.
politicsDictWords[politicFileName] = wordCountPerPolitics
politicsDictId[politicFileName] = wordIdPerPolitics

for politic, dictOfWords in politicsDictWords.iteritems() :
    print politic
    # tempJson is rebuilt on every iteration; only the last one is dumped below.
    tempJson = {politicFileName:[],}
    for word, count in dictOfWords.iteritems() :
        # print word[1]
        tempJson[politicFileName].append({word : count})
        if wordIdPerPolitics.has_key(word):
            if dateDictForThreeLetter.has_key(word):
                splitAllWordsByThreeLetters(threeLetterList, word, count, wordIdPerPolitics[word], politic, threeLetterDict, wordIdPerPolitics[word], dateDictForThreeLetter[word])
printToJsonTempDict(threeLetterDict, politicFileName)
# with open(allWordsPath + '\\' + politicFileName + '.txt', 'w') as outfile:
with open(os.path.join(rootPath, allWordsPath,  politicFileName + '.txt'), 'w') as outfile:
     json.dump(tempJson, outfile, indent=4)
print "finished counting " + politicFileName

def CheckWordsAgainstThelist(wordsList, politicFileName):

countMilitaryWords = 0
listWordsMilitaryWords = []
countSahbekimWords = 0
listWordsSahbekimWords = []
countNacistWords = 0
listWordsNacistWords = []
countKalkalaWords = 0
listWordsKalkalaWords = []
countRahlanWords = 0
listWordsRahlanWords = []

print  "keys number in the tempDict in CheckWordsAgainstTheList :  " , len(tempdict.keys())

for keyDict, valueDict in tempdict.iteritems() :
    tempWordsList = valueDict.split()

    for checkWord in tempWordsList:
        if not checkWord in avoidWords:
            checkWord = checkWord.replace(u'.', u"")
            checkWord = checkWord.replace(u'!', u"")
            checkWord = checkWord.replace(u'?', u"")
            checkWord = checkWord.replace(u'-', u"")
            checkWord = checkWord.replace(u',', u"")
            checkWord = checkWord.replace(u'"', u"")
            checkWord = checkWord.replace(u':', u"")
            checkWord = checkWord.replace(u'(', u"")
            checkWord = checkWord.replace(u')', u"")
            checkWord = checkWord.replace(u'*', u"")

            if checkWord in militaryWords:
            if checkWord in sahbekimWords:
            if checkWord in narcistWords:
            if checkWord in kalkalaWords:
            if checkWord in politicByWordsGut:
                    if not checkWord in politicByWords:
                        # print checkWord, len(politicByWords)

politicDictMilitaryCount[politicFileName] = {"amount" : countMilitaryWords/float(politicDictWordsCount[politicFileName]), "wordsArray" : Counter(listWordsMilitaryWords).most_common(3)}
politicDictSahbekimCount[politicFileName] = {"amount" : countSahbekimWords/float(politicDictWordsCount[politicFileName]), "wordsArray" : Counter(listWordsSahbekimWords).most_common(3)}
politicDictNarcistCount[politicFileName] = {"amount" : countNacistWords/float(politicDictWordsCount[politicFileName]), "wordsArray" : Counter(listWordsNacistWords).most_common(3)}
politicDictKalkalaCount[politicFileName] = {"amount" : countKalkalaWords/float(politicDictWordsCount[politicFileName]), "wordsArray" : Counter(listWordsKalkalaWords).most_common(3)}
politicDictRahlanCount[politicFileName] =  {"amount" : countRahlanWords, "wordsArray" : Counter(listWordsRahlanWords).most_common(3)}
print politicFileName, countMilitaryWords, countMilitaryWords/float(politicDictWordsCount[politicFileName])

del tempWordsList[:]

del listWordsMilitaryWords[:]
del listWordsSahbekimWords[:]
del listWordsNacistWords[:]
del listWordsKalkalaWords[:]
del listWordsRahlanWords[:]

def splitAllWordsByThreeLetters(listOfThreeLetters, word, count, wordIds, folderName, tempDictJson, ids, date):
    """Append one word record to ``tempDictJson``, bucketed by three-letter prefix.

    Words of three or more characters go into the bucket named by their first
    three characters, but only when that prefix is listed in
    ``listOfThreeLetters``; otherwise they are dropped. Words of two or fewer
    characters all go into the ``"2"`` bucket.

    The record is a one-element list holding a dict with the keys ``Word``,
    ``Amount`` (the count as a string), ``date`` (``date`` cut at the first
    ``'+'``) and ``Array_id``.

    Parameters:
        listOfThreeLetters: container of accepted three-character prefixes.
        word: the word to file.
        count: occurrence count, stored via ``str(count)``.
        wordIds, folderName: accepted for signature compatibility; unused.
        tempDictJson: mapping of bucket -> list of records, mutated in place.
        ids: value stored under ``Array_id``.
        date: string; only the part before the first ``'+'`` is kept.

    Returns:
        None. For words longer than two characters, failures are reported on
        stdout instead of being raised; for short words they propagate, as
        before.
    """
    def buildRecord():
        return [{"Word": word, "Amount": str(count), "date": date.split('+')[0], "Array_id": ids}]

    if len(word) <= 2:
        # Build the record before touching the mapping so a failure cannot
        # leave an empty "2" bucket behind when tempDictJson is a defaultdict.
        record = buildRecord()
        tempDictJson["2"].append(record)
        return

    try:
        prefix = word[:3]
        if prefix in listOfThreeLetters:
            record = buildRecord()
            tempDictJson[prefix].append(record)
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        print("Unexpected error: %s" % (sys.exc_info()[0],))

# printToJsonTempDict(tempDictJson, folderName)
# For every key in tempDictJson, makes sure the folder
# <rootPath>/<threeLetterWordsPath>/<folderName> exists and appends
# (mode 'a') the JSON dump of that key's value to <key>.txt inside it.
# NOTE(review): this paste fused the def line with the for statement and lost
# the body's base indentation; the code is kept exactly as found.
# NOTE(review): the column-0 print line below may originally have been
# commented out -- cannot tell from this paste.
# NOTE(review): because the file is opened in append mode, running this more
# than once writes several JSON documents back to back into the same file.
def printToJsonTempDict(tempDictJson, folderName): for keyDict, valueDict in tempDictJson.iteritems() :

print keyDict , valueDict

    newpath = os.path.join(rootPath,threeLetterWordsPath,folderName)
    if not os.path.exists(newpath): os.makedirs(newpath)
    with open(os.path.join(rootPath, threeLetterWordsPath, folderName, keyDict + '.txt'), 'a') as outfile:
         json.dump(tempDictJson[keyDict], outfile, indent=4)

def sortAndPrintMilitaryCount():
    """Rank ``politicDictMilitaryCount`` and append the results to two files.

    The full ranking (items sorted by value, descending) is appended to
    ``militaryCountRecords_1.txt`` and the top three entries to
    ``militaryCountRecords.txt``, both under
    ``<rootPath>/<listOfCheckedWordsPath>``.

    NOTE(review): the values are the dicts stored by
    CheckWordsAgainstThelist, so the ordering relies on Python 2 being able
    to compare dicts; Python 3 would raise TypeError here.
    """
    ranked = sorted(politicDictMilitaryCount.items(), key=operator.itemgetter(1), reverse=True)

    with open(os.path.join(rootPath, listOfCheckedWordsPath, "militaryCountRecords_1.txt"), 'a') as outfile:
        json.dump(ranked, outfile, indent=4)

    # Slice instead of indexing positions 0..2: the original raised
    # IndexError whenever fewer than three politicians had been collected.
    topThree = dict(ranked[:3])

    with open(os.path.join(rootPath, listOfCheckedWordsPath, "militaryCountRecords.txt"), 'a') as outfile:
        json.dump(sorted(topThree.items(), key=operator.itemgetter(1), reverse=True), outfile, indent=4)

def sortAndPrintSahbekimCount():
    """Rank ``politicDictSahbekimCount`` and append the results to two files.

    The full ranking (items sorted by value, descending) is appended to
    ``SahbekimCountRecords_1.txt`` and the top three entries to
    ``SahbekimCountRecords.txt``, both under
    ``<rootPath>/<listOfCheckedWordsPath>``.

    NOTE(review): the values are the dicts stored by
    CheckWordsAgainstThelist, so the ordering relies on Python 2 being able
    to compare dicts; Python 3 would raise TypeError here.
    """
    ranked = sorted(politicDictSahbekimCount.items(), key=operator.itemgetter(1), reverse=True)

    with open(os.path.join(rootPath, listOfCheckedWordsPath, "SahbekimCountRecords_1.txt"), 'a') as outfile:
        json.dump(ranked, outfile, indent=4)

    # Slice instead of indexing positions 0..2: the original raised
    # IndexError whenever fewer than three politicians had been collected.
    topThree = dict(ranked[:3])

    with open(os.path.join(rootPath, listOfCheckedWordsPath, "SahbekimCountRecords.txt"), 'a') as outfile:
        json.dump(sorted(topThree.items(), key=operator.itemgetter(1), reverse=True), outfile, indent=4)

def sortAndPrintNarcistCount():
    """Append the top three entries of ``politicDictNarcistCount`` to a file.

    Items are sorted by value, descending, and the first three are appended
    as JSON to ``<rootPath>/<listOfCheckedWordsPath>/narcistCountRecords.txt``.

    NOTE(review): the values are the dicts stored by
    CheckWordsAgainstThelist, so the ordering relies on Python 2 being able
    to compare dicts; Python 3 would raise TypeError here.
    """
    ranked = sorted(politicDictNarcistCount.items(), key=operator.itemgetter(1), reverse=True)

    # Slice instead of indexing positions 0..2: the original raised
    # IndexError whenever fewer than three politicians had been collected.
    topThree = dict(ranked[:3])

    with open(os.path.join(rootPath, listOfCheckedWordsPath, "narcistCountRecords.txt"), 'a') as outfile:
        json.dump(sorted(topThree.items(), key=operator.itemgetter(1), reverse=True), outfile, indent=4)

def sortAndPrintKalkalaCount():
    """Rank ``politicDictKalkalaCount`` and append the results to two files.

    The full ranking (items sorted by value, descending) is appended to
    ``kalkalaCountRecords_1.txt`` and the top three entries to
    ``kalkalaCountRecords.txt``, both under
    ``<rootPath>/<listOfCheckedWordsPath>``.

    NOTE(review): the values are the dicts stored by
    CheckWordsAgainstThelist, so the ordering relies on Python 2 being able
    to compare dicts; Python 3 would raise TypeError here.
    """
    ranked = sorted(politicDictKalkalaCount.items(), key=operator.itemgetter(1), reverse=True)

    with open(os.path.join(rootPath, listOfCheckedWordsPath, "kalkalaCountRecords_1.txt"), 'a') as outfile:
        json.dump(ranked, outfile, indent=4)

    # Slice instead of indexing positions 0..2: the original raised
    # IndexError whenever fewer than three politicians had been collected.
    topThree = dict(ranked[:3])

    with open(os.path.join(rootPath, listOfCheckedWordsPath, "kalkalaCountRecords.txt"), 'a') as outfile:
        json.dump(sorted(topThree.items(), key=operator.itemgetter(1), reverse=True), outfile, indent=4)

def sortAndPrintWordCount():
    """Append the top three entries of ``politicDictWordsCount`` to a file.

    Items are sorted by value, descending, and the first three are appended
    as JSON to ``<rootPath>/<listOfCheckedWordsPath>/wordsCountRecords.txt``.
    """
    ranked = sorted(politicDictWordsCount.items(), key=operator.itemgetter(1), reverse=True)

    # Slice instead of indexing positions 0..2: the original raised
    # IndexError whenever fewer than three politicians had been collected.
    topThree = dict(ranked[:3])

    with open(os.path.join(rootPath, listOfCheckedWordsPath, "wordsCountRecords.txt"), 'a') as outfile:
        json.dump(sorted(topThree.items(), key=operator.itemgetter(1), reverse=True), outfile, indent=4)

def sortAndPrintRahlanCount():
    """Append the top three entries of ``politicDictRahlanCount`` to a file.

    Items are sorted by value, descending, and the first three are appended
    as JSON to ``<rootPath>/<listOfCheckedWordsPath>/rahlanCountRecords.txt``.

    NOTE(review): the values are the dicts stored by
    CheckWordsAgainstThelist, so the ordering relies on Python 2 being able
    to compare dicts; Python 3 would raise TypeError here.
    """
    ranked = sorted(politicDictRahlanCount.items(), key=operator.itemgetter(1), reverse=True)

    # Slice instead of indexing positions 0..2: the original raised
    # IndexError whenever fewer than three politicians had been collected.
    topThree = dict(ranked[:3])

    with open(os.path.join(rootPath, listOfCheckedWordsPath, "rahlanCountRecords.txt"), 'a') as outfile:
        json.dump(sorted(topThree.items(), key=operator.itemgetter(1), reverse=True), outfile, indent=4)

print os.path.dirname(os.path.abspath(file)) + '\potilics.txt' politicByWords=[] politicCheckListArray = [] gutlist = [] politicByWordsGut = []

politicsArr = [line.strip() for line in open(os.path.join(os.path.dirname(os.path.abspath(file)), 'potilics.txt'))] threeLetterWords = [line.strip() for line in open(os.path.join(os.path.dirname(os.path.abspath(file)), 'ThreeLetterWords.txt'))]

f =, 'ThreeLetterWords.txt'), "r", "utf-8") p =, 'politicsName.txt'), "r", "utf-8")

for words in f.readlines(): testlist.append(words.split())

for word in testlist: for cut in word: threeLetterList.append(cut)

for words in p.readlines(): gutlist.append(words.split())

for word in gutlist: for cut in word:

print cut


for politic in politicsArr: url = "" + politic + "?fields=posts.limit(200)%7Bmessage%7D&access_token=1719315164960950%7CnKFpk2SebwixsCQS3y7zQDPA1Ow" response = urllib.urlopen(url) data = json.loads(

url2 = "" + politic + "?fields=name"
response2 = urllib.urlopen(url2)
data2 = json.loads(

try :
        for words in politicCheckListArray:
            for word in words:
                # print word, len(politicByWords)
        # errorCount=
        print "Unexpected error:" , sys.exc_info()[0]

    print data["posts"]["paging"]["next"]
    allPostsIDsDict ={}
    buildString = createBuffer_Posts(url)

    getTargetIds(data["posts"]["paging"]["next"], list_NextUrls)

    for url in list_NextUrls:
       buildString += createBuffer_Next(url)

    print "ALL ID's in the allPostIDsDict :  " , len(allPostsIDsDict.keys())

    politicDictWordsCount[politic] = len(buildString)
    AllwordCounter(buildString, politic)
    CheckWordsAgainstThelist(buildString, politic)

    del list_NextUrls[:]
    del buildString[:]
    print politic
    print "Unexpected error:" , sys.exc_info()[0]

sortAndPrintMilitaryCount() sortAndPrintSahbekimCount() sortAndPrintNarcistCount() sortAndPrintKalkalaCount() sortAndPrintWordCount() sortAndPrintRahlanCount()

print 'done'

nir-jackson commented 9 years ago

Send them to me in a separate email. For some reason I can't open them when you send them through GitHub, so just send me a new email with the scripts.

nir-jackson commented 9 years ago

@akariv @mushon

Hey, we've uploaded the new scripts (together with potilics.txt). Please delete all the current data and rerun the scripts (I need you to delete the old data because it contains unwanted data that needs to be removed from the server).

Additionally, I've updated the website code, so please re-upload the files index.html, style_new_new.css, Visualization.js, Top3.js.

nir-jackson commented 9 years ago

fixed! :smiley: