kenoboss / TextSummarization

0 stars 2 forks source link

Materialsuche #1

Closed kenoboss closed 6 years ago

kenoboss commented 7 years ago
kenoboss commented 7 years ago
import csv

# Directory that holds the corpus; adjust this to your local checkout.
# It must end with a path separator because the paths below are built by
# plain string concatenation.
corpus_path = "/home/kenobi/Repos/GitHub/TextSummarization/data/"
# File name of the raw CSV inside corpus_path.
corpus_file = "news_summary.csv"
# Full path of the raw CSV file.
corpus_complete_path = "".join((corpus_path, corpus_file))

def get_lines (corpus_complete_path):
    """Read a CSV file and return all of its records.

    The file is decoded as windows-1250 and parsed with ``,`` as the
    delimiter and ``"`` as the quote character.

    Args:
        corpus_complete_path: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV record, in file order. Each entry is
        the list of field strings of that record. The first record (the
        header row, if the file has one) is included.
    """
    # newline='' hands raw line endings to the csv module so that quoted
    # fields containing line breaks are parsed correctly.
    # The with-block guarantees the file is closed even when decoding or
    # parsing raises part-way through the file.
    with open(corpus_complete_path, newline='', encoding="windows-1250") as target:
        return list(csv.reader(target, delimiter=',', quotechar='"'))

def clear_csv_file():
    """Return the cleaned data rows of the corpus file.

    Reads every record from ``corpus_complete_path``, drops the first
    record (the header row) and runs each remaining record through
    ``clear_line``.

    Returns:
        A list of cleaned records, one list of strings per data row.
    """
    rows = get_lines(corpus_complete_path)
    # rows[1:] skips the header record; an empty file yields an empty list.
    return [clear_line(row) for row in rows[1:]]

def clear_line(line):
    """Clean every field of one record.

    Args:
        line: Iterable of field strings belonging to a single record.

    Returns:
        A new list with ``clear_string`` applied to each field, in order.
    """
    return [clear_string(entry) for entry in line]

def clear_string(string):
    """Normalize one field and wrap it in double quotes.

    Steps, in order: strip surrounding whitespace, strip surrounding
    double quotes, strip surrounding single quotes, turn every remaining
    double quote into a single quote, and delete every ``\\r\\n`` sequence.

    Args:
        string: The raw field text.

    Returns:
        The normalized text enclosed in a pair of double quotes.
    """
    # The three strips run one after another on purpose: quotes are only
    # removed from the ends that are left after the previous strip.
    trimmed = string.strip().strip("\"").strip("\'")
    body = trimmed.replace("\"", "\'").replace("\r\n", "")
    return "\"" + body + "\""

def line_to_s(line):
    """Serialize one record as a single line of text.

    Args:
        line: Iterable of field strings.

    Returns:
        The fields concatenated with a ``;`` after every field (including
        the last one), terminated by a newline. An empty record yields
        just the newline.
    """
    # Each field carries its own trailing ';', so the last field is
    # followed by a separator as well.
    fields = "".join(entry + ";" for entry in line)
    return fields + "\n"

def create_clear_file():
    """Write the cleaned corpus to ``summary_news_clear.csv``.

    The output file is created (or overwritten) inside ``corpus_path``.
    It starts with a fixed header line, followed by one line per cleaned
    record returned by ``clear_csv_file``; every line is produced by
    ``line_to_s``.

    Returns:
        None. The result is the file written to disk.
    """
    # Clean the input first so that a failure while reading it does not
    # leave behind a truncated output file.
    lines = clear_csv_file()
    filepath = corpus_path + "summary_news_clear.csv"
    headline = ["author", "date", "headlines", "url", "summary", "text"]

    # The with-block closes the file even if a write fails part-way.
    # NOTE(review): no encoding is given, so the platform default is used
    # for the output; kept as-is to preserve existing behaviour.
    with open(filepath, "w") as target:
        target.write(line_to_s(headline))
        target.writelines(line_to_s(line) for line in lines)

def main ():
    """Script entry point: build the cleaned corpus file."""
    create_clear_file()

# Runs unconditionally, so importing this module also triggers the
# conversion.
main()
kenoboss commented 7 years ago

Hier ein Skript zum Aufbereiten der Datei news_summary. Lediglich der Pfad zur Datei muss angepasst werden.