Merge pull request #325 from anshrusia200/plagiarism-check

Added plagiarism checker script
This commit is contained in:
Bartick Maiti 2022-10-10 22:33:48 +05:30 committed by GitHub
commit 105e27af09
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 50 additions and 0 deletions

View File

@ -0,0 +1,12 @@
# Plagiarism Checker
This is a simple Python script to check plagiarism between 2 files.
## Using the script
```bash
# install scikit-learn
pip install scikit-learn
# run script
python script.py
```

View File

@ -0,0 +1 @@
A plagiarism checker helps in preventing duplicacy in two files. This way authenticity of content is maintained.

View File

@ -0,0 +1 @@
A plagiarism checker helps in preventing duplicacy in two files. This way authenticity of content is maintained.

View File

@ -0,0 +1,36 @@
# Plagiarism detector using cosine similarity
from itertools import combinations

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def Plagiarism_Checker(files, student):
    """Score every pair of documents for textual similarity.

    Each document is converted to a TF-IDF vector (one vocabulary fitted over
    all documents together) and every unordered pair of documents is scored
    with cosine similarity.

    Args:
        files: Iterable of labels (e.g. file names), one per document.
        student: Iterable of document texts, in the same order as ``files``.

    Returns:
        A set of ``(label_a, label_b, score)`` tuples, one per pair of
        documents, with the two labels in sorted order. Multiply ``score``
        by 100 to read it as a percentage match. The set is empty when
        fewer than two documents are supplied.

    Raises:
        ValueError: If ``files`` and ``student`` differ in length.
    """
    files = list(files)
    student = list(student)
    # zip() would silently drop the unmatched tail, hiding files from the
    # comparison, so reject mismatched inputs explicitly.
    if len(files) != len(student):
        raise ValueError(
            f"got {len(files)} file names but {len(student)} documents"
        )
    # With fewer than two documents there is no pair to compare; returning
    # early also avoids fitting the vectorizer on an empty corpus.
    if len(files) < 2:
        return set()

    # Fit a single vocabulary across all documents so that the resulting
    # vectors share the same dimensions and are directly comparable.
    tfidf_vectors = TfidfVectorizer().fit_transform(student).toarray()

    results = set()
    # combinations() yields each unordered pair exactly once, so no index
    # lookup or list copy is needed and no pair is scored twice.
    labelled_vectors = zip(files, tfidf_vectors)
    for (name_a, vec_a), (name_b, vec_b) in combinations(labelled_vectors, 2):
        # cosine_similarity returns a 2x2 matrix; [0][1] is the score of the
        # first vector against the second.
        score = cosine_similarity([vec_a, vec_b])[0][1]
        first, second = sorted((name_a, name_b))
        results.add((first, second, score))
    return results
# Documents to compare; paths are resolved against the current directory.
student_files = ["sample1.txt", "sample2.txt"]

# Read each document fully, keeping the texts in the same order as the
# names above so the checker can pair them up.
student_notes = []
for path in student_files:
    with open(path, "r") as handle:
        text = handle.read()
    student_notes.append(text)

# Each entry is a (file_a, file_b, score) tuple produced by the checker.
results = Plagiarism_Checker(student_files, student_notes)
for entry in results:
    print("Result: ", entry)