Please and one can help me in this when i try to debug the web-app after i applied the machine learning algorithms (BagOfwords vectorization& classifiers) i have stragguled, and the error was like following:
File "c:\Users\LENOVO\Desktop\webapp\app2.py", line 42, in predict
tweet_vect = words.transform([tweet]).toarray()
AttributeError: 'numpy.ndarray' object has no attribute 'transform'
127.0.0.1 - - [14/Apr/2023 11:29:54] "POST / HTTP/1.1" 500
So my codes are as follow , ,model.py :
df = pd.read_csv ('C:\\Users\\LENOVO\\Desktop\\webapp\\df.csv')
df.head()
from nltk.stem import WordNetLemmatizer
lemma= WordNetLemmatizer()
stop = stopwords.words('English')
def clean_tweets(text):
text = text.lower()
words= nltk.word_tokenize(text)
# Lemmatization is another technique used to reduce inflected words to their root word. It describes the algorithmic process of identifying an inflected word's “lemma” (dictionary form) based on its intended meaning.
words = ' '.join ([lemma.lemmatize(word) for word in words
if word not in (stop)])
text=''.join(words)
# removing non-alphabit characters
text = re.sub('[^a-z]',' ',text)
return text
# create a corpus:
from nltk.stem import PorterStemmer
df.head(10)
corpus = []
pstem = PorterStemmer()
for i in range(df['cleaned_tweets'].shape[0]):
tweet = re.sub("[^a-zA-Z]", ' ', df['cleaned_tweets'][i])
#Transform words to lowercase
tweet = tweet.split()
#Remove stopwords then Stemming it
tweet = [pstem.stem(word) for word in tweet if not word in set(stopwords.words('english'))]
tweet = ' '.join(tweet)
#Append cleaned tweet to corpus
corpus.append(tweet)
df = df.drop(['text'],axis=1)
print("Corpus created successfully")
print(pd.DataFrame(corpus)[0].head(10))
rawTexData = df["cleaned_tweets"].head(10)
cleanTexData = pd.DataFrame(corpus, columns=['corpus'])
frames = [rawTexData, cleanTexData]
result = pd.concat(frames, axis=1, sort=False)
result
#Create our dictionary
uniqueWordFrequents = {}
for tweet in corpus:
for word in tweet.split():
if(word in uniqueWordFrequents.keys()):
uniqueWordFrequents[word] += 1
else:
uniqueWordFrequents[word] = 1
#Convert dictionary to dataFrame
uniqueWordFrequents = pd.DataFrame.from_dict(uniqueWordFrequents,orient='index',columns=['Word Frequent'])
uniqueWordFrequents.sort_values(by=['Word Frequent'], inplace=True, ascending=False)
uniqueWordFrequents.head(10)
uniqueWordFrequents['Word Frequent'].unique()
uniqueWordFrequents = uniqueWordFrequents[uniqueWordFrequents['Word Frequent'] >= 20]
print(uniqueWordFrequents.shape)
from sklearn.feature_extraction.text import CountVectorizer
counVec = CountVectorizer(max_features = uniqueWordFrequents.shape[0])
bagOfWords = counVec.fit_transform(corpus).toarray()
b = bagOfWords
y = df['target']
print("b shape = ",b.shape)
print("y shape = ",y.shape)
b_train , b_test , y_train , y_test = train_test_split(b,y,test_size=0.20, random_state=55, shuffle =True)
print('data splitting successfully')
multinomialNBModel = MultinomialNB(alpha=0.1)
multinomialNBModel.fit(b_train,y_train)
print("multinomialNB model run successfully")
passModel=PassiveAggressiveClassifier()
passModel.fit(b_train,y_train)
print ('Passive Regressive model run successfully')
modelsNames = [('multinomialNBModel',multinomialNBModel),
('PassiveAggressiveClassifier',passModel)]
from sklearn.ensemble import VotingClassifier
votingClassifier = VotingClassifier(voting = 'hard',estimators= modelsNames)
votingClassifier.fit(b_train,y_train)
print("votingClassifier model run successfully")
models = [multinomialNBModel, passModel, votingClassifier]
for model in models:
print(type(model).__name__,' Train Score is : ' ,model.score(b_train, y_train))
print(type(model).__name__,' Test Score is : ' ,model.score(b_test, y_test))
y_pred = model.predict(b_test)
print(type(model).__name__,' F1 Score is : ' ,f1_score(y_test,y_pred))
bagOfWords
import pickle
pickle.dump(votingClassifier, open("model.pkl", "wb"))
pickle.dump(bagOfWords, open("words.pkl", "wb"))
app2.py:
from flask import Flask
from flask import render_template
from flask import request
from flask import jsonify
from flask import redirect
from flask import url_for
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import numpy as np
import joblib
nltk.download('stopwords')
import pickle
app = Flask(__name__)
ps = PorterStemmer()
model = pickle.load(open('model.pkl', 'rb'))
words = pickle.load(open('words.pkl', 'rb'))
# Build functionalities
@app.route('/', methods=['GET'])
def home():
return render_template('index2.html')
def predict(text):
tweet = re.sub('[^a-zA-Z]', ' ', text)
tweet = tweet.lower()
tweet = tweet.split()
tweet = [ps.stem(word) for word in tweet if not word in stopwords.words('english')]
tweet = ' '.join(tweet)
tweet_vect = words.transform([tweet]).toarray()
prediction = 'FAKE' if model.predict(tweet_vect) == 0 else 'REAL'
return prediction
@app.route('/', methods=['POST'])
def webapp():
text = request.form['text']
prediction = predict(text)
return render_template('index2.html', text=text, result=prediction)
@app.route('/predict/', methods=['GET','POST'])
def api():
text = request.args.get('text')
prediction = predict(text)
return jsonify(prediction=prediction)
if __name__ == "__main__":
app.run()
pls any help