# TM10 Using OpenAI
import pandas as pd
from IPython.display import display
pd.options.display.float_format = '{:.4f}'.format
import warnings
warnings.filterwarnings("ignore", message=".*The 'nopython' keyword.*")
# import os
# os.environ["TOKENIZERS_PARALLELISM"] = "false"
import re
import zhon.hanzi
from collections import Counter
# Character class matching one Chinese character or one Chinese punctuation mark.
# The two zhon sets are concatenated directly: inside [...] a "|" is a literal
# pipe (not alternation), so including it would also match ASCII "|".
zh_pat = f"[{zhon.hanzi.characters}{zhon.hanzi.punctuation}]"

# Read the stop-word file, one entry per line. The first line is skipped
# (presumably a header line -- confirm against the data file).
with open("data/stopwords_zh-tw.txt", encoding="utf-8") as fin:
    stopwords = fin.read().split("\n")[1:]
## Step 1. Import and clean data
Note that we don't need tokenization or stop-word removal here; we just clean up whitespace and punctuation.
import pandas as pd
# Load the comment dataset from CSV; its "text" column holds the raw comments
# that are cleaned further down.
df = pd.read_csv('data/sentiment.csv')
def clean_text(text):
    """Strip everything from ``text`` except characters matched by ``zh_pat``.

    Args:
        text: Raw comment text. Non-string values (for example the NaN that
            pandas produces for an empty CSV cell) are treated as empty text.

    Returns:
        The matched characters joined in their original order, or ``""``
        when ``text`` is not a string.
    """
    if not isinstance(text, str):
        # re.sub raises TypeError on non-strings, which would abort a whole
        # Series.apply() call because of a single missing value.
        return ""
    # Collapse runs of carriage returns, tabs and spaces into one space.
    text = re.sub("[\r\t ]+", " ", text)
    # Keep only the pieces matching zh_pat and glue them back together.
    return "".join(re.findall(zh_pat, text))
# Add a "cleaned" column by applying clean_text to every entry of "text".
df["cleaned"] = df["text"].apply(clean_text)
## Using OpenAI
from openai import OpenAI
import os

# Read the key from the OPENAI_API_KEY environment variable so the real secret
# never has to be written into the notebook. The placeholder is kept as a
# fallback so the cell still runs; API calls made with it are rejected with
# an authentication error (HTTP 401).
OPEN_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY")
client = OpenAI(api_key=OPEN_API_KEY)
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
)
# Retrying via tenacity is currently disabled; uncomment the decorator below to
# retry failed calls (at most 3 attempts, randomized exponential wait).
# @retry(wait=wait_random_exponential(min=3, max=10), stop=stop_after_attempt(3))
def ask_gpt(query, model="gpt-3.5-turbo-1106"):
    """Ask the chat model which kind of shop or product a comment is about.

    The prompt offers five labels (A hotel, B food, C restaurant,
    D 3C/consumer electronics, E other) and also allows the model to
    invent a new label.

    Args:
        query: The comment text to classify; it is embedded in the user message.
        model: Name of the chat model to call. Defaults to the model the
            notebook was originally written against.

    Returns:
        The completion object returned by ``client.chat.completions.create``.
        Callers read the answer from ``.choices[0].message.content``.
    """
    # The label menu starts with a newline, exactly as in the original prompt.
    label_menu = "\nA-旅館\nB-食品\nC-餐廳\nD-3C用品\nE-其他"
    messages = [
        {"role": "system", "content": "以下留言是各類商店討論版中的評論,請協助分析,這些留言的評論對象是"},
        {"role": "system", "content": label_menu},
        {"role": "system", "content": "只要回答A|B|C|D|E即可,謝謝!你也可以照你的判斷新增新label!"},
        {"role": "user", "content": f"留言內容為「{query}」"},
        # Trailing assistant turn that cues the model to answer right away.
        {"role": "assistant", "content": "你的答案是:"},
    ]
    return client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
    )
# Classify the first 10 cleaned comments and print the model's label next to
# each comment. head(10) selects rows by position, so this also works when the
# frame has fewer than 10 rows or an index that does not start at 0.
for comment in df["cleaned"].head(10):
    completion = ask_gpt(comment)
    # message.content may be None (e.g. an empty reply); treat that as "".
    response = (completion.choices[0].message.content or "").strip()
    print(response, "\t", comment)
"""
D-3C用品 店家很給力,快遞也是相當快,第三次光顧啦
D-3C用品 這樣的配置用系統還是有點卡。指紋收集器。沒送原裝滑鼠還需要自己買,不太好。
A-旅館 不錯,在同等檔次酒店中應該是值得推薦的!
B-食品 哎!不會是蒙牛乾的吧嚴懲真凶!
D-3C用品 空尤其是三立電視臺女主播做的序尤其無趣像是硬湊那麼多字
D-3C用品 明明買了本書,只到了本,也沒有說是什麼原因,以後怎麼信的過???????????
E-其他 看了一下感覺還可以
D-3C用品 散熱還不錯,玩遊戲溫度和硬碟溫度都在以下,速度很好,顯示卡也不錯
D-3C用品 外觀好看,白色的自己貼紙也方便,執行起來速度也還不錯屬於主流配置了。一般用用可以的
D-3C用品 水超級小用的時候還要先修理一下花灑售後還說是水壓問題說本來標配都是這樣還要自己重新換一個
"""
---------------------------------------------------------------------------
AuthenticationError Traceback (most recent call last)
Cell In[4], line 3
1 for i in range(10):
2 comment = df["cleaned"][i]
----> 3 completion = ask_gpt(comment)
4 response = completion.choices[0].message.content.strip()
5 print(response, "\t", comment)
Cell In[3], line 13, in ask_gpt(query)
12 def ask_gpt(query):
---> 13 completion = client.chat.completions.create(
14 model="gpt-3.5-turbo-1106",
15 messages=[
16 {"role": "system", "content": "以下留言是各類商店討論版中的評論,請協助分析,這些留言的評論對象是"},
17 {"role": "system", "content": """
18 A-旅館
19 B-食品
20 C-餐廳
21 D-3C用品
22 E-其他"""},
23 {"role": "system", "content": "只要回答A|B|C|D|E即可,謝謝!你也可以照你的判斷新增新label!"},
24 {"role": "user", "content": f"留言內容為「{query}」"},
25 {"role": "assistant", "content": "你的答案是:"},
26 ],
27 # n=1,
28 # stop=["\n"],
29 temperature=0,
30 frequency_penalty=0.0,
31 presence_penalty=0.0
32 )
33 return completion
File ~/anaconda3/lib/python3.10/site-packages/openai/_utils/_utils.py:274, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
272 msg = f"Missing required argument: {quote(missing[0])}"
273 raise TypeError(msg)
--> 274 return func(*args, **kwargs)
File ~/anaconda3/lib/python3.10/site-packages/openai/resources/chat/completions.py:704, in Completions.create(self, messages, model, frequency_penalty, function_call, functions, logit_bias, logprobs, max_completion_tokens, max_tokens, n, parallel_tool_calls, presence_penalty, response_format, seed, service_tier, stop, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, extra_headers, extra_query, extra_body, timeout)
668 @required_args(["messages", "model"], ["messages", "model", "stream"])
669 def create(
670 self,
(...)
701 timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
702 ) -> ChatCompletion | Stream[ChatCompletionChunk]:
703 validate_response_format(response_format)
--> 704 return self._post(
705 "/chat/completions",
706 body=maybe_transform(
707 {
708 "messages": messages,
709 "model": model,
710 "frequency_penalty": frequency_penalty,
711 "function_call": function_call,
712 "functions": functions,
713 "logit_bias": logit_bias,
714 "logprobs": logprobs,
715 "max_completion_tokens": max_completion_tokens,
716 "max_tokens": max_tokens,
717 "n": n,
718 "parallel_tool_calls": parallel_tool_calls,
719 "presence_penalty": presence_penalty,
720 "response_format": response_format,
721 "seed": seed,
722 "service_tier": service_tier,
723 "stop": stop,
724 "stream": stream,
725 "stream_options": stream_options,
726 "temperature": temperature,
727 "tool_choice": tool_choice,
728 "tools": tools,
729 "top_logprobs": top_logprobs,
730 "top_p": top_p,
731 "user": user,
732 },
733 completion_create_params.CompletionCreateParams,
734 ),
735 options=make_request_options(
736 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
737 ),
738 cast_to=ChatCompletion,
739 stream=stream or False,
740 stream_cls=Stream[ChatCompletionChunk],
741 )
File ~/anaconda3/lib/python3.10/site-packages/openai/_base_client.py:1270, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
1256 def post(
1257 self,
1258 path: str,
(...)
1265 stream_cls: type[_StreamT] | None = None,
1266 ) -> ResponseT | _StreamT:
1267 opts = FinalRequestOptions.construct(
1268 method="post", url=path, json_data=body, files=to_httpx_files(files), **options
1269 )
-> 1270 return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
File ~/anaconda3/lib/python3.10/site-packages/openai/_base_client.py:947, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
944 else:
945 retries_taken = 0
--> 947 return self._request(
948 cast_to=cast_to,
949 options=options,
950 stream=stream,
951 stream_cls=stream_cls,
952 retries_taken=retries_taken,
953 )
File ~/anaconda3/lib/python3.10/site-packages/openai/_base_client.py:1051, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1048 err.response.read()
1050 log.debug("Re-raising status error")
-> 1051 raise self._make_status_error_from_response(err.response) from None
1053 return self._process_response(
1054 cast_to=cast_to,
1055 options=options,
(...)
1059 retries_taken=retries_taken,
1060 )
AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: YOUR_API_KEY. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}