TM10 Using OpenAI

import pandas as pd
from IPython.display import display
pd.options.display.float_format = '{:.4f}'.format

import warnings
warnings.filterwarnings("ignore", message=".*The 'nopython' keyword.*")
# import os
# os.environ["TOKENIZERS_PARALLELISM"] = "false"

import re
import zhon.hanzi
from collections import Counter
# Character class matching one Chinese character or one CJK punctuation mark.
# There is deliberately no "|" between the two parts: inside [...] a pipe is a
# literal character, not alternation, so the earlier pattern also let ASCII
# "|" survive cleaning.
zh_pat = f"[{zhon.hanzi.characters}{zhon.hanzi.punctuation}]"

# Load the stop-word list, one entry per line. The first line is skipped
# (presumably a header row -- confirm against the data file).
with open("data/stopwords_zh-tw.txt", encoding="utf-8") as fin:
    stopwords = fin.read().split("\n")[1:]

Step 1. Import and clean data

Note that we don’t need tokenization or stop-word removal here. We only collapse whitespace and keep Chinese characters and punctuation.

import pandas as pd
# Load the sentiment dataset from CSV; its "text" column is what gets cleaned
# and sent to the model.
df = pd.read_csv('data/sentiment.csv')

def clean_text(text):
    """Return *text* reduced to the characters matched by ``zh_pat``.

    Runs of carriage returns, tabs and spaces are first collapsed into a
    single space. Every match of the module-level ``zh_pat`` pattern is then
    extracted and the matches are concatenated in their original order, so
    anything ``zh_pat`` does not match is dropped from the result.
    """
    collapsed = re.sub("[\r\t ]+", " ", text)
    kept = re.findall(zh_pat, collapsed)
    return "".join(kept)

# Run clean_text over every entry of the "text" column and keep the result in
# a new "cleaned" column.
df["cleaned"] = df["text"].apply(clean_text)

Using OpenAI

from openai import OpenAI
import os

# Take the key from the OPENAI_API_KEY environment variable so a real secret
# never has to be written into this file. The placeholder is only a fallback;
# the API rejects it with a 401 AuthenticationError.
OPEN_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY")
client = OpenAI(api_key=OPEN_API_KEY)

from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
) 

# @retry(wait=wait_random_exponential(min=3, max=10), stop=stop_after_attempt(3))
def ask_gpt(query):
    """Ask the chat model which kind of shop a review comment is about.

    The prompt presents five lettered categories (A hotel, B food,
    C restaurant, D 3C products, E other), asks for just the letter while
    also permitting the model to invent a new label, and ends with an
    assistant turn that primes the answer.

    Args:
        query: The review text; it is embedded in the user message.

    Returns:
        The completion object returned by ``client.chat.completions.create``.
        The answer text is in ``completion.choices[0].message.content``.
    """
    # The leading newline and the indentation are part of the prompt text.
    category_menu = """
                A-旅館
                B-食品
                C-餐廳
                D-3C用品
                E-其他"""
    prompt = [
        {"role": "system", "content": "以下留言是各類商店討論版中的評論,請協助分析,這些留言的評論對象是"},
        {"role": "system", "content": category_menu},
        {"role": "system", "content": "只要回答A|B|C|D|E即可,謝謝!你也可以照你的判斷新增新label!"},
        {"role": "user", "content": f"留言內容為「{query}」"},
        {"role": "assistant", "content": "你的答案是:"},
    ]
    # temperature=0 and zero penalties are passed explicitly on every call.
    return client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=prompt,
        temperature=0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
    )
# Classify the first ten cleaned comments and print the label, a tab, and the
# comment. head(10) selects rows by position, so this also works when the
# frame has fewer than ten rows or an index that is not 0..n-1, where the
# label lookup df["cleaned"][i] can raise KeyError.
for comment in df["cleaned"].head(10):
    completion = ask_gpt(comment)
    response = completion.choices[0].message.content.strip()
    print(response, "\t", comment)

"""
D-3C用品 	 店家很給力,快遞也是相當快,第三次光顧啦
D-3C用品 	 這樣的配置用系統還是有點卡。指紋收集器。沒送原裝滑鼠還需要自己買,不太好。
A-旅館 	 不錯,在同等檔次酒店中應該是值得推薦的!
B-食品 	 哎!不會是蒙牛乾的吧嚴懲真凶!
D-3C用品 	 空尤其是三立電視臺女主播做的序尤其無趣像是硬湊那麼多字
D-3C用品 	 明明買了本書,只到了本,也沒有說是什麼原因,以後怎麼信的過???????????
E-其他 	 看了一下感覺還可以
D-3C用品 	 散熱還不錯,玩遊戲溫度和硬碟溫度都在以下,速度很好,顯示卡也不錯
D-3C用品 	 外觀好看,白色的自己貼紙也方便,執行起來速度也還不錯屬於主流配置了。一般用用可以的
D-3C用品 	 水超級小用的時候還要先修理一下花灑售後還說是水壓問題說本來標配都是這樣還要自己重新換一個
"""
---------------------------------------------------------------------------
AuthenticationError                       Traceback (most recent call last)
Cell In[4], line 3
      1 for i in range(10):
      2     comment = df["cleaned"][i]
----> 3     completion = ask_gpt(comment)
      4     response = completion.choices[0].message.content.strip()
      5     print(response, "\t", comment)

Cell In[3], line 13, in ask_gpt(query)
     12 def ask_gpt(query):
---> 13     completion = client.chat.completions.create(
     14         model="gpt-3.5-turbo-1106",
     15         messages=[
     16             {"role": "system", "content": "以下留言是各類商店討論版中的評論,請協助分析,這些留言的評論對象是"},
     17             {"role": "system", "content": """
     18                 A-旅館
     19                 B-食品
     20                 C-餐廳
     21                 D-3C用品
     22                 E-其他"""},
     23             {"role": "system", "content": "只要回答A|B|C|D|E即可,謝謝!你也可以照你的判斷新增新label!"},
     24             {"role": "user", "content": f"留言內容為「{query}」"},
     25             {"role": "assistant", "content": "你的答案是:"},
     26             ],
     27         # n=1,
     28         # stop=["\n"], 
     29         temperature=0,
     30         frequency_penalty=0.0,
     31         presence_penalty=0.0
     32     )
     33     return completion

File ~/anaconda3/lib/python3.10/site-packages/openai/_utils/_utils.py:274, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
    272             msg = f"Missing required argument: {quote(missing[0])}"
    273     raise TypeError(msg)
--> 274 return func(*args, **kwargs)

File ~/anaconda3/lib/python3.10/site-packages/openai/resources/chat/completions.py:704, in Completions.create(self, messages, model, frequency_penalty, function_call, functions, logit_bias, logprobs, max_completion_tokens, max_tokens, n, parallel_tool_calls, presence_penalty, response_format, seed, service_tier, stop, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, extra_headers, extra_query, extra_body, timeout)
    668 @required_args(["messages", "model"], ["messages", "model", "stream"])
    669 def create(
    670     self,
   (...)
    701     timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    702 ) -> ChatCompletion | Stream[ChatCompletionChunk]:
    703     validate_response_format(response_format)
--> 704     return self._post(
    705         "/chat/completions",
    706         body=maybe_transform(
    707             {
    708                 "messages": messages,
    709                 "model": model,
    710                 "frequency_penalty": frequency_penalty,
    711                 "function_call": function_call,
    712                 "functions": functions,
    713                 "logit_bias": logit_bias,
    714                 "logprobs": logprobs,
    715                 "max_completion_tokens": max_completion_tokens,
    716                 "max_tokens": max_tokens,
    717                 "n": n,
    718                 "parallel_tool_calls": parallel_tool_calls,
    719                 "presence_penalty": presence_penalty,
    720                 "response_format": response_format,
    721                 "seed": seed,
    722                 "service_tier": service_tier,
    723                 "stop": stop,
    724                 "stream": stream,
    725                 "stream_options": stream_options,
    726                 "temperature": temperature,
    727                 "tool_choice": tool_choice,
    728                 "tools": tools,
    729                 "top_logprobs": top_logprobs,
    730                 "top_p": top_p,
    731                 "user": user,
    732             },
    733             completion_create_params.CompletionCreateParams,
    734         ),
    735         options=make_request_options(
    736             extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
    737         ),
    738         cast_to=ChatCompletion,
    739         stream=stream or False,
    740         stream_cls=Stream[ChatCompletionChunk],
    741     )

File ~/anaconda3/lib/python3.10/site-packages/openai/_base_client.py:1270, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
   1256 def post(
   1257     self,
   1258     path: str,
   (...)
   1265     stream_cls: type[_StreamT] | None = None,
   1266 ) -> ResponseT | _StreamT:
   1267     opts = FinalRequestOptions.construct(
   1268         method="post", url=path, json_data=body, files=to_httpx_files(files), **options
   1269     )
-> 1270     return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))

File ~/anaconda3/lib/python3.10/site-packages/openai/_base_client.py:947, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
    944 else:
    945     retries_taken = 0
--> 947 return self._request(
    948     cast_to=cast_to,
    949     options=options,
    950     stream=stream,
    951     stream_cls=stream_cls,
    952     retries_taken=retries_taken,
    953 )

File ~/anaconda3/lib/python3.10/site-packages/openai/_base_client.py:1051, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
   1048         err.response.read()
   1050     log.debug("Re-raising status error")
-> 1051     raise self._make_status_error_from_response(err.response) from None
   1053 return self._process_response(
   1054     cast_to=cast_to,
   1055     options=options,
   (...)
   1059     retries_taken=retries_taken,
   1060 )

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: YOUR_API_KEY. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}