```python from transformers import AutoTokenizer, AutoModelForCausalLM device = "cuda" # the device to load the model onto # Now you do not need to add "trust_remote_code=True" TOKENIZER = AutoTokenizer.from_pretrained("Qwen/CodeQwen1.5-7B") MODEL = AutoModelForCausalLM.from_pretrained("Qwen/CodeQwen1.5-7B", device_map="auto").eval() # tokenize the input into tokens input_text = """library-system library.py class Book: def __init__(self, title, author, isbn, copies): self.title = title self.author = author self.isbn = isbn self.copies = copies def __str__(self): return f"Title: {self.title}, Author: {self.author}, ISBN: {self.isbn}, Copies: {self.copies}" class Library: def __init__(self): self.books = [] def add_book(self, title, author, isbn, copies): book = Book(title, author, isbn, copies) self.books.append(book) def find_book(self, isbn): for book in self.books: if book.isbn == isbn: return book return None def list_books(self): return self.books student.py class Student: def __init__(self, name, id): self.name = name self.id = id self.borrowed_books = [] def borrow_book(self, book, library): if book and book.copies > 0: self.borrowed_books.append(book) book.copies -= 1 return True return False def return_book(self, book, library): if book in self.borrowed_books: self.borrowed_books.remove(book) book.copies += 1 return True return False main.py from library import Library from student import Student def main(): # Set up the library with some books library = Library() library.add_book("The Great Gatsby", "F. Scott Fitzgerald", "1234567890", 3) library.add_book("To Kill a Mockingbird", "Harper Lee", "1234567891", 2) # Set up a student student = Student("Alice", "S1") # Student borrows a book """ model_inputs = TOKENIZER([input_text], return_tensors="pt").to(device) # Use `max_new_tokens` to control the maximum output length. generated_ids = MODEL.generate(model_inputs.input_ids, max_new_tokens=1024, do_sample=False)[0] # The generated_ids include prompt_ids, so we only need to decode the tokens after prompt_ids. output_text = TOKENIZER.decode(generated_ids[len(model_inputs.input_ids[0]):], skip_special_tokens=True) print(f"Prompt: \n{input_text}\n\nGenerated text: \n{output_text}") ``` The expected output as following: ``` Generated text: book = library.find_book("1234567890") if student.borrow_book(book, library): print(f"{student.name} borrowed {book.title}") else: print(f"{student.name} could not borrow {book.title}") # Student returns a book if student.return_book(book, library): print(f"{student.name} returned {book.title}") else: print(f"{student.name} could not return {book.title}") # List all books in the library print("All books in the library:") for book in library.list_books(): print(book) if __name__ == "__main__": main() ```

Model	Size	HumanEval 0-shot	HumanEval+ 0-shot	MBPP 0-shot	MBPP+ 0-shot	MBPP 3-shot
Base Model
CodeLlama-Base	7B	33.5	25.6	52.1	41.6	38.6
StarCoder2	7B	35.4	29.9	54.4	45.6	51.0
DeepSeek-Coder-Base	6.7B	47.6	39.6	70.2	56.6	60.6
CodeQwen1.5	7B	51.8	45.7	72.2	60.2	61.8
Chat Model
GPT-3.5-Turbo	-	76.8	70.7	82.5	69.7	70.8
GPT-4-Turbo (Nov 2023)	-	85.4	81.7	83.5	70.7	80.0
DeepSeek-Coder-Instruct	6.7B	73.8	70.1	73.2	63.4	65.4
CodeQwen1.5-Chat	7B	83.5	78.7	77.7	67.2	70.6

Model	Size	Code Generation All Time Pass@1	Code Generation 2023/9/1 ~ 2024/4/1 Pass@1
Base Model
CodeLlama-Base	7B	6.5	7.6
StarCoder2	7B	11.3	12.7
DeepSeek-Coder-Base	6.7B	19.1	13.7
CodeQwen1.5	7B	21.8	19.3
Chat Model
CodeLlama-Instruct	7B	10.6	12.4
DeepSeek-Coder-Instruct	6.7B	21.6	19.2
CodeQwen1.5-Chat	7B	25.0	23.2

Model	Size	Python	C++	Java	PHP	TS	C#	Bash	JS	Avg
Base Model
CodeLlama-Base	7B	31.7	29.8	34.2	23.6	36.5	36.7	12.0	29.2	29.2
StarCoder2-Base	7B	35.3	40.9	37.3	29.2	37.7	40.5	9.4	36.0	33.3
DeepSeek-Coder-Base	6.7B	49.4	50.3	43.0	38.5	49.7	50.0	28.5	48.4	44.7
CodeQwen1.5	7B	52.4	52.2	42.4	46.6	52.2	55.7	36.7	49.7	48.5
Chat Model
GPT-3.5-Turbo	-	76.2	63.4	69.2	60.9	69.1	70.8	42.4	67.1	64.9
GPT-4	-	84.1	76.4	81.6	77.2	77.4	79.1	58.2	78.0	76.5
DeepSeek-Coder-Instruct	6.7B	78.6	63.4	68.4	68.9	67.2	72.8	36.7	72.7	66.1
CodeQwen1.5-Chat	7B	83.2	71.2	70.1	73.5	75.4	75.9	41.1	78.2	71.1

Model	Size	Spider Execution Accuracy Dev Set	Bird Execution Accuracy Dev Set
GPT-3.5-Turbo	-	70.1	37.2
GPT-4	-	85.3	50.7
CodeLlama-Instruct	7B	59.5	22.4
DeepSeek-Coder-Instruct	6.7B	70.1	39.4
CodeQwen1.5-Chat	7B	77.9	42.0