I did this for WaniKani (WK), but since I already have the code, I figured I could run it against Bunpro’s item count too.
Method
The accuracy interval 100–51% is sampled evenly at 50 points (one per percentage point). For each of these 50 accuracy levels, 100 simulations are run in which all items are reviewed, each review succeeding with a probability equal to that accuracy level, until every item reaches SRS level 12. In each simulation, the total number of reviews needed to burn all items is tracked, and the average over the 100 simulations is recorded.
Source Code
import random
import time
class Item:
    """A single reviewable item, tracked by its SRS level.

    Attributes (all assigned in ``__init__``):
        _SRS_level: current SRS level; starts at 0.
        _multiplicity: how many "cards" the item has per "note".
        _true_accuracy: when truthy, ``p`` is used as the pass probability for
            the whole item instead of ``p**_multiplicity``.
    """

    def __init__(self, multiplicity, true_accuracy):
        """Create a new item at SRS level 0."""
        self._SRS_level = 0 # Don't count lessons
        self._multiplicity = multiplicity # Indicates how many "cards" you have per "note", to use Anki terminology
        self._true_accuracy = true_accuracy # If we're using the per-item accuracy instead of per-review

    def review_item(self, p):
        """Simulate one review of this item and update its SRS level.

        Draws one uniform random number and compares it with the pass
        probability. On a pass the SRS level goes up by one; on a fail it goes
        down by one unless it is already 0.

        Returns a tuple ``(SRS level after the review, review_count)``.
        """
        # p**self._multiplicity is used here because a user has to pass both the meaning and reading review when multiplicity is 2.
        p_observed = random.random()
        p_pass = p**self._multiplicity
        if self._true_accuracy:
            # Per-item accuracy: p already is the probability of passing the whole item.
            p_pass = p
        if p_observed < p_pass:
            self._SRS_level += 1 # If review is successful item goes up one SRS level
            review_count = self._multiplicity # if it's a radical we did one review, else 2
        else:
            # Reaching this branch means p_observed >= p_pass, so the test below
            # can only succeed when p_pass < p (per-review accuracy, multiplicity > 1).
            if p_observed < p: # If this is true then we failed one review and passed one
                review_count = 3
            else:
                review_count = 2*self._multiplicity
            if self._SRS_level != 0: # Don't change SRS level if item is already at the lowest
                # Alternative drop rule, left disabled:
                #if self._SRS_level <= 4: # If item is an apprentice then reduce SRS by 1 level
                #    self._SRS_level -= 1
                #else: # Else 2 levels
                #    self._SRS_level -= 2
                self._SRS_level -= 1
        # NOTE(review): the indentation of this script was lost before review.
        # Placing this assignment at function level (every call reports exactly
        # one review, overriding the counts computed above) is an assumption --
        # confirm against the original script.
        review_count = 1
        return self._SRS_level, review_count
def create_items(count, double, true_accuracy):
    """Build the review queue as a dict of Item objects keyed by 1-based index.

    Keys 1..double hold items with multiplicity 2; keys double+1..count hold
    items with multiplicity 1. ``true_accuracy`` is passed to every Item.
    """
    queue = {key: Item(2, true_accuracy) for key in range(1, double + 1)}
    queue.update(
        (key, Item(1, true_accuracy)) for key in range(double + 1, count + 1)
    )
    return queue
def review_items(count, items, max_srs, p):
    """Review all items, round after round, until they reach the final SRS level.

    Args:
        count: number of items that still have to reach ``max_srs``; expected
            to equal ``len(items)``.
        items: dict of objects exposing ``review_item(p)`` that returns
            ``(srs_level, review_count)``. Finished items are deleted from it.
        max_srs: SRS level at which an item is removed from the queue.
        p: probability handed to every ``review_item`` call.

    Returns:
        The sum of the review counts reported by all ``review_item`` calls.
    """
    reviews = 0
    # Stop on an empty queue as well: if count overstates the number of items,
    # checking count alone would loop forever over nothing.
    while count > 0 and items:
        # Iterate over a snapshot of the keys because finished items are
        # deleted from the dict during the pass.
        for key in list(items):
            srs_level, review = items[key].review_item(p)
            reviews += review
            # If item reaches last SRS level remove it from the queue
            if srs_level == max_srs:
                del items[key]
                count -= 1
    return reviews
def repeat_run(runs, single, double, max_srs, p, true_accuracy):
    """Run the same simulation ``runs`` times and return the summed review count.

    Each run builds a fresh queue of ``single + double`` items and reviews it
    until every item reaches ``max_srs``. The value returned is the total over
    all runs; callers divide by ``runs`` to get the average.
    """
    item_total = single + double
    return sum(
        review_items(
            item_total,
            create_items(item_total, double, true_accuracy),
            max_srs,
            p,
        )
        for _ in range(runs)
    )
def add_to_table(accuracy, total_reviews, runs):
    """Format one accuracy / average-reviews pair for the Discourse table.

    Args:
        accuracy: accuracy level in percent, shown in the first cell.
        total_reviews: reviews summed over all runs at this accuracy.
        runs: number of runs; the average shown is ``total_reviews / runs``
            rounded and written with thousands separators.

    Returns:
        The markdown fragment for this pair. It ends the table row with a
        newline when ``(accuracy - 1) % 5 == 0``; otherwise it appends a
        separator cell containing an escaped pipe.
    """
    average = round(total_reviews / runs)
    data = "| " + str(accuracy) + " | " + "{:,}".format(average) + " "
    # The former "and accuracy" guard was redundant: a falsy accuracy (0)
    # gives (0 - 1) % 5 == 4, so the first test already fails for it.
    if (accuracy - 1) % 5 == 0:
        return data + "\n"
    # "\\|" yields backslash + pipe, the same text the invalid escape "\|"
    # produced, without the invalid-escape warning.
    return data + "| \\| "
def parse_time(seconds):
    """Split a duration in seconds into ``(hours, minutes, seconds)``.

    Values below 60 (including negative ones) are returned unchanged as the
    seconds component with zero hours and minutes.
    """
    minutes = 0
    hours = 0
    # ">=" rather than ">": exactly 60 seconds is one minute and exactly
    # 60 minutes is one hour (3600 s used to come back as 0h 60m 0s).
    if seconds >= 60:
        minutes, seconds = divmod(seconds, 60)
    if minutes >= 60:
        hours, minutes = divmod(minutes, 60)
    return hours, minutes, seconds
def simulate(highest_accuracy, interval_length, lowest_accuracy, runs, number_of_single_items, number_of_double_items, total_estimate, max_srs, true_accuracy, estimate=False):
    """Run the simulation for every accuracy level and build the result table.

    Starting at ``highest_accuracy`` and stepping down by ``interval_length``
    while the level is at least ``lowest_accuracy``, the simulation is repeated
    ``runs`` times per level and one table entry is appended per level.

    Args:
        highest_accuracy: first accuracy level, in percent.
        interval_length: step between accuracy levels, in percent.
        lowest_accuracy: last accuracy level (inclusive), in percent.
        runs: number of repetitions per accuracy level.
        number_of_single_items: count of items with multiplicity 1.
        number_of_double_items: count of items with multiplicity 2.
        total_estimate: expected total number of reviews, used only for the
            progress / time-remaining output; a falsy value disables it.
        max_srs: SRS level at which an item is finished.
        true_accuracy: passed through to the items (per-item accuracy switch).
        estimate: when true, nothing is printed.

    Returns:
        ``(table_data, reviews_done)``: the markdown table text and the total
        number of reviews over all levels and runs.
    """
    # "\\|" is backslash + pipe, the same text the invalid escape "\|" produced.
    table_data = (
        "| % | Reviews | \\| | % | Reviews | \\| | % | Reviews | \\| | % | Reviews | \\| | % | Reviews |\n"
        "|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-\n"
    )
    reviews_done = 0 # Will contain number of reviews done
    accuracy = highest_accuracy
    t0 = time.time() # Start time
    while accuracy >= lowest_accuracy:
        p = accuracy / 100 # Accuracy as a probability
        # Redo the simulation a number of times for the current accuracy level
        total_reviews = repeat_run(runs, number_of_single_items, number_of_double_items, max_srs, p, true_accuracy)
        reviews_done += total_reviews
        # Add to the table
        table_data += add_to_table(accuracy, total_reviews, runs)
        if not estimate:
            if total_estimate and reviews_done:
                # Scale the elapsed time by the share of the estimated work
                # still outstanding.
                time_elapsed = time.time() - t0
                seconds = round(total_estimate / reviews_done * time_elapsed - time_elapsed)
                hours, minutes, seconds = parse_time(seconds)
                progress = str(round(reviews_done / total_estimate * 100)) + '%'
                time_left = str(hours) + "h " + str(minutes) + "m and " + str(seconds) + "s remaining."
            else:
                # No usable estimate (or nothing reviewed yet): skip the ETA
                # arithmetic instead of dividing by zero.
                progress = ""
                time_left = ""
            print(accuracy, '-', 'Average Reviews:', "{:,}".format(total_reviews//runs), '-', progress, '-', time_left)
        # Go to new accuracy level
        accuracy -= interval_length
    return table_data, reviews_done
def main():
    """Run the Bunpro simulation with the settings below and print the results.

    When ``runs`` is at least 4, a single-run pass is done first; its review
    count times ``runs`` serves as the estimate behind the progress and
    time-remaining output of the real simulation.
    """
    # Settings
    highest_accuracy = 100 # Percent
    interval_length = 1 # Percent
    lowest_accuracy = 41 # Percent
    #runs = 1 # Per level of accuracy
    runs = 100 # Per level of accuracy
    max_srs = 12 # This is for Bunpro
    number_of_double_items = 0 # This is for Bunpro
    number_of_single_items = 514 # This is for Bunpro
    true_accuracy = True # Use per-item accuracy rather than per-review
    # Estimate how many total reviews we can expect by doing one run
    # The estimate is used to calculate time left
    if runs >= 4:
        # Only the review count of the estimate pass is needed; its table is discarded.
        _, reviews_done = simulate(highest_accuracy, interval_length, lowest_accuracy, 1, number_of_single_items, number_of_double_items, 0, max_srs, true_accuracy, estimate=True)
        total_estimate = reviews_done*runs
    else:
        total_estimate = False
    # Simulate
    table_data, reviews_done = simulate(highest_accuracy, interval_length, lowest_accuracy, runs, number_of_single_items, number_of_double_items, total_estimate, max_srs, true_accuracy)
    print('Total reviews:', "{:,}".format(reviews_done))
    print(table_data)


# Guard the entry point so importing this module does not start the simulation.
if __name__ == "__main__":
    main()
Assumptions
- There are 514 items in total.
- There are 12 SRS levels.
- An item either goes up one SRS level, or down one SRS level (unless it’s at SRS 0), upon review.
- After an item reaches SRS level 12 it will no longer be reviewed.
Results
These are the results after a total of 324,640,297 simulated reviews.
The percentage columns indicate the accuracy level. The review columns indicate the average number of reviews needed to burn everything, given the adjacent average accuracy.
% | Reviews | \| | % | Reviews | \| | % | Reviews | \| | % | Reviews | \| | % | Reviews |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
100 | 5,654 | \| | 99 | 5,858 | \| | 98 | 6,078 | \| | 97 | 6,306 | \| | 96 | 6,550 |
95 | 6,798 | \| | 94 | 7,065 | \| | 93 | 7,357 | \| | 92 | 7,677 | \| | 91 | 7,998 |
90 | 8,341 | \| | 89 | 8,698 | \| | 88 | 9,116 | \| | 87 | 9,575 | \| | 86 | 10,016 |
85 | 10,536 | \| | 84 | 11,063 | \| | 83 | 11,666 | \| | 82 | 12,305 | \| | 81 | 13,065 |
80 | 13,840 | \| | 79 | 14,676 | \| | 78 | 15,601 | \| | 77 | 16,724 | \| | 76 | 17,933 |
75 | 19,126 | \| | 74 | 20,646 | \| | 73 | 22,310 | \| | 72 | 24,105 | \| | 71 | 26,348 |
70 | 29,088 | \| | 69 | 31,885 | \| | 68 | 34,983 | \| | 67 | 39,007 | \| | 66 | 43,493 |
65 | 48,969 | \| | 64 | 55,122 | \| | 63 | 62,609 | \| | 62 | 71,362 | \| | 61 | 81,905 |
60 | 95,924 | \| | 59 | 110,817 | \| | 58 | 131,157 | \| | 57 | 155,629 | \| | 56 | 183,226 |
55 | 222,752 | \| | 54 | 268,194 | \| | 53 | 323,623 | \| | 52 | 398,613 | \| | 51 | 495,017 |
Conclusion
I calculated that I have done 1,975 reviews so far, at an average accuracy of 91%. If I keep the same average, the table shows that I will need approximately 7,998 reviews in total, which means I’m about 25% of the way to getting all the items to the final SRS level.