#! /usr/bin/env ruby def load(file) File.read file end def corpus return @corpus end def train(text) @corpus = {} count = 0 n = 3 puts "Building n-grams (n = #{n}), this will take a while... " upper_limit = text.length - (n + 1) (0..upper_limit).each do |i| char_limit = i + (n - 1) n_gram = text[i..char_limit] if @corpus[n_gram].nil? @corpus[n_gram] = 1 else @corpus[n_gram] += 1 end count += 1 print "\rCompleted #{i} out of #{upper_limit} (#{"%.2f" % ((i * 1.0 / upper_limit) * 100)}%)" end print "\n\n" @corpus["count"] = count end def prob(word) # Laplacian smoothing (((corpus[word] || 0) + 1) * 1.0) / (corpus.length + corpus["count"]) end def format(problem) matrix = [] problem.split(/[\r\n]/).each do |line| line.gsub(/^\|/,'').split('|').each_with_index do |segment, i| matrix[i] = [] if matrix[i].nil? matrix[i] << segment end end matrix end def print_matrix(matrix) output = "" strip_length = matrix.first.length (0..(strip_length - 1)).each do |i| matrix.each_with_index do |strip, j| output << "|" if j == 0 output << strip[i] << "|" end output << "\n" end puts output end def score(strip_a, strip_b) s = nil strip_a.length.times do |i| p = prob(strip_a[i] + strip_b[i][0]) (s.nil?) ? s = p : s *= p end s end def max(collection, ref_item, &block) highest_item = nil highest_score = 0 collection.each do |item| next if ref_item == item score = block.call(item, ref_item) if score > highest_score highest_score = score highest_item = item end end [[ref_item, highest_item], highest_score] end def solve(m) # pick the 2 strips that fit best matrix = m.dup best_pair = [] best_score = 0 matrix.each do |strip_a| pair, score = max(matrix, strip_a) {|x, passed_in| score(passed_in, x) } if score > best_score best_score = score best_pair = pair end end solution = (score(best_pair.first, best_pair.last) > score(best_pair.last, best_pair.first)) ? best_pair.dup : best_pair.reverse.dup # remove them from the pool matrix.delete(best_pair.first) matrix.delete(best_pair.last) # then go through the pool and find the best match for the first strip and the best match for the last strip # repeat until the list is empty while matrix.length > 0 do start_length = matrix.length # pick whichever of the first and last matches is the best left_pair, left_score = max(matrix, solution.first) {|x, passed_in| score(x, passed_in) } right_pair, right_score = max(matrix, solution.last) {|x, passed_in| score(passed_in, x) } # put that strip in the list of final ordering if left_score > right_score used = left_pair.last solution = [used].concat solution else used = right_pair.last solution << used end # remove that strip from the matrix matrix.delete(used) finish_length = matrix.length end solution end if $0 == __FILE__ puts "Solving shredded message problem..." train(load("corpus.txt")) print_matrix(solve(format(load("input.txt")))) end