Monday, April 19, 2010

The thirteenth Project Euler problem is:

Find the first ten digits of the sum of one-hundred 50-digit numbers.

I used F#'s built in BigInt data type. The array of 50 digit numbers looks like this:

let numbers = [|
    37107287533902102798797998220837590246510135740250I;
    46376937677490009712648124896970078050417018260538I;
    74324986199524741059474233309513058123726617309629I;
    91942213363574161572522430563301811072406154908250I;
    23067588207539346171171980310421047513778063246676I;
    89261670696623633820136378418383684178734361726757I;
    28112879812849979408065481931592621691275889832738I;
    44274228917432520321923589422876796487670272189318I;
    47451445736001306439091167216856844588711603153276I;
    70386486105843025439939619828917593665686757934951I;
    62176457141856560629502157223196586755079324193331I;
    64906352462741904929101432445813822663347944758178I;
    92575867718337217661963751590579239728245598838407I;
    58203565325359399008402633568948830189458628227828I;
    80181199384826282014278194139940567587151170094390I;
    35398664372827112653829987240784473053190104293586I;
    86515506006295864861532075273371959191420517255829I;
    71693888707715466499115593487603532921714970056938I;
    54370070576826684624621495650076471787294438377604I;
    53282654108756828443191190634694037855217779295145I;
    36123272525000296071075082563815656710885258350721I;
    45876576172410976447339110607218265236877223636045I;
    17423706905851860660448207621209813287860733969412I;
    81142660418086830619328460811191061556940512689692I;
    51934325451728388641918047049293215058642563049483I;
    62467221648435076201727918039944693004732956340691I;
    15732444386908125794514089057706229429197107928209I;
    55037687525678773091862540744969844508330393682126I;
    18336384825330154686196124348767681297534375946515I;
    80386287592878490201521685554828717201219257766954I;
    78182833757993103614740356856449095527097864797581I;
    16726320100436897842553539920931837441497806860984I;
    48403098129077791799088218795327364475675590848030I;
    87086987551392711854517078544161852424320693150332I;
    59959406895756536782107074926966537676326235447210I;
    69793950679652694742597709739166693763042633987085I;
    41052684708299085211399427365734116182760315001271I;
    65378607361501080857009149939512557028198746004375I;
    35829035317434717326932123578154982629742552737307I;
    94953759765105305946966067683156574377167401875275I;
    88902802571733229619176668713819931811048770190271I;
    25267680276078003013678680992525463401061632866526I;
    36270218540497705585629946580636237993140746255962I;
    24074486908231174977792365466257246923322810917141I;
    91430288197103288597806669760892938638285025333403I;
    34413065578016127815921815005561868836468420090470I;
    23053081172816430487623791969842487255036638784583I;
    11487696932154902810424020138335124462181441773470I;
    63783299490636259666498587618221225225512486764533I;
    67720186971698544312419572409913959008952310058822I;
    95548255300263520781532296796249481641953868218774I;
    76085327132285723110424803456124867697064507995236I;
    37774242535411291684276865538926205024910326572967I;
    23701913275725675285653248258265463092207058596522I;
    29798860272258331913126375147341994889534765745501I;
    18495701454879288984856827726077713721403798879715I;
    38298203783031473527721580348144513491373226651381I;
    34829543829199918180278916522431027392251122869539I;
    40957953066405232632538044100059654939159879593635I;
    29746152185502371307642255121183693803580388584903I;
    41698116222072977186158236678424689157993532961922I;
    62467957194401269043877107275048102390895523597457I;
    23189706772547915061505504953922979530901129967519I;
    86188088225875314529584099251203829009407770775672I;
    11306739708304724483816533873502340845647058077308I;
    82959174767140363198008187129011875491310547126581I;
    97623331044818386269515456334926366572897563400500I;
    42846280183517070527831839425882145521227251250327I;
    55121603546981200581762165212827652751691296897789I;
    32238195734329339946437501907836945765883352399886I;
    75506164965184775180738168837861091527357929701337I;
    62177842752192623401942399639168044983993173312731I;
    32924185707147349566916674687634660915035914677504I;
    99518671430235219628894890102423325116913619626622I;
    73267460800591547471830798392868535206946944540724I;
    76841822524674417161514036427982273348055556214818I;
    97142617910342598647204516893989422179826088076852I;
    87783646182799346313767754307809363333018982642090I;
    10848802521674670883215120185883543223812876952786I;
    71329612474782464538636993009049310363619763878039I;
    62184073572399794223406235393808339651327408011116I;
    66627891981488087797941876876144230030984490851411I;
    60661826293682836764744779239180335110989069790714I;
    85786944089552990653640447425576083659976645795096I;
    66024396409905389607120198219976047599490197230297I;
    64913982680032973156037120041377903785566085089252I;
    16730939319872750275468906903707539413042652315011I;
    94809377245048795150954100921645863754710598436791I;
    78639167021187492431995700641917969777599028300699I;
    15368713711936614952811305876380278410754449733078I;
    40789923115535562561142322423255033685442488917353I;
    44889911501440648020369068063960672322193204149535I;
    41503128880339536053299340368006977710650566631954I;
    81234880673210146739058568557934581403627822703280I;
    82616570773948327592232845941706525094512325230608I;
    22918802058777319719839450180888072429661980811197I;
    77158542502016545090413245809786882778948721859617I;
    72107838435069186155435662884062257473692284509516I;
    20849603980134001723930671666823555245252804609722I;
    53503534226472524250874054075591789781264330331690I;
    |]

Once we have the numbers in an array it is easy to add them up:

numbers |> Array.sum |> printfn "total = %A"

So easy it feels like cheating. Formatting the data was the most time consuming part of solving this problem.
In the previous post I talked about computing the divisor count of an integer given its prime factorisation. I also said I didn't have to prove the formula. However, I find I just can't leave it alone. Why does that formula work? I will sketch out a brief rationale (that falls far short of a formal proof).

I think it is pretty clear that any number that divides into an integer has to be made up from some mixture of the integer's prime factors. In other words, if a prime p is not a factor of an integer n then p will not divide evenly into n. Also, if p is a factor of n and it divides into n α times then pα+1 will not divide into n.

So we can build all divisors of n by multiplying all of n's prime factors and varying their exponent between 0 and allowed exponent for that factor. That is, for example, if n = pα then the divisors of n are {p0, p1, ... pα}. This set contains α+1 elements.

If n has multiple prime factors then you can multiply them all together with all acceptable permutations of exponents to create the divisor set. In the field of combinatorics this is selection with replacement (pdf). If you have ω(n) positions to fill and each position has αi+1 possible values then the total number of combinations is the product of the possible values. That is:


I feel much better now.

Friday, April 16, 2010

I think I'll post my answer to Problem 12 of Project Euler next:

What is the value of the first triangle number to have over five hundred divisors?

What exactly is a triangle number? It is a number given by this formula:


For this problem we actually want to generate a sequence of triangular numbers. It is most efficient to generate the nth number by adding n to the (n-1)th number. We can capture that in F# as follows:

let triangles =
    let rec inner m n =
        seq {
            let p = m + n
            yield p
            yield! inner p (n+1L)
        }
    seq {
        yield! inner 0L 1L
    }

The next question is how do we find the number of divisors of the number n? The brute force way would be to try all values up to n/2 and find the ones which divide evenly into the target number. I figured there had to be a better way. A little bit of research on the web showed me that if you know the prime factorization for a number then you can easily compute the number of divisors. Suppose that the a number n can be factored as follows:


where ω(n) is the number of distinct prime factors of n. Then the number of divisors of n is given by:


Fortunately, I'm no longer a math major and so I don't have to prove that this is true. For my purposes it's enough to just try it out and see if it seems reasonable. So, we can say that 12 = 22 x 3. According to the formula then the number of divisors should be (2 + 1) x (1 + 1) = 6. Listing the divisors we find the following set: {1, 2, 3, 4, 6, 12}. Good enough for me.

To solve this Project Euler problem I need to be able to factor an arbitrary number. The simplest thing is to try all the primes up to the square root of the target number and see if they divide into it evenly. As before I need a sequence of primes. Here's the code:

let primes =
    let rec sieve n table =
        let rec update n p table =
            let n = n+p
            match Map.tryFind n table with
            | None              -> Map.add n p table
            | Some(prime)       -> update n p table
        seq {
            match Map.tryFind n table with
            | None          ->
                yield n
                yield! sieve (n+1L) (table |> Map.add (n*n) n)
            | Some(prime)   ->
                yield! sieve (n+1L) (Map.remove n table |> update n prime)
        }
    sieve 2L Map.empty

I think this will be the last time I display this code. From now on just assume that when needed I can generate a list of primes with a lazy sequence.

I also need a few helper functions. Here's an integer square root:

let root (n:int64) =
    int64 (sqrt (float n))

And an integer power function (tail recursive just for fun):

let power p e  =
    let rec inner p e acc =
        if e = 0L then
            acc
        else
            inner p (e-1L) p*acc
    inner p e 1L

Finally here's a function to compute the number of times a prime divides into a number:

let exponent p n =
    let rec inner p n acc =
        if n%p = 0L then
            inner p (n/p) (acc+1L)
        else
            acc
    inner p n 0L

The next step is to compute a sequence which consists of the prime factors of a number along with each factor's exponent. Here's what I came up with:

let rec factors n =
    seq {
        if n > 1L then
            let divisor =
                primes
                    |> Seq.takeWhile (fun k -> k <= root n)
                    |> Seq.tryFind (fun k -> n%k = 0L)
            match divisor with
            | None      -> yield (n, 1L)
            | Some(k)   ->
                let e = exponent k n
                yield (k, e)
                yield! factors (n/power k e)
    }

There's some inefficiency here in that each time we recurse we start the sequence of primes over again. I'm not losing any sleep over it...

Given all this we can now compute the number of divisors of a number:

let divisors n =
    factors n
        |> Seq.map (fun k -> snd k)
        |> Seq.fold (fun acc e -> acc*(e+1L)) 1L

The final step is to find the first entry in the triangular number sequence which has more than 500 divisors.

triangles
    |> Seq.tryFind (fun n -> divisors n > 500L)
    |> printfn "%A"

It was nice to revisit this solution while putting together this post. My original solution was a bit hard to read. I think it is much clearer now.

Thursday, April 08, 2010

The next book I want to classify is "Editing Early Music" by John Caldwell (ISBN 0-19-816544-7). The publisher, Oxford University Press, claims that it should be filed at 781.2 Elements of Music but this doesn't seem right to me. I'm not sure which edition of the Dewey Classification system they are using, perhaps this number made more sense before the great revision. Looking at the revised 780 schedule I found 780.149 Editing Music. This seems to be what I'm looking for. There is also a notation under 780.148 Musical notation, abbreviations, symbols that says "Class transcription from one form of notation to another in 780.149". This again indicates that I'm on the right track (this book is all about transforming early music notation into a form that can be understood by a modern musician).

At this point I could easily say I'm done but the Dewey system does allow further aspects or facets of the work to be captured. In this case I think you could add 780.902 500 - 1449 to the base number to indicate that the book is relevant to early music. The way you do this is still a little unclear to me. I believe that you add a facet indicator (which can be either 0 or 1) to the base number and then append the number after the decimal place in the facet. This would yield either 780.1490902 or 780.1491902. I think this would be fine in a large library with a ton of books on the same subject. For my library (with maybe 50 books and a couple hundred scores) I think it makes more sense to use the shorter classification number and then sort all the books in that class by Cutter number.

Update: I now believe that 0 is used to indicate a standard subdivision and 1 indicates a facet for a built number. So, in the example above, 780.1491902 would be the correct class number. Please correct me if I'm wrong.

Friday, April 02, 2010

What is the Dewey classification number for "The Cambridge Companion to the recorder" edited by John Mansfield Thomson (ISBN 0-521-35816-7)? According to the publisher it is 788.36 from 788.36 Recorders. At the Cambridge website they list the call number like this:
Dewey number: 788.3/6
Dewey version: 20

Knowing that they are using the revised 780 schedule means I can use their number directly without having to calculate it myself. Although, in this case I did, because working backwards from a known good value helps me to understand the classification process.
Problem 11 of the Project Euler:

What is the greatest product of four numbers on the same straight line in the 20 by 20 grid?

The problem description lays out an array of two digit integers where any group of 4 numbers either horizontally or vertically or diagonally must be evaluated to find the maximum product. My algorithms kept banging into the edges of the array (which caused ugly code) until I realised that I could embed their array inside a larger one and then just look at a subset of it. The four numbers I would look at in each step could include the padding and, if the pad number was zero, the maximum product would not be affected. I'd have to examine a few more products than if I handled the edges correctly but that's just a minor inefficiency.

With that in mind I defined the array as follows. The actual data starts at (3,3):

let data =
    [|
      [|00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00|];
      [|00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00|];
      [|00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00|];
      [|00; 00; 00; 08; 02; 22; 97; 38; 15; 00; 40; 00; 75; 04; 05; 07; 78; 52; 12; 50; 77; 91; 08; 00; 00; 00|];
      [|00; 00; 00; 49; 49; 99; 40; 17; 81; 18; 57; 60; 87; 17; 40; 98; 43; 69; 48; 04; 56; 62; 00; 00; 00; 00|];
      [|00; 00; 00; 81; 49; 31; 73; 55; 79; 14; 29; 93; 71; 40; 67; 53; 88; 30; 03; 49; 13; 36; 65; 00; 00; 00|];
      [|00; 00; 00; 52; 70; 95; 23; 04; 60; 11; 42; 69; 24; 68; 56; 01; 32; 56; 71; 37; 02; 36; 91; 00; 00; 00|];
      [|00; 00; 00; 22; 31; 16; 71; 51; 67; 63; 89; 41; 92; 36; 54; 22; 40; 40; 28; 66; 33; 13; 80; 00; 00; 00|];
      [|00; 00; 00; 24; 47; 32; 60; 99; 03; 45; 02; 44; 75; 33; 53; 78; 36; 84; 20; 35; 17; 12; 50; 00; 00; 00|];
      [|00; 00; 00; 32; 98; 81; 28; 64; 23; 67; 10; 26; 38; 40; 67; 59; 54; 70; 66; 18; 38; 64; 70; 00; 00; 00|];
      [|00; 00; 00; 67; 26; 20; 68; 02; 62; 12; 20; 95; 63; 94; 39; 63; 08; 40; 91; 66; 49; 94; 21; 00; 00; 00|];
      [|00; 00; 00; 24; 55; 58; 05; 66; 73; 99; 26; 97; 17; 78; 78; 96; 83; 14; 88; 34; 89; 63; 72; 00; 00; 00|];
      [|00; 00; 00; 21; 36; 23; 09; 75; 00; 76; 44; 20; 45; 35; 14; 00; 61; 33; 97; 34; 31; 33; 95; 00; 00; 00|];
      [|00; 00; 00; 78; 17; 53; 28; 22; 75; 31; 67; 15; 94; 03; 80; 04; 62; 16; 14; 09; 53; 56; 92; 00; 00; 00|];
      [|00; 00; 00; 16; 39; 05; 42; 96; 35; 31; 47; 55; 58; 88; 24; 00; 17; 54; 24; 36; 29; 85; 57; 00; 00; 00|];
      [|00; 00; 00; 86; 56; 00; 48; 35; 71; 89; 07; 05; 44; 44; 37; 44; 60; 21; 58; 51; 54; 17; 58; 00; 00; 00|];
      [|00; 00; 00; 19; 80; 81; 68; 05; 94; 47; 69; 28; 73; 92; 13; 86; 52; 17; 77; 04; 89; 55; 40; 00; 00; 00|];
      [|00; 00; 00; 04; 52; 08; 83; 97; 35; 99; 16; 07; 97; 57; 32; 16; 26; 26; 79; 33; 27; 98; 66; 00; 00; 00|];
      [|00; 00; 00; 88; 36; 68; 87; 57; 62; 20; 72; 03; 46; 33; 67; 46; 55; 12; 32; 63; 93; 53; 69; 00; 00; 00|];
      [|00; 00; 00; 04; 42; 16; 73; 38; 25; 39; 11; 24; 94; 72; 18; 08; 46; 29; 32; 40; 62; 76; 36; 00; 00; 00|];
      [|00; 00; 00; 20; 69; 36; 41; 72; 30; 23; 88; 34; 62; 99; 69; 82; 67; 59; 85; 74; 04; 36; 16; 00; 00; 00|];
      [|00; 00; 00; 20; 73; 35; 29; 78; 31; 90; 01; 74; 31; 49; 71; 48; 86; 81; 16; 23; 57; 05; 54; 00; 00; 00|];
      [|00; 00; 00; 01; 70; 54; 71; 83; 51; 54; 69; 16; 92; 33; 48; 61; 43; 52; 01; 89; 19; 67; 48; 00; 00; 00|];
      [|00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00|];
      [|00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00|];
      [|00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00; 00|];
    |]

Now that I have the array data in place I can create a sequence consisting of the products of four numbers in various directions. I chose to look at the four numbers to the left of the current number, down diagonally to the left, straight down and down diagonally to the right. See the coloured numbers in the above array. That's all that's needed to get every possibly four digit product. I created a sequence from the array data that looked like this:

let products =
    seq {
        for i in 3..22 do
            for j in 3..22 do
                yield (data.[i].[j] * data.[i-1].[j  ] * data.[i-2].[j  ] * data.[i-3].[j  ])
                yield (data.[i].[j] * data.[i-1].[j+1] * data.[i-2].[j+2] * data.[i-3].[j+3])
                yield (data.[i].[j] * data.[i  ].[j+1] * data.[i  ].[j+2] * data.[i  ].[j+3])
                yield (data.[i].[j] * data.[i+1].[j+1] * data.[i+2].[j+2] * data.[i+3].[j+3])
    }

A simple chain of operations finds the maximum product:

products |> Seq.max |> printfn "maximum = %A"