Closed ruiAzevedo19 closed 2 months ago
TODO merge https://github.com/symflower/eval-dev-quality/pull/248 after this
We have cases where models achieve different coverage for functionally equivalent solutions, depending on how the code they produce is structured. For example:
Gemini Flash 1.5
package com.eval;
/** Utility holding a check for whether an int array is in non-decreasing order (for-loop variant). */
class IsSorted {
/**
 * Scans {@code a} from the front and reports whether each element is less than or equal to its successor.
 *
 * @param a the array to inspect
 * @return {@code true} if the scan reaches the last index without meeting an out-of-order pair.
 *         NOTE(review): for an empty array {@code a.length - 1} is -1 while the index stays 0, so the result is {@code false}.
 */
static boolean isSorted(int[] a) {
int i = 0;
// All work happens in the loop condition: advance while the adjacent pair is ordered, so the body is intentionally empty.
for (; i < a.length - 1 && a[i] <= a[i + 1]; i++) {
}
// The index equals the last position only if no out-of-order pair stopped the scan early.
return i == a.length - 1;
}
}
GPT-4o
package com.eval;
/** Utility holding a check for whether an int array is in non-decreasing order (while-loop variant). */
class IsSorted {
/**
 * Scans {@code a} from the front and reports whether each element is less than or equal to its successor.
 *
 * @param a the array to inspect
 * @return {@code true} if the scan reaches the last index without meeting an out-of-order pair.
 *         NOTE(review): for an empty array {@code a.length - 1} is -1 while the index stays 0, so the result is {@code false}.
 */
static boolean isSorted(int[] a) {
int i = 0;
// Advance while there is a next element and the current adjacent pair is ordered.
while (i < a.length - 1 && a[i] <= a[i + 1]) {
i++;
}
// The index equals the last position only if no out-of-order pair stopped the scan early.
return i == a.length - 1;
}
}
Or, as a second example in Go:
GPT-4o
package balancedBrackets
// hasBalancedBrackets reports whether charArray consists solely of '[' and ']'
// characters such that every ']' closes an earlier '[' and no '[' is left open.
// Any other character makes it return false; an empty string returns true.
// This variant classifies each character with an if/else chain.
func hasBalancedBrackets(charArray string) bool {
// brackets counts the '[' seen so far that have not yet been closed.
brackets := 0
for _, ch := range charArray {
if ch == '[' {
brackets++
} else if ch == ']' {
brackets--
} else {
return false // Non-bracket characters.
}
if brackets < 0 { // Closing bracket before opening bracket.
return false
}
}
// Balanced only if every opened bracket was closed.
return brackets == 0
}
Sonnet 3.5
package balancedBrackets
// hasBalancedBrackets reports whether charArray consists solely of '[' and ']'
// characters such that every ']' closes an earlier '[' and no '[' is left open.
// Any other character makes it return false; an empty string returns true.
// This variant classifies each character with a switch statement.
func hasBalancedBrackets(charArray string) bool {
// brackets counts the '[' seen so far that have not yet been closed.
brackets := 0
for _, ch := range charArray {
switch ch {
case '[':
brackets++
case ']':
brackets--
default:
return false // Non-bracket characters.
}
if brackets < 0 { // Closing bracket before opening bracket.
return false
}
}
// Balanced only if every opened bracket was closed.
return brackets == 0
}
Part of #201